urllib3 is a powerful HTTP client for Python that provides comprehensive support for multipart/form-data file uploads. This encoding type is the standard method used by web browsers when submitting forms with file inputs.
Installation
First, install urllib3 if it's not already available:
pip install urllib3
Basic File Upload
Here's a complete example of uploading a single file using urllib3:
import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata
# Create a PoolManager instance
http = urllib3.PoolManager()

# Define file details
file_path = '/path/to/your/document.pdf'
field_name = 'file'

# Read the file contents
with open(file_path, 'rb') as f:
    file_data = f.read()

# Build the file part. make_multipart() is what generates the
# Content-Disposition (name + filename) and Content-Type part headers;
# a bare RequestField(...) has neither.
file_field = RequestField(
    name=field_name,
    data=file_data,
    filename='document.pdf',
)
file_field.make_multipart(content_type='application/pdf')

# Prepare form data as a LIST. RequestField objects in a list are used as-is
# and (name, value) tuples become plain text fields. Putting a RequestField
# in a dict as a value would wrap it in a second RequestField and fail when
# the body is written.
fields = [
    file_field,
    ('description', 'Important document'),  # Additional text field
    ('category', 'legal'),
]

# Encode multipart data
encoded_data, content_type = encode_multipart_formdata(fields)

# Upload the file
try:
    response = http.request(
        'POST',
        'https://example.com/upload',
        body=encoded_data,
        headers={'Content-Type': content_type},
    )
except urllib3.exceptions.HTTPError as e:
    # HTTPError is the base class of urllib3's own exceptions
    # (connection failures, timeouts, exhausted retries, ...).
    print(f'Error during upload: {e}')
else:
    if response.status == 200:
        print('Upload successful!')
        print(f'Response: {response.data.decode("utf-8")}')
    else:
        print(f'Upload failed with status: {response.status}')
Multiple File Upload
To upload multiple files simultaneously:
import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata
http = urllib3.PoolManager()

# Prepare multiple files
files_to_upload = [
    {'path': '/path/to/image1.jpg', 'field': 'image1', 'type': 'image/jpeg'},
    {'path': '/path/to/image2.png', 'field': 'image2', 'type': 'image/png'},
    {'path': '/path/to/document.pdf', 'field': 'document', 'type': 'application/pdf'},
]

# Collect the parts in a list: encode_multipart_formdata() uses RequestField
# objects from a list as-is, whereas a RequestField stored as a dict value
# would be wrapped in a second RequestField and fail when the body is written.
fields = []

# Create a RequestField for each file
for file_info in files_to_upload:
    with open(file_info['path'], 'rb') as f:
        file_data = f.read()

    file_field = RequestField(
        name=file_info['field'],
        data=file_data,
        filename=file_info['path'].split('/')[-1],
    )
    # Generates the Content-Disposition and Content-Type part headers.
    file_field.make_multipart(content_type=file_info['type'])
    fields.append(file_field)

# Add additional form fields as (name, value) tuples
fields.append(('user_id', '12345'))
fields.append(('upload_type', 'batch'))

# Encode and upload
encoded_data, content_type = encode_multipart_formdata(fields)
response = http.request(
    'POST',
    'https://example.com/batch-upload',
    body=encoded_data,
    headers={'Content-Type': content_type},
)
print(f'Batch upload status: {response.status}')
Streaming Large Files
For large files, use streaming to avoid memory issues:
import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata
def stream_file_upload(file_path, upload_url, chunk_size=64 * 1024):
    """
    Upload a file as multipart/form-data without loading it into memory.

    encode_multipart_formdata() builds the complete body in an in-memory
    buffer and only accepts bytes/str field data, so it cannot stream. This
    function renders the multipart framing itself and hands urllib3 a
    generator that yields the file in chunks.

    Args:
        file_path (str): Path to the file to upload
        upload_url (str): URL that receives the POST request
        chunk_size (int): Number of bytes read from disk per chunk

    Returns:
        The urllib3 response object for the POST request
    """
    import os
    from urllib3.filepost import choose_boundary

    http = urllib3.PoolManager()
    boundary = choose_boundary()

    # The field carries no data; it is used only to render the part headers
    # (Content-Disposition with name/filename, and Content-Type).
    file_field = RequestField(
        name='file',
        data=b'',
        filename=os.path.basename(file_path),
    )
    file_field.make_multipart(content_type='application/octet-stream')

    # render_headers() already ends with the blank line that separates the
    # part headers from the part content.
    preamble = f'--{boundary}\r\n{file_field.render_headers()}'.encode('utf-8')
    epilogue = f'\r\n--{boundary}--\r\n'.encode('utf-8')

    def body_chunks(file_obj):
        # Multipart framing around the raw file bytes, one chunk at a time.
        yield preamble
        yield from iter(lambda: file_obj.read(chunk_size), b'')
        yield epilogue

    with open(file_path, 'rb') as f:
        # Size is taken from the open handle so it matches what will be read.
        file_size = os.fstat(f.fileno()).st_size
        response = http.request(
            'POST',
            upload_url,
            body=body_chunks(f),
            headers={
                'Content-Type': f'multipart/form-data; boundary={boundary}',
                # An explicit length lets the body be sent as-is instead of
                # with chunked transfer encoding.
                'Content-Length': str(len(preamble) + file_size + len(epilogue)),
            },
            # The body is a one-shot generator and cannot be replayed, so
            # automatic retries and redirects are disabled.
            retries=False,
        )
    return response


# Usage
response = stream_file_upload('/path/to/large_file.zip', 'https://example.com/upload')
print(f'Upload completed with status: {response.status}')
Error Handling and Best Practices
Here's a robust implementation with comprehensive error handling:
import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata
import os
import mimetypes
class FileUploader:
    """
    Upload files to ``<base_url>/upload`` as multipart/form-data.

    upload_file() never raises: every outcome is reported as a result dict
    with a boolean 'success' key, and every unsuccessful result carries a
    human-readable 'error' key.
    """

    def __init__(self, base_url, timeout=30, max_size=50 * 1024 * 1024):
        """
        Args:
            base_url (str): Server base URL; '/upload' is appended to it
            timeout: Timeout passed to the urllib3 PoolManager
            max_size (int): Largest accepted file size in bytes (default 50MB)
        """
        # No ca_certs is passed: with CERT_REQUIRED and no explicit bundle,
        # urllib3 verifies against the system's default trust store.
        # (urllib3.util.ssl_ has no DEFAULT_CERTS attribute.)
        self.http = urllib3.PoolManager(
            cert_reqs='CERT_REQUIRED',  # Enforce SSL certificate verification
            timeout=timeout,
        )
        self.base_url = base_url
        self.max_size = max_size

    def upload_file(self, file_path, field_name='file', additional_fields=None):
        """
        Upload a file with comprehensive error handling

        Args:
            file_path (str): Path to the file to upload
            field_name (str): Form field name for the file
            additional_fields (dict): Additional form fields to include

        Returns:
            dict: Upload result with status and response data. Always has
            'success'; has 'error' whenever 'success' is False; has
            'status_code', 'response_data' and 'file_size' whenever the
            server returned a response.
        """
        additional_fields = additional_fields or {}

        # Validate that the path is an existing regular file (a directory
        # exists too, but cannot be opened for reading).
        if not os.path.isfile(file_path):
            return {'success': False, 'error': f'File not found: {file_path}'}

        # Get file info
        filename = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
        content_type, _ = mimetypes.guess_type(file_path)
        content_type = content_type or 'application/octet-stream'

        # Validate file size
        if file_size > self.max_size:
            return {'success': False, 'error': f'File too large: {file_size} bytes'}

        try:
            with open(file_path, 'rb') as f:
                file_data = f.read()

            # Create file field; make_multipart() generates the
            # Content-Disposition and Content-Type part headers.
            file_field = RequestField(
                name=field_name,
                data=file_data,
                filename=filename,
            )
            file_field.make_multipart(content_type=content_type)

            # Prepare all fields as a list: RequestField objects are used
            # as-is and (name, value) tuples become plain form fields. A
            # RequestField stored as a dict value would be wrapped in a
            # second RequestField and fail when the body is written.
            fields = [file_field, *additional_fields.items()]

            # Encode multipart data
            encoded_data, content_type_header = encode_multipart_formdata(fields)

            # Make the request
            response = self.http.request(
                'POST',
                f"{self.base_url.rstrip('/')}/upload",
                body=encoded_data,
                headers={
                    'Content-Type': content_type_header,
                    'Content-Length': str(len(encoded_data)),
                },
            )

            result = {
                'success': response.status == 200,
                'status_code': response.status,
                # errors='replace' keeps a non-UTF-8 response body from
                # turning a completed request into an "Unexpected error".
                'response_data': response.data.decode('utf-8', errors='replace'),
                'file_size': file_size,
            }
            if not result['success']:
                result['error'] = f'Upload failed with status: {response.status}'
            return result

        # TimeoutError subclasses HTTPError, so it must be handled first.
        except urllib3.exceptions.TimeoutError as e:
            return {'success': False, 'error': f'Timeout error: {e}'}
        except urllib3.exceptions.HTTPError as e:
            return {'success': False, 'error': f'HTTP error: {e}'}
        except Exception as e:
            # Deliberate catch-all: this method reports failures instead of raising.
            return {'success': False, 'error': f'Unexpected error: {e}'}
# Usage example
uploader = FileUploader('https://api.example.com')
result = uploader.upload_file(
    '/path/to/document.pdf',
    field_name='document',
    additional_fields={
        'user_id': '12345',
        'description': 'Contract document',
        'category': 'legal',
    },
)

if result['success']:
    print(f"Upload successful! File size: {result['file_size']} bytes")
    print(f"Server response: {result['response_data']}")
else:
    # A failure result is not guaranteed to carry an 'error' key (for
    # example when the server simply answered with a non-200 status), so
    # fall back to the status code instead of risking a KeyError.
    reason = result.get('error') or f"HTTP status {result.get('status_code')}"
    print(f"Upload failed: {reason}")
Key Concepts
RequestField Parameters
- name: The form field name expected by the server
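- data: Field content as bytes or str. encode_multipart_formdata() copies it into an in-memory body, so an open file object is not accepted and nothing is streamed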
- filename: Original filename to send to the server
- headers: Additional headers for this part. Call make_multipart() to generate the Content-Disposition and Content-Type part headers
Security Considerations
- Always validate file types and sizes before upload
- Use HTTPS with certificate verification in production
- Implement server-side validation and virus scanning
- Consider rate limiting to prevent abuse
- Sanitize filenames to prevent path traversal attacks
Performance Tips
- Use streaming for files larger than available memory
- Implement retry logic for failed uploads
- Consider chunked uploads for very large files
- Use connection pooling (built into PoolManager) for multiple uploads
This comprehensive approach to file uploads with urllib3 provides both flexibility and robustness for production applications.