How does urllib3 handle multipart file uploads?

urllib3 is a powerful HTTP client for Python that provides comprehensive support for multipart/form-data file uploads. This encoding type is the standard method used by web browsers when submitting forms with file inputs.

Installation

First, install urllib3 if it's not already available:

pip install urllib3

Basic File Upload

Here's a complete example of uploading a single file using urllib3:

import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata

# Create a PoolManager instance (handles connection pooling and TLS)
http = urllib3.PoolManager()

# Define file details
file_path = '/path/to/your/document.pdf'
field_name = 'file'

# Read the file contents into memory
with open(file_path, 'rb') as f:
    file_data = f.read()

# Build the multipart field.  Calling make_multipart() is required when a
# RequestField is passed directly to encode_multipart_formdata(): it adds the
# Content-Disposition (and Content-Type) part headers that identify the part
# as a named form file.  Without it, the server receives a part with no
# field name and most frameworks will ignore the upload.
file_field = RequestField(
    name=field_name,
    data=file_data,
    filename='document.pdf',
)
file_field.make_multipart(content_type='application/pdf')

# Prepare form data; plain string values are converted to fields automatically
fields = {
    field_name: file_field,
    'description': 'Important document',  # Additional text field
    'category': 'legal'
}

# Encode the fields into a multipart/form-data body plus matching Content-Type
# header value (includes the generated boundary)
encoded_data, content_type = encode_multipart_formdata(fields)

# Upload the file
try:
    response = http.request(
        'POST',
        'https://example.com/upload',
        body=encoded_data,
        headers={'Content-Type': content_type}
    )

    if response.status == 200:
        print('Upload successful!')
        print(f'Response: {response.data.decode("utf-8")}')
    else:
        print(f'Upload failed with status: {response.status}')

except Exception as e:
    print(f'Error during upload: {e}')

Multiple File Upload

To upload multiple files simultaneously:

import os
import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata

http = urllib3.PoolManager()

# Prepare multiple files
files_to_upload = [
    {'path': '/path/to/image1.jpg', 'field': 'image1', 'type': 'image/jpeg'},
    {'path': '/path/to/image2.png', 'field': 'image2', 'type': 'image/png'},
    {'path': '/path/to/document.pdf', 'field': 'document', 'type': 'application/pdf'}
]

fields = {}

# Create a RequestField for each file
for file_info in files_to_upload:
    with open(file_info['path'], 'rb') as f:
        file_data = f.read()

    file_field = RequestField(
        name=file_info['field'],
        data=file_data,
        # os.path.basename is portable across OSes, unlike splitting on '/'
        filename=os.path.basename(file_info['path']),
    )
    # Required when passing RequestField objects directly to
    # encode_multipart_formdata(): adds the Content-Disposition and
    # Content-Type part headers so the server sees a named file field.
    file_field.make_multipart(content_type=file_info['type'])
    fields[file_info['field']] = file_field

# Add additional form fields (plain strings are converted automatically)
fields['user_id'] = '12345'
fields['upload_type'] = 'batch'

# Encode and upload
encoded_data, content_type = encode_multipart_formdata(fields)

response = http.request(
    'POST',
    'https://example.com/batch-upload',
    body=encoded_data,
    headers={'Content-Type': content_type}
)

print(f'Batch upload status: {response.status}')

Streaming Large Files

Note that `encode_multipart_formdata` builds the entire request body in memory, so a single multipart POST still needs roughly file-size bytes of RAM. For files larger than available memory, use chunked or resumable uploads instead of relying on the pattern below:

import os
import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata

def stream_file_upload(file_path, upload_url):
    """Upload a large file with a single multipart POST.

    NOTE: encode_multipart_formdata() builds the entire request body in
    memory, so this still requires roughly file-size bytes of RAM.  Passing
    an open file object as `data` does NOT stream it -- the encoder expects
    bytes/str and will fail on a file object.  For files larger than
    available memory, send a chunked request (pass a generator as `body`)
    or use a chunked/resumable upload API instead.

    Args:
        file_path (str): Path of the file to upload.
        upload_url (str): Endpoint that accepts the multipart POST.

    Returns:
        urllib3.response.HTTPResponse: The server's response.
    """
    http = urllib3.PoolManager()

    # Read the file contents; the multipart encoder requires bytes
    with open(file_path, 'rb') as f:
        file_data = f.read()

    # make_multipart() adds the Content-Disposition/Content-Type part headers
    # that encode_multipart_formdata() expects on RequestField objects
    # passed in directly.
    file_field = RequestField(
        name='file',
        data=file_data,
        filename=os.path.basename(file_path),
    )
    file_field.make_multipart(content_type='application/octet-stream')

    encoded_data, content_type = encode_multipart_formdata({'file': file_field})

    # urllib3 sets Content-Length automatically for a bytes body
    response = http.request(
        'POST',
        upload_url,
        body=encoded_data,
        headers={'Content-Type': content_type}
    )

    return response

# Usage
response = stream_file_upload('/path/to/large_file.zip', 'https://example.com/upload')
print(f'Upload completed with status: {response.status}')

Error Handling and Best Practices

Here's a robust implementation with comprehensive error handling:

import urllib3
from urllib3.fields import RequestField
from urllib3.filepost import encode_multipart_formdata
import os
import mimetypes

class FileUploader:
    """Small helper around urllib3.PoolManager for multipart file uploads."""

    def __init__(self, base_url, timeout=30):
        """
        Args:
            base_url (str): API root; files are POSTed to f'{base_url}/upload'
            timeout (float): Per-request timeout in seconds
        """
        # urllib3 verifies TLS certificates by default (system CAs/certifi);
        # cert_reqs is stated explicitly for clarity.  NOTE: the previously
        # used urllib3.util.ssl_.DEFAULT_CERTS attribute does not exist and
        # raised AttributeError -- only pass ca_certs for a custom CA bundle.
        self.http = urllib3.PoolManager(
            cert_reqs='CERT_REQUIRED',  # Enforce SSL certificate verification
            timeout=timeout
        )
        self.base_url = base_url

    def upload_file(self, file_path, field_name='file', additional_fields=None):
        """
        Upload a file with comprehensive error handling

        Args:
            file_path (str): Path to the file to upload
            field_name (str): Form field name for the file
            additional_fields (dict): Additional form fields to include

        Returns:
            dict: Upload result with status and response data
        """
        additional_fields = additional_fields or {}

        # Validate file exists
        if not os.path.exists(file_path):
            return {'success': False, 'error': f'File not found: {file_path}'}

        # Gather file metadata; fall back to a generic content type when the
        # extension is unknown
        filename = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
        content_type, _ = mimetypes.guess_type(file_path)
        content_type = content_type or 'application/octet-stream'

        # Validate file size (example: max 50MB)
        max_size = 50 * 1024 * 1024  # 50MB
        if file_size > max_size:
            return {'success': False, 'error': f'File too large: {file_size} bytes'}

        try:
            with open(file_path, 'rb') as f:
                file_data = f.read()

            # make_multipart() is required when passing a RequestField
            # directly to encode_multipart_formdata(): it adds the
            # Content-Disposition header that names the form field.
            file_field = RequestField(
                name=field_name,
                data=file_data,
                filename=filename,
            )
            file_field.make_multipart(content_type=content_type)

            # Prepare all fields (plain string values are converted
            # to form fields automatically)
            fields = {field_name: file_field}
            fields.update(additional_fields)

            # Encode multipart data
            encoded_data, content_type_header = encode_multipart_formdata(fields)

            # Make the request; urllib3 sets Content-Length for bytes bodies
            response = self.http.request(
                'POST',
                f'{self.base_url}/upload',
                body=encoded_data,
                headers={'Content-Type': content_type_header}
            )

            return {
                'success': response.status == 200,
                'status_code': response.status,
                'response_data': response.data.decode('utf-8'),
                'file_size': file_size
            }

        # Catch TimeoutError *before* HTTPError: urllib3's TimeoutError is a
        # subclass of HTTPError, so the reverse order made this branch
        # unreachable.
        except urllib3.exceptions.TimeoutError as e:
            return {'success': False, 'error': f'Timeout error: {e}'}
        except urllib3.exceptions.HTTPError as e:
            return {'success': False, 'error': f'HTTP error: {e}'}
        except Exception as e:
            return {'success': False, 'error': f'Unexpected error: {e}'}

# Usage example: upload a PDF together with some extra metadata fields
uploader = FileUploader('https://api.example.com')

extra_fields = {
    'user_id': '12345',
    'description': 'Contract document',
    'category': 'legal',
}

result = uploader.upload_file(
    '/path/to/document.pdf',
    field_name='document',
    additional_fields=extra_fields,
)

# Report the outcome, failure branch first
if not result['success']:
    print(f"Upload failed: {result['error']}")
else:
    print(f"Upload successful! File size: {result['file_size']} bytes")
    print(f"Server response: {result['response_data']}")

Key Concepts

RequestField Parameters

  • name: The form field name expected by the server
  • data: File data as bytes or str (the encoded multipart body is built entirely in memory)
  • filename: Original filename to send to the server
  • headers: Additional headers like Content-Type

Security Considerations

  • Always validate file types and sizes before upload
  • Use HTTPS with certificate verification in production
  • Implement server-side validation and virus scanning
  • Consider rate limiting to prevent abuse
  • Sanitize filenames to prevent path traversal attacks

Performance Tips

  • Remember that encode_multipart_formdata builds the whole body in memory; files larger than available memory need chunked or resumable uploads
  • Implement retry logic for failed uploads
  • Consider chunked uploads for very large files
  • Use connection pooling (built into PoolManager) for multiple uploads

This comprehensive approach to file uploads with urllib3 provides both flexibility and robustness for production applications.

Related Questions

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering and built-in HTML parser for web scraping
Icon