Downloading images and videos with the Python Requests library involves making GET requests to media URLs and writing the response content to local files. This guide covers both simple downloads and advanced techniques for handling large files efficiently.
Basic Setup
Install the Requests library if you haven't already:
pip install requests
Downloading Images
For small to medium-sized images, you can download the entire content into memory:
import requests
import os
from urllib.parse import urlparse
def download_image(url, filename=None):
    """Download an image from a URL and save it to a local file.

    The whole response body is held in memory before it is written, so this
    helper is meant for small to medium-sized files.

    Args:
        url (str): URL of the image to fetch.
        filename (str): Optional destination path. When omitted, the last
            segment of the URL path is used, or 'image.jpg' if the URL path
            has no final segment.

    Returns:
        str: The path the image was written to, or None if the request or
        the file write failed (the error is printed).
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # Auto-generate filename if not provided
        if not filename:
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path) or 'image.jpg'

        with open(filename, 'wb') as file:
            file.write(response.content)

        print(f"Image downloaded successfully: {filename}")
        return filename
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {e}")
        return None
    except OSError as e:
        # Requests' exceptions derive from IOError/OSError, so this clause has
        # to come after the one above; it reports open()/write() failures
        # instead of letting them escape the "return None on failure" contract.
        print(f"Error saving image: {e}")
        return None
# Example: fetch one image and store it under an explicit local name
image_url = 'https://example.com/path/to/image.jpg'
download_image(url=image_url, filename='my_image.jpg')
Downloading Videos (Streaming)
For large files like videos, use streaming to avoid memory issues:
import requests
import os
from urllib.parse import urlparse
def download_video(url, filename=None, chunk_size=1024*1024):
    """Download a video with streaming so large files never sit in memory.

    Args:
        url (str): URL of the video to fetch.
        filename (str): Optional destination path. When omitted, the last
            segment of the URL path is used, or 'video.mp4' if the URL path
            has no final segment.
        chunk_size (int): Number of bytes requested per chunk (default 1MB).

    Returns:
        str: The path the video was written to, or None if the request or
        the file write failed (the error is printed).
    """
    try:
        # stream=True defers the body download; the `with` block guarantees
        # the connection is released even if writing fails part-way through.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            # Auto-generate filename if not provided
            if not filename:
                parsed_url = urlparse(url)
                filename = os.path.basename(parsed_url.path) or 'video.mp4'

            # Get file size from headers if available (0 means unknown)
            total_size = int(response.headers.get('content-length', 0))
            downloaded = 0

            with open(filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # Filter out keep-alive chunks
                        file.write(chunk)
                        downloaded += len(chunk)
                        # Progress can only be shown when the size is known
                        if total_size > 0:
                            progress = (downloaded / total_size) * 100
                            print(f"\rProgress: {progress:.1f}%", end='', flush=True)

        print(f"\nVideo downloaded successfully: {filename}")
        return filename
    except requests.exceptions.RequestException as e:
        print(f"Error downloading video: {e}")
        return None
    except OSError as e:
        # Requests' exceptions derive from IOError/OSError, so this clause has
        # to come after the one above; it reports open()/write() failures.
        print(f"Error saving video: {e}")
        return None
# Example: stream one video to disk under an explicit local name
video_url = 'https://example.com/path/to/video.mp4'
download_video(url=video_url, filename='my_video.mp4')
Advanced Download Function
A more robust function that handles both images and videos with additional features:
import requests
import os
from urllib.parse import urlparse
from pathlib import Path
def _pick_media_filename(url, response_headers):
    """Choose a local filename from the response headers, else from the URL."""
    # Function-scope import keeps this block self-contained; Message provides
    # the standard library's parser for header parameters such as filename=.
    from email.message import Message

    name = ''
    content_disposition = response_headers.get('content-disposition')
    if content_disposition:
        parser = Message()
        parser['content-disposition'] = content_disposition
        name = parser.get_filename() or ''
    if not name:
        # Fall back to URL path
        name = urlparse(url).path

    # The name comes from the server or the URL, so it is untrusted: keep only
    # the final path component so it cannot point outside the working directory.
    name = os.path.basename(name.replace('\\', '/'))
    if name in ('.', '..'):
        name = ''

    # If there is still no usable filename, build one from the content type
    if not name or '.' not in name:
        content_type = response_headers.get('content-type', '')
        # partition() never raises, unlike indexing the result of split('/')
        subtype = content_type.partition('/')[2].partition(';')[0].strip()
        if 'image' in content_type and subtype:
            name = f'image.{subtype}'
        elif 'video' in content_type and subtype:
            name = f'video.{subtype}'
        else:
            name = 'download'
    return name


def download_media(url, filename=None, chunk_size=1024*1024, headers=None):
    """
    Download images or videos with comprehensive error handling

    The body is streamed to disk in chunks. When no filename is given, one is
    derived from the Content-Disposition header, then the URL path, then the
    Content-Type header; a derived name is always reduced to a bare file name.

    Args:
        url (str): URL of the media file
        filename (str): Optional custom filename; parent directories are
            created if they do not exist
        chunk_size (int): Size of chunks for streaming (default 1MB)
        headers (dict): Optional HTTP headers, merged over the defaults

    Returns:
        str: Path to downloaded file or None if failed
    """
    # Default headers to mimic a browser
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    if headers:
        request_headers.update(headers)

    try:
        # Stream the body; the `with` block releases the connection even when
        # an error interrupts the download.
        with requests.get(
            url,
            stream=True,
            headers=request_headers,
            timeout=30,
            allow_redirects=True,
        ) as response:
            response.raise_for_status()

            if not filename:
                filename = _pick_media_filename(url, response.headers)

            # Create directory if it doesn't exist
            Path(filename).parent.mkdir(parents=True, exist_ok=True)

            # Get file size (0 means the server did not report one)
            total_size = int(response.headers.get('content-length', 0))
            # Show progress only for files larger than 10MB
            show_progress = total_size > 10 * 1024 * 1024
            downloaded = 0

            with open(filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        file.write(chunk)
                        downloaded += len(chunk)
                        if show_progress:
                            progress = (downloaded / total_size) * 100
                            print(f"\rDownloading {filename}: {progress:.1f}%", end='', flush=True)

            if show_progress:
                print()  # New line after progress

        print(f"Successfully downloaded: {filename} ({downloaded} bytes)")
        return filename
    except requests.exceptions.Timeout:
        print("Download timed out")
    except requests.exceptions.ConnectionError:
        print("Connection error occurred")
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
    except OSError as e:
        # Must stay last: Requests' exceptions are OSError subclasses too.
        print(f"File write error: {e}")
    return None
# Example: download into a sub-directory, which download_media creates if needed
media_url = 'https://example.com/path/to/media.mp4'
download_media(url=media_url, filename='downloads/my_media.mp4')
Best Practices
1. Always Use Streaming for Large Files
# Good for large files: with stream=True the body can be consumed in chunks
# (for example with response.iter_content) instead of all at once
response = requests.get(url, stream=True)
# Avoid for large files - loads everything into memory
response = requests.get(url)
2. Set Appropriate Timeouts
# Pass an explicit timeout on every request, as the download functions in this guide do
response = requests.get(url, timeout=30) # 30 second timeout
3. Handle Different Content Types
# Branch on the MIME type reported by the server; a missing header is read
# as an empty string, so neither branch runs in that case.
content_type = response.headers.get('content-type', '')
if content_type.startswith('image/'):
    ...  # image-specific handling goes here
elif content_type.startswith('video/'):
    ...  # video-specific handling goes here
4. Verify File Integrity
import hashlib
def verify_download(filename, expected_hash=None, algorithm='md5', chunk_size=1024 * 1024):
    """Verify downloaded file integrity.

    Args:
        filename (str): Path of the downloaded file.
        expected_hash (str): Optional hex digest to compare against. The
            comparison ignores letter case and surrounding whitespace.
        algorithm (str): Name of the hashlib algorithm used to produce
            expected_hash (default 'md5', matching the original behavior).
        chunk_size (int): Number of bytes read per iteration while hashing.

    Returns:
        bool: With expected_hash, True when the file's digest matches it.
        Without expected_hash, True when the file exists and is not empty.

    Raises:
        OSError: If expected_hash is given and the file cannot be opened.
    """
    if expected_hash:
        digest = hashlib.new(algorithm)
        with open(filename, 'rb') as f:
            # Hash in fixed-size chunks so large downloads are never loaded
            # into memory in one piece.
            for block in iter(lambda: f.read(chunk_size), b''):
                digest.update(block)
        # hexdigest() is lowercase; normalize the caller's value to match.
        return digest.hexdigest() == expected_hash.strip().lower()
    return os.path.exists(filename) and os.path.getsize(filename) > 0
Common Issues and Solutions
Memory Issues: Use stream=True and process files in chunks
Timeouts: Set appropriate timeout values and implement retry logic
Authentication: Add headers or authentication parameters as needed
Rate Limiting: Implement delays between requests when downloading multiple files
Remember to respect website terms of service and robots.txt files when downloading content programmatically.