How do I use the Requests library to download images or videos?

Downloading images and videos with the Python Requests library involves making GET requests to media URLs and writing the response content to local files. This guide covers both simple downloads and advanced techniques for handling large files efficiently.

Basic Setup

Install the Requests library if you haven't already:

pip install requests

Downloading Images

For small to medium-sized images, you can download the entire content into memory:

import requests
import os
from urllib.parse import urlparse

def download_image(url, filename=None):
    """Download an image from a URL"""
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raises HTTPError for bad responses

        # Auto-generate filename if not provided
        if not filename:
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path) or 'image.jpg'

        with open(filename, 'wb') as file:
            file.write(response.content)

        print(f"Image downloaded successfully: {filename}")
        return filename

    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {e}")
        return None

# Example usage
image_url = 'https://example.com/path/to/image.jpg'
download_image(image_url, 'my_image.jpg')
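
When downloading several images from the same host, reusing a single requests.Session keeps the underlying connection alive between requests, which is faster than calling requests.get repeatedly. A minimal sketch, assuming a hypothetical list of image URLs:

import requests

# Hypothetical URLs for illustration
image_urls = [
    'https://example.com/photos/1.jpg',
    'https://example.com/photos/2.jpg',
]

with requests.Session() as session:
    for url in image_urls:
        response = session.get(url, timeout=30)
        response.raise_for_status()
        filename = url.rsplit('/', 1)[-1]  # e.g. '1.jpg'
        with open(filename, 'wb') as file:
            file.write(response.content)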

Downloading Videos (Streaming)

For large files like videos, use streaming to avoid memory issues:

import requests
import os
from urllib.parse import urlparse

def download_video(url, filename=None, chunk_size=1024*1024):
    """Download a video with streaming to handle large files"""
    try:
        # Use stream=True to download in chunks
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()

        # Auto-generate filename if not provided
        if not filename:
            parsed_url = urlparse(url)
            filename = os.path.basename(parsed_url.path) or 'video.mp4'

        # Get file size from headers if available
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0

        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # Filter out keep-alive chunks
                    file.write(chunk)
                    downloaded += len(chunk)

                    # Show progress for large files
                    if total_size > 0:
                        progress = (downloaded / total_size) * 100
                        print(f"\rProgress: {progress:.1f}%", end='', flush=True)

        print(f"\nVideo downloaded successfully: {filename}")
        return filename

    except requests.exceptions.RequestException as e:
        print(f"Error downloading video: {e}")
        return None

# Example usage
video_url = 'https://example.com/path/to/video.mp4'
download_video(video_url, 'my_video.mp4')
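
If a large download is interrupted, you can often resume it rather than starting over by sending an HTTP Range header. Not every server supports range requests; a server that does answers with status 206 (Partial Content). A sketch of the idea, reusing the hypothetical URL above:

import os
import requests

def resume_download(url, filename, chunk_size=1024*1024):
    """Resume a partial download using an HTTP Range request."""
    # Start from however many bytes are already on disk
    existing = os.path.getsize(filename) if os.path.exists(filename) else 0
    headers = {'Range': f'bytes={existing}-'}

    response = requests.get(url, headers=headers, stream=True, timeout=30)
    response.raise_for_status()

    # 206 means the server honoured the Range header; append to the partial file.
    # Otherwise the server sent the whole file, so start over.
    mode = 'ab' if response.status_code == 206 else 'wb'

    with open(filename, mode) as file:
        for chunk in response.iter_content(chunk_size=chunk_size):
            if chunk:
                file.write(chunk)
    return filename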

Advanced Download Function

A more robust function that handles both images and videos with additional features:

import requests
import os
from urllib.parse import urlparse
from pathlib import Path

def download_media(url, filename=None, chunk_size=1024*1024, headers=None):
    """
    Download images or videos with comprehensive error handling

    Args:
        url (str): URL of the media file
        filename (str): Optional custom filename
        chunk_size (int): Size of chunks for streaming (default 1MB)
        headers (dict): Optional HTTP headers

    Returns:
        str: Path to downloaded file or None if failed
    """

    # Default headers to mimic a browser
    default_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    if headers:
        default_headers.update(headers)

    try:
        # Make request with streaming for large files
        response = requests.get(
            url, 
            stream=True, 
            headers=default_headers,
            timeout=30,
            allow_redirects=True
        )
        response.raise_for_status()

        # Determine filename
        if not filename:
            # Try to get filename from Content-Disposition header
            content_disposition = response.headers.get('content-disposition')
            if content_disposition and 'filename=' in content_disposition:
                # Strip trailing parameters and surrounding quotes from the value
                filename = content_disposition.split('filename=')[1].split(';')[0].strip().strip('"\'')
            else:
                # Fall back to URL path
                parsed_url = urlparse(url)
                filename = os.path.basename(parsed_url.path)

                # If still no filename, use content type
                if not filename or '.' not in filename:
                    content_type = response.headers.get('content-type', '')
                    if 'image' in content_type:
                        ext = content_type.split('/')[1].split(';')[0]
                        filename = f'image.{ext}'
                    elif 'video' in content_type:
                        ext = content_type.split('/')[1].split(';')[0]
                        filename = f'video.{ext}'
                    else:
                        filename = 'download'

        # Create directory if it doesn't exist
        Path(filename).parent.mkdir(parents=True, exist_ok=True)

        # Get file size
        total_size = int(response.headers.get('content-length', 0))
        downloaded = 0

        # Download the file
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    file.write(chunk)
                    downloaded += len(chunk)

                    # Show progress for files larger than 10MB
                    if total_size > 10 * 1024 * 1024:
                        progress = (downloaded / total_size) * 100
                        print(f"\rDownloading {filename}: {progress:.1f}%", end='', flush=True)

        if total_size > 10 * 1024 * 1024:
            print()  # New line after progress

        print(f"Successfully downloaded: {filename} ({downloaded} bytes)")
        return filename

    except requests.exceptions.Timeout:
        print("Download timed out")
    except requests.exceptions.ConnectionError:
        print("Connection error occurred")
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
    except IOError as e:
        print(f"File write error: {e}")

    return None

# Example usage
media_url = 'https://example.com/path/to/media.mp4'
download_media(media_url, 'downloads/my_media.mp4')

Best Practices

1. Always Use Streaming for Large Files

# Good for large files
response = requests.get(url, stream=True)

# Avoid for large files - loads everything into memory
response = requests.get(url)

2. Set Appropriate Timeouts

response = requests.get(url, timeout=30)  # 30 second timeout
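
The timeout can also be given as a (connect, read) tuple, which lets you fail fast on unreachable hosts while still giving a slow media server time to send data:

# 5 seconds to establish the connection, 60 seconds between reads
response = requests.get(url, stream=True, timeout=(5, 60))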

3. Handle Different Content Types

content_type = response.headers.get('content-type', '')
if content_type.startswith('image/'):
    # Handle as image
    pass
elif content_type.startswith('video/'):
    # Handle as video
    pass

4. Verify File Integrity

import hashlib
import os

def verify_download(filename, expected_hash=None):
    """Verify downloaded file integrity"""
    if expected_hash:
        md5 = hashlib.md5()
        # Hash in chunks so large videos don't have to fit in memory
        with open(filename, 'rb') as f:
            for block in iter(lambda: f.read(1024 * 1024), b''):
                md5.update(block)
        return md5.hexdigest() == expected_hash
    return os.path.exists(filename) and os.path.getsize(filename) > 0
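
Usage, assuming you know the file's published MD5 checksum (the hash below is a made-up placeholder):

path = download_media('https://example.com/path/to/media.mp4')
if path and verify_download(path, expected_hash='d41d8cd98f00b204e9800998ecf8427e'):
    print('Checksum verified')
else:
    print('Download missing or corrupted')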

Common Issues and Solutions

Memory Issues: Use stream=True and process files in chunks.
Timeouts: Set appropriate timeout values and implement retry logic.
Authentication: Add headers or authentication parameters as needed.
Rate Limiting: Implement delays between requests when downloading multiple files (see the sketch below for retries and delays).
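
For the retry and rate-limiting points above, one common approach is to mount urllib3's Retry policy on a requests.Session and sleep between downloads. A minimal sketch, with hypothetical URLs:

import time
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Retry transient failures up to 3 times with exponential backoff
retry = Retry(total=3, backoff_factor=1,
              status_forcelist=[429, 500, 502, 503, 504])
session = requests.Session()
session.mount('https://', HTTPAdapter(max_retries=retry))
session.mount('http://', HTTPAdapter(max_retries=retry))

urls = ['https://example.com/a.jpg', 'https://example.com/b.jpg']  # hypothetical
for url in urls:
    response = session.get(url, stream=True, timeout=30)
    response.raise_for_status()
    with open(url.rsplit('/', 1)[-1], 'wb') as file:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            file.write(chunk)
    time.sleep(1)  # polite delay between downloads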

Remember to respect website terms of service and robots.txt files when downloading content programmatically.
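
The standard library's urllib.robotparser can check a site's robots.txt before you fetch a file. A short sketch using the download_media function above:

from urllib import robotparser
from urllib.parse import urlparse

url = 'https://example.com/path/to/video.mp4'
parsed = urlparse(url)

parser = robotparser.RobotFileParser()
parser.set_url(f'{parsed.scheme}://{parsed.netloc}/robots.txt')
parser.read()

if parser.can_fetch('*', url):
    download_media(url)
else:
    print('Blocked by robots.txt')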

Try WebScraping.AI for Your Web Scraping Needs

Looking for a powerful web scraping solution? WebScraping.AI provides an LLM-powered API that combines Chromium JavaScript rendering with rotating proxies for reliable data extraction.

Key Features:

  • AI-powered extraction: Ask questions about web pages or extract structured data fields
  • JavaScript rendering: Full Chromium browser support for dynamic content
  • Rotating proxies: Datacenter and residential proxies from multiple countries
  • Easy integration: Simple REST API with SDKs for Python, Ruby, PHP, and more
  • Reliable & scalable: Built for developers who need consistent results

Getting Started:

Get page content with AI analysis:

curl "https://api.webscraping.ai/ai/question?url=https://example.com&question=What is the main topic?&api_key=YOUR_API_KEY"

Extract structured data:

curl "https://api.webscraping.ai/ai/fields?url=https://example.com&fields[title]=Page title&fields[price]=Product price&api_key=YOUR_API_KEY"
