Table of contents

How do I simulate mouse and keyboard actions using Selenium WebDriver?

Selenium WebDriver's Actions API enables you to simulate complex user interactions including mouse movements, clicks, keyboard input, and gesture combinations. This comprehensive guide covers modern approaches for both Python and JavaScript implementations.

Core Concepts

The Actions API provides:

- Mouse Actions: Hover, click, double-click, right-click, drag and drop
- Keyboard Actions: Key presses, key combinations, text input
- Action Chaining: Combine multiple actions into complex sequences
- Precise Control: Coordinate-based movements and timing control

Python Implementation

Installation and Setup

pip install selenium webdriver-manager

Basic Mouse Actions

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Modern WebDriver initialization (no need for executable_path):
# the Service object carries the driver binary path returned by install().
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Used as a context manager, the driver calls quit() on exit, so the
# browser is closed even when one of the steps below raises.
with driver:
    driver.get('https://example.com')

    # Find elements using modern locator syntax
    element = driver.find_element(By.ID, 'element-id')

    # Gestures are queued on the chain and sent to the browser by perform()
    actions = ActionChains(driver)

    # Hover over element
    hover = actions.move_to_element(element)
    hover.perform()

    # Left click
    left_click = actions.click(element)
    left_click.perform()

    # Double click
    double_click = actions.double_click(element)
    double_click.perform()

    # Right click (context menu)
    right_click = actions.context_click(element)
    right_click.perform()

Advanced Mouse Operations

# NOTE(review): this snippet reuses `driver`, `actions` and `element` —
# presumably the objects created in the basic example; confirm they are in scope.

# Drag and drop: drag the source element onto the target element
source = driver.find_element(By.ID, 'source-element')
target = driver.find_element(By.ID, 'target-element')
actions.drag_and_drop(source, target).perform()

# Drag and drop with offset: drag the source by (100, 50) pixels
actions.drag_and_drop_by_offset(source, 100, 50).perform()

# Click and hold (the button stays pressed until a release is performed)
actions.click_and_hold(element).perform()

# Release mouse button
actions.release(element).perform()

# Move to coordinates (relative to element)
# NOTE(review): the origin of the offset depends on the Selenium version
# (element centre in newer releases, top-left corner in older ones) — confirm.
actions.move_to_element_with_offset(element, 10, 20).perform()

# Move relative to the CURRENT pointer position: move_by_offset is not
# absolute, it shifts the pointer by (300, 200) from wherever it is now
actions.move_by_offset(300, 200).perform()

Keyboard Actions

# Type into whichever element currently has focus
actions.send_keys("Hello, World!").perform()

# Type into a specific element
input_field = driver.find_element(By.ID, 'input-field')
actions.send_keys_to_element(input_field, "Text input").perform()

# Key combinations: hold Ctrl, tap the letter, release Ctrl
# (Ctrl+A, then Ctrl+C, then Ctrl+V)
for letter in ('a', 'c', 'v'):
    actions.key_down(Keys.CONTROL).send_keys(letter).key_up(Keys.CONTROL).perform()

# Multiple modifiers: Ctrl+Shift+Z (modifiers are released in reverse order)
(actions
 .key_down(Keys.CONTROL)
 .key_down(Keys.SHIFT)
 .send_keys('z')
 .key_up(Keys.SHIFT)
 .key_up(Keys.CONTROL)
 .perform())

# Special keys, sent one at a time
for special_key in (Keys.ENTER, Keys.TAB, Keys.ESCAPE, Keys.ARROW_DOWN):
    actions.send_keys(special_key).perform()

Complex Action Sequences

# Chain multiple actions: nothing is sent to the browser until perform()
search_box = driver.find_element(By.NAME, 'q')
submit_button = driver.find_element(By.NAME, 'btnK')

(actions
 .click(search_box)
 .send_keys("Selenium WebDriver")
 .pause(1)  # Wait 1 second
 .click(submit_button)
 .perform())

# Form interaction example
username = driver.find_element(By.ID, 'username')
password = driver.find_element(By.ID, 'password')
login_btn = driver.find_element(By.ID, 'login')

# Focus each field explicitly by clicking it. Tabbing from the username
# field only works when the password field happens to be next in the tab
# order, and it left the located `password` element unused.
(actions
 .click(username)
 .send_keys("user@example.com")
 .click(password)
 .send_keys("password123")
 .click(login_btn)
 .perform())

JavaScript Implementation

Installation and Setup

npm install selenium-webdriver

Basic Mouse Actions

const { Builder, By, Key, until } = require('selenium-webdriver');

/**
 * Opens Chrome, loads the demo page and performs the four basic pointer
 * gestures (hover, click, double click, context click) on one element.
 * The browser is always closed, even when a step fails.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function automateMouseActions() {
    const driver = await new Builder().forBrowser('chrome').build();

    try {
        await driver.get('https://example.com');

        const element = await driver.findElement(By.id('element-id'));

        // An Actions builder keeps its queued steps after perform(), so
        // reusing one builder would replay every earlier gesture before the
        // new one. Start each gesture from a fresh builder instead.
        const newActions = () => driver.actions({ async: true });

        // Hover over element
        await newActions().move({ origin: element }).perform();

        // Left click
        await newActions().click(element).perform();

        // Double click
        await newActions().doubleClick(element).perform();

        // Right click
        await newActions().contextClick(element).perform();

    } finally {
        await driver.quit();
    }
}

// Report a failure explicitly instead of leaving a rejected promise unhandled.
automateMouseActions().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});

Advanced Mouse Operations

/**
 * Demonstrates drag and drop, a manual press / move / release sequence and
 * coordinate-based pointer moves. The browser is always closed at the end.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function advancedMouseActions() {
    const driver = await new Builder().forBrowser('chrome').build();

    try {
        await driver.get('https://example.com');

        // A builder keeps its queued steps after perform(); use a fresh one
        // per gesture so earlier gestures are not replayed.
        const newActions = () => driver.actions({ async: true });

        const source = await driver.findElement(By.id('source'));
        const target = await driver.findElement(By.id('target'));

        // Drag and drop
        await newActions().dragAndDrop(source, target).perform();

        // Click and hold: press() takes a mouse button, not an element, so
        // move onto the element first and then press the (default left) button
        await newActions().move({ origin: source }).press().perform();

        // Move while holding (the button stays down between sequences)
        await newActions().move({ origin: target }).perform();

        // Release
        await newActions().release().perform();

        // Move to coordinates (no origin given, so relative to the viewport)
        await newActions().move({ x: 100, y: 200 }).perform();

        // Move relative to element
        await newActions().move({ origin: source, x: 10, y: 20 }).perform();

    } finally {
        await driver.quit();
    }
}

Keyboard Actions

/**
 * Demonstrates plain typing, typing into a specific element, modifier
 * key combinations and special keys. The browser is always closed at the end.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function keyboardActions() {
    const driver = await new Builder().forBrowser('chrome').build();

    try {
        await driver.get('https://example.com');

        // Fresh builder per sequence: a builder keeps its queued steps after
        // perform() and would otherwise replay them. The default (synchronised)
        // mode is used so that, in sequences mixing mouse and keyboard, the
        // typing only starts after the click has finished; { async: true }
        // would run both devices in parallel.
        const newActions = () => driver.actions();

        const inputField = await driver.findElement(By.id('input'));

        // Send text (goes to whichever element currently has focus)
        await newActions().sendKeys('Hello, World!').perform();

        // Send text to specific element
        await newActions().click(inputField).sendKeys('Specific text').perform();

        // Key combinations
        await newActions().keyDown(Key.CONTROL).sendKeys('a').keyUp(Key.CONTROL).perform(); // Ctrl+A
        await newActions().keyDown(Key.CONTROL).sendKeys('c').keyUp(Key.CONTROL).perform(); // Ctrl+C

        // Multiple modifiers
        await newActions()
            .keyDown(Key.CONTROL)
            .keyDown(Key.SHIFT)
            .sendKeys('z')
            .keyUp(Key.SHIFT)
            .keyUp(Key.CONTROL)
            .perform(); // Ctrl+Shift+Z

        // Special keys
        await newActions().sendKeys(Key.ENTER).perform();
        await newActions().sendKeys(Key.TAB).perform();
        await newActions().sendKeys(Key.ESCAPE).perform();

    } finally {
        await driver.quit();
    }
}

Complex Action Sequences

/**
 * Runs two multi-step sequences: a search (click, type, pause, submit) and
 * a login form fill. The browser is always closed at the end.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function complexSequence() {
    const driver = await new Builder().forBrowser('chrome').build();

    try {
        await driver.get('https://example.com');

        const searchBox = await driver.findElement(By.name('q'));
        const submitBtn = await driver.findElement(By.name('btnK'));

        // Chained actions with pause. The default (synchronised) builder is
        // used on purpose: with { async: true } the mouse and keyboard run
        // in parallel, so typing could start before the click lands.
        await driver.actions()
            .click(searchBox)
            .sendKeys('Selenium WebDriver')
            .pause(1000) // 1 second pause
            .click(submitBtn)
            .perform();

        // Form filling sequence
        const username = await driver.findElement(By.id('username'));
        const password = await driver.findElement(By.id('password'));
        const loginBtn = await driver.findElement(By.id('login'));

        // A fresh builder is required here: a builder keeps its queued steps
        // after perform(), so reusing the first one would replay the search.
        // Each field is focused by clicking it rather than relying on tab order.
        await driver.actions()
            .click(username)
            .sendKeys('user@example.com')
            .click(password)
            .sendKeys('password123')
            .click(loginBtn)
            .perform();

    } finally {
        await driver.quit();
    }
}

Best Practices

Error Handling and Waits

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait (up to 10 seconds) for the button to become clickable, then click it
element = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.ID, 'button'))
)
actions.click(element).perform()

Performance Optimization

# Batch actions for better performance
# NOTE(review): `elements` is not defined in this snippet — presumably a list
# of WebElements located earlier (e.g. via driver.find_elements); confirm.
actions = ActionChains(driver)
for element in elements:
    # Only queues the move + click; nothing is sent to the browser yet
    actions.move_to_element(element).click()
actions.perform()  # Execute all actions at once

Cross-Browser Compatibility

// Configure for different browsers
const chrome = require('selenium-webdriver/chrome');

// setChromeOptions() is documented to take a chrome.Options instance, so the
// switches are added through the Options API instead of a raw capabilities object.
// NOTE(review): these switches disable the same-origin policy and mixed-content
// blocking; use them only against trusted test pages, never for general browsing.
const chromeOptions = new chrome.Options().addArguments(
    '--disable-web-security',
    '--allow-running-insecure-content'
);

// `await` must run inside an async function (or an ES module)
let driver = await new Builder()
    .forBrowser('chrome')
    .setChromeOptions(chromeOptions)
    .build();

Common Use Cases

File Upload Simulation

# Upload a file by typing its path into the <input type="file"> element
file_input_locator = (By.CSS_SELECTOR, 'input[type="file"]')
file_input = driver.find_element(*file_input_locator)
file_input.send_keys('/path/to/file.txt')

Slider Control

# Drag the slider handle: press on it, shift 50 px along x, then release
slider = driver.find_element(By.CSS_SELECTOR, '.slider-handle')
(actions
 .click_and_hold(slider)
 .move_by_offset(50, 0)
 .release()
 .perform())

Custom Gestures

import math

# Trace a circle of the given radius around the centre of the canvas.
# move_by_offset() is relative to the CURRENT pointer position, so each step
# must be the difference between consecutive points on the circle. Feeding
# the raw (cos, sin) coordinates in as offsets would accumulate them and
# trace a much larger circle (radius of roughly 290 px for 50 px steps).
# To actually paint on a canvas, wrap the loop in click_and_hold() / release().
radius = 50
center_element = driver.find_element(By.ID, 'canvas')
actions.move_to_element(center_element)  # pointer starts at the centre
prev_x, prev_y = 0, 0
for angle in range(0, 361, 10):  # 361 so the last point closes the circle
    x = round(radius * math.cos(math.radians(angle)))
    y = round(radius * math.sin(math.radians(angle)))
    actions.move_by_offset(x - prev_x, y - prev_y)
    prev_x, prev_y = x, y
actions.perform()

The Actions API in Selenium WebDriver provides powerful capabilities for automating complex user interactions. Remember to always use explicit waits, handle exceptions properly, and test across different browsers for robust automation scripts.

Try WebScraping.AI for Your Web Scraping Needs

Looking for a powerful web scraping solution? WebScraping.AI provides an LLM-powered API that combines Chromium JavaScript rendering with rotating proxies for reliable data extraction.

Key Features:

  • AI-powered extraction: Ask questions about web pages or extract structured data fields
  • JavaScript rendering: Full Chromium browser support for dynamic content
  • Rotating proxies: Datacenter and residential proxies from multiple countries
  • Easy integration: Simple REST API with SDKs for Python, Ruby, PHP, and more
  • Reliable & scalable: Built for developers who need consistent results

Getting Started:

Get page content with AI analysis:

curl -G "https://api.webscraping.ai/ai/question" \
  --data-urlencode "url=https://example.com" \
  --data-urlencode "question=What is the main topic?" \
  --data-urlencode "api_key=YOUR_API_KEY"

Extract structured data:

curl -G "https://api.webscraping.ai/ai/fields" \
  --data-urlencode "url=https://example.com" \
  --data-urlencode "fields[title]=Page title" \
  --data-urlencode "fields[price]=Product price" \
  --data-urlencode "api_key=YOUR_API_KEY"

Try in request builder

Related Questions

Get Started Now

WebScraping.AI provides rotating proxies, Chromium rendering, and a built-in HTML parser for web scraping
Icon