Selenium WebDriver's Actions API enables you to simulate complex user interactions, including mouse movements, clicks, keyboard input, and gesture combinations. This comprehensive guide covers modern approaches for both Python and JavaScript implementations.
Core Concepts
The Actions API supports:
- Mouse Actions: Hover, click, double-click, right-click, drag and drop
- Keyboard Actions: Key presses, key combinations, text input
- Action Chaining: Combine multiple actions into complex sequences
- Precise Control: Coordinate-based movements and timing control
Python Implementation
Installation and Setup
# webdriver-manager supplies the ChromeDriverManager helper imported by the Python examples
pip install selenium webdriver-manager
Basic Mouse Actions
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# Modern WebDriver initialization (no need for executable_path): the driver
# binary location is handed over through a Service object.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)
try:
    driver.get('https://example.com')
    actions = ActionChains(driver)
    # Find elements using modern locator syntax
    element = driver.find_element(By.ID, 'element-id')
    # Hover over element
    actions.move_to_element(element).perform()
    # Left click
    actions.click(element).perform()
    # Double click
    actions.double_click(element).perform()
    # Right click (context menu)
    actions.context_click(element).perform()
finally:
    # Always close the browser, even if one of the steps above raised
    driver.quit()
Advanced Mouse Operations
# These steps reuse the `driver`, `actions` and `element` objects created in
# the basic example.
# Drag and drop
source = driver.find_element(By.ID, 'source-element')
target = driver.find_element(By.ID, 'target-element')
actions.drag_and_drop(source, target).perform()
# Drag and drop with offset (drag `source` 100 px right and 50 px down)
actions.drag_and_drop_by_offset(source, 100, 50).perform()
# Click and hold
actions.click_and_hold(element).perform()
# Release mouse button
actions.release(element).perform()
# Move to coordinates (relative to element)
# NOTE(review): recent Selenium 4 releases measure this offset from the
# element's center rather than its top-left corner -- confirm for your version.
actions.move_to_element_with_offset(element, 10, 20).perform()
# Move by an offset from the CURRENT pointer position; move_by_offset is
# relative, it does not take absolute page coordinates
actions.move_by_offset(300, 200).perform()
Keyboard Actions
# Type into whichever element currently has focus
actions.send_keys("Hello, World!").perform()
# Type into one specific element
input_field = driver.find_element(By.ID, 'input-field')
actions.send_keys_to_element(input_field, "Text input").perform()
# Key combinations: Ctrl+A, Ctrl+C, Ctrl+V (one perform() per shortcut)
for letter in ('a', 'c', 'v'):
    actions.key_down(Keys.CONTROL).send_keys(letter).key_up(Keys.CONTROL).perform()
# Multiple modifiers held together: Ctrl+Shift+Z
(actions
    .key_down(Keys.CONTROL)
    .key_down(Keys.SHIFT)
    .send_keys('z')
    .key_up(Keys.SHIFT)
    .key_up(Keys.CONTROL)
    .perform())
# Special keys, each sent on its own
for special_key in (Keys.ENTER, Keys.TAB, Keys.ESCAPE, Keys.ARROW_DOWN):
    actions.send_keys(special_key).perform()
Complex Action Sequences
# Chain multiple actions: everything queued on `actions` is sent to the
# browser by the single perform() call that ends the chain
search_box = driver.find_element(By.NAME, 'q')
submit_button = driver.find_element(By.NAME, 'btnK')
(actions
    .click(search_box)
    .send_keys("Selenium WebDriver")
    .pause(1)  # Wait 1 second
    .click(submit_button)
    .perform())
# Form interaction example
username = driver.find_element(By.ID, 'username')
password = driver.find_element(By.ID, 'password')
login_btn = driver.find_element(By.ID, 'login')
(actions
    .click(username)
    .send_keys("user@example.com")
    # Focus the located password field directly instead of pressing TAB, so
    # the sequence does not depend on the page's tab order
    .click(password)
    .send_keys("password123")
    .click(login_btn)
    .perform())
JavaScript Implementation
Installation and Setup
npm install selenium-webdriver
Basic Mouse Actions
const { Builder, By, Key, until } = require('selenium-webdriver');
/**
 * Demonstrates the basic pointer gestures (hover, left click, double click and
 * right click) on the element with id `element-id` at https://example.com.
 *
 * Every gesture is built on its own `driver.actions()` sequence. A
 * selenium-webdriver `Actions` object keeps its queued steps after
 * `perform()`, so reusing one builder would replay the earlier gestures
 * before each new one.
 *
 * @returns {Promise<void>} Settles after the gestures ran and the browser was
 *     closed; any WebDriver error is rethrown once `quit()` has completed.
 */
async function automateMouseActions() {
  const driver = await new Builder().forBrowser('chrome').build();
  // Fresh, empty action sequence for each gesture
  const newActions = () => driver.actions({ async: true });
  try {
    await driver.get('https://example.com');
    const element = await driver.findElement(By.id('element-id'));
    // Hover over element
    await newActions().move({ origin: element }).perform();
    // Left click
    await newActions().click(element).perform();
    // Double click
    await newActions().doubleClick(element).perform();
    // Right click
    await newActions().contextClick(element).perform();
  } finally {
    // Close the browser even when one of the gestures failed
    await driver.quit();
  }
}
// Run the demo; report a failure explicitly instead of leaving the promise floating
automateMouseActions().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Advanced Mouse Operations
/**
 * Demonstrates drag-and-drop, a manual press / move / release drag, and
 * pointer moves to viewport and element-relative coordinates.
 *
 * Notes on the API usage:
 * - `press()` and `release()` take an optional mouse *button*, not an element,
 *   so the pointer is first moved onto the element with `move()`.
 * - Each gesture uses its own `driver.actions()` sequence because an `Actions`
 *   object keeps its queued steps after `perform()`.
 *
 * @returns {Promise<void>} Settles after the gestures ran and the browser was
 *     closed; any WebDriver error is rethrown once `quit()` has completed.
 */
async function advancedMouseActions() {
  const driver = await new Builder().forBrowser('chrome').build();
  // Fresh, empty action sequence for each gesture
  const newActions = () => driver.actions({ async: true });
  try {
    await driver.get('https://example.com');
    const source = await driver.findElement(By.id('source'));
    const target = await driver.findElement(By.id('target'));
    // Drag and drop
    await newActions().dragAndDrop(source, target).perform();
    // Manual drag: click and hold on source, move while holding, release on target
    await newActions()
      .move({ origin: source })
      .press()
      .move({ origin: target })
      .release()
      .perform();
    // Move to coordinates (origin defaults to the viewport)
    await newActions().move({ x: 100, y: 200 }).perform();
    // Move relative to element
    await newActions().move({ origin: source, x: 10, y: 20 }).perform();
  } finally {
    // Close the browser even when one of the gestures failed
    await driver.quit();
  }
}
Keyboard Actions
/**
 * Demonstrates keyboard input: plain text, text typed into a specific field,
 * modifier shortcuts and special keys.
 *
 * Two deliberate choices:
 * - Each step uses its own `driver.actions()` sequence, because an `Actions`
 *   object keeps its queued steps after `perform()` and would otherwise
 *   replay the earlier input.
 * - Sequences are created in the default synchronized mode. With
 *   `{ async: true }` the mouse and keyboard run as independent, parallel
 *   sequences, so `click(field).sendKeys(...)` could start typing before the
 *   click has focused the field.
 *
 * @returns {Promise<void>} Settles after the input was sent and the browser
 *     was closed; any WebDriver error is rethrown once `quit()` has completed.
 */
async function keyboardActions() {
  const driver = await new Builder().forBrowser('chrome').build();
  // Fresh, synchronized action sequence for each step
  const newActions = () => driver.actions();
  try {
    await driver.get('https://example.com');
    const inputField = await driver.findElement(By.id('input'));
    // Send text (goes to whichever element currently has focus)
    await newActions().sendKeys('Hello, World!').perform();
    // Send text to specific element
    await newActions().click(inputField).sendKeys('Specific text').perform();
    // Key combinations
    await newActions().keyDown(Key.CONTROL).sendKeys('a').keyUp(Key.CONTROL).perform(); // Ctrl+A
    await newActions().keyDown(Key.CONTROL).sendKeys('c').keyUp(Key.CONTROL).perform(); // Ctrl+C
    // Multiple modifiers
    await newActions()
      .keyDown(Key.CONTROL)
      .keyDown(Key.SHIFT)
      .sendKeys('z')
      .keyUp(Key.SHIFT)
      .keyUp(Key.CONTROL)
      .perform(); // Ctrl+Shift+Z
    // Special keys
    await newActions().sendKeys(Key.ENTER).perform();
    await newActions().sendKeys(Key.TAB).perform();
    await newActions().sendKeys(Key.ESCAPE).perform();
  } finally {
    // Close the browser even when one of the steps failed
    await driver.quit();
  }
}
Complex Action Sequences
/**
 * Demonstrates two chained, multi-device sequences: a search (click, type,
 * pause, click) and a login form fill.
 *
 * Two deliberate choices:
 * - Sequences are created in the default synchronized mode. With
 *   `{ async: true }` the mouse and keyboard run as independent, parallel
 *   sequences, so the click -> type -> click order would not be guaranteed.
 * - Each sequence uses its own `driver.actions()` builder, because an
 *   `Actions` object keeps its queued steps after `perform()` and would
 *   replay the search before filling the form.
 *
 * @returns {Promise<void>} Settles after both sequences ran and the browser
 *     was closed; any WebDriver error is rethrown once `quit()` has completed.
 */
async function complexSequence() {
  const driver = await new Builder().forBrowser('chrome').build();
  try {
    await driver.get('https://example.com');
    const searchBox = await driver.findElement(By.name('q'));
    const submitBtn = await driver.findElement(By.name('btnK'));
    // Chained actions with pause
    await driver
      .actions()
      .click(searchBox)
      .sendKeys('Selenium WebDriver')
      .pause(1000) // 1 second pause
      .click(submitBtn)
      .perform();
    // Form filling sequence
    const username = await driver.findElement(By.id('username'));
    const password = await driver.findElement(By.id('password'));
    const loginBtn = await driver.findElement(By.id('login'));
    await driver
      .actions()
      .click(username)
      .sendKeys('user@example.com')
      // Focus the located password field directly rather than pressing TAB,
      // so the sequence does not depend on the page's tab order
      .click(password)
      .sendKeys('password123')
      .click(loginBtn)
      .perform();
  } finally {
    // Close the browser even when one of the sequences failed
    await driver.quit();
  }
}
Best Practices
Error Handling and Waits
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Poll for up to 10 seconds until the button can be clicked, then click it
wait = WebDriverWait(driver, 10)
button_is_clickable = EC.element_to_be_clickable((By.ID, 'button'))
element = wait.until(button_is_clickable)
actions.click(element).perform()
Performance Optimization
# Batch actions for better performance: queue a hover + click for every
# element first, then send the whole queue with one perform() call.
# `elements` is expected to be an iterable of already-located WebElements.
actions = ActionChains(driver)
for element in elements:
    actions.move_to_element(element).click()
# Outside the loop on purpose: calling perform() per element would defeat the batching
actions.perform()  # Execute all actions at once
Cross-Browser Compatibility
// Configure for different browsers
const chrome = require('selenium-webdriver/chrome');

// setChromeOptions() is documented to take a chrome.Options instance, so the
// command-line switches are added through that builder instead of a raw
// capabilities object.
// WARNING: both switches relax Chrome's web-security checks; only use them
// against pages you trust, never for general browsing sessions.
const chromeOptions = new chrome.Options().addArguments(
  '--disable-web-security',
  '--allow-running-insecure-content',
);

// NOTE: `await` here assumes the snippet runs inside an async function or an ES module.
const driver = await new Builder()
  .forBrowser('chrome')
  .setChromeOptions(chromeOptions)
  .build();
Common Use Cases
File Upload Simulation
# Upload a file by typing its path into the <input type="file"> element
file_locator = (By.CSS_SELECTOR, 'input[type="file"]')
file_input = driver.find_element(*file_locator)
file_input.send_keys('/path/to/file.txt')
Slider Control
# Drag the slider handle 50 px to the right: queue each step, then run them together
slider = driver.find_element(By.CSS_SELECTOR, '.slider-handle')
actions.click_and_hold(slider)
actions.move_by_offset(50, 0)
actions.release()
actions.perform()
Custom Gestures
# Trace a circle-like pattern around the center of the canvas element.
# To actually draw, wrap the loop in click_and_hold() / release().
import math

radius = 50
center_element = driver.find_element(By.ID, 'canvas')
actions.move_to_element(center_element)
# move_by_offset() is relative to the CURRENT pointer position, so each step
# must be the difference between consecutive points on the circle, not the
# point's own coordinates.
prev_x, prev_y = 0, 0  # pointer starts at the element's center
for angle in range(0, 361, 10):  # 361 so the last step closes the circle at 360 degrees
    x = round(radius * math.cos(math.radians(angle)))
    y = round(radius * math.sin(math.radians(angle)))
    actions.move_by_offset(x - prev_x, y - prev_y)
    prev_x, prev_y = x, y
actions.perform()
The Actions API in Selenium WebDriver provides powerful capabilities for automating complex user interactions. Remember to always use explicit waits, handle exceptions properly, and test across different browsers for robust automation scripts.