Cheerio provides several powerful methods to filter elements based on their text content. The primary approach uses the .filter()
method with custom filtering functions, allowing you to implement exact matches, partial matches, regex patterns, and more.
Basic Text Filtering with .filter()
The `.filter()` method accepts a callback and keeps only the elements for which that callback returns `true`:
const cheerio = require('cheerio');

const $ = cheerio.load('<html>...your HTML...</html>');

// Basic text filtering: keep only the elements whose whitespace-trimmed
// text is exactly the target string.
const filteredElements = $('selector').filter(function () {
  const elementText = $(this).text().trim();
  return elementText === 'Your desired text';
});
Exact Text Match Example
Here's a practical example that filters product elements by their exact text content:
const cheerio = require('cheerio');

const html = `
<div class="products">
<div class="product">iPhone 14</div>
<div class="product">Samsung Galaxy</div>
<div class="product">iPhone 14 Pro</div>
<div class="product">iPhone 14</div>
</div>
`;

const $ = cheerio.load(html);

/**
 * Filter predicate: true when the element's trimmed text is exactly
 * "iPhone 14" (so "iPhone 14 Pro" is rejected).
 */
function isExactIphone14(index, element) {
  const productName = $(element).text().trim();
  return productName === 'iPhone 14';
}

// Filter products with exact text "iPhone 14"
const iphone14Products = $('.product').filter(isExactIphone14);

console.log(`Found ${iphone14Products.length} exact matches`); // 2
Partial Text Matching
Use `.includes()` for partial (substring) text matches; note that the comparison is case-sensitive:
const cheerio = require('cheerio');

const html = `
<div class="articles">
<h2>JavaScript Fundamentals</h2>
<h2>Advanced JavaScript Techniques</h2>
<h2>Python Basics</h2>
<h2>JavaScript Testing</h2>
</div>
`;

const $ = cheerio.load(html);

// Keep every heading whose text contains the substring "JavaScript".
const jsArticles = $('h2').filter(function () {
  const headingText = $(this).text();
  return headingText.includes('JavaScript');
});

// Print the text of each matching heading, one per line.
jsArticles.each(function (index, heading) {
  console.log($(heading).text());
});
// Output:
// JavaScript Fundamentals
// Advanced JavaScript Techniques
// JavaScript Testing
Case-Insensitive Text Filtering
Convert text to lowercase for case-insensitive matching:
const cheerio = require('cheerio');

const html = `
<ul class="tags">
<li>JAVASCRIPT</li>
<li>Python</li>
<li>javascript</li>
<li>Java</li>
</ul>
`;

const $ = cheerio.load(html);

// Case-insensitive filtering: normalize each tag's text to lowercase and
// strip surrounding whitespace before comparing with the lowercase target.
const jsTagsInsensitive = $('li').filter(function () {
  const normalizedTag = $(this).text().toLowerCase().trim();
  return normalizedTag === 'javascript';
});

console.log(`Found ${jsTagsInsensitive.length} JavaScript tags`); // 2
Regular Expression Filtering
Use regex for complex text pattern matching:
const cheerio = require('cheerio');

const html = `
<div class="contacts">
<p>Email: john@example.com</p>
<p>Phone: 555-123-4567</p>
<p>Email: jane@company.org</p>
<p>Address: 123 Main St</p>
</div>
`;

const $ = cheerio.load(html);

// Simple email matcher: local part, "@", domain, then a TLD of two or more
// letters. The TLD class is [A-Za-z]; writing it as [A-Z|a-z] would also
// accept a literal "|" because "|" is not an alternation operator inside
// a character class. The pattern has no /g flag, so .test() is stateless
// and the same regex object can be reused for every element.
const EMAIL_PATTERN = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/;

// Filter elements containing email addresses
const emailElements = $('p').filter(function () {
  const text = $(this).text();
  return EMAIL_PATTERN.test(text);
});

// Print the text of each paragraph that contains an email address.
emailElements.each(function () {
  console.log($(this).text());
});
// Output:
// Email: john@example.com
// Email: jane@company.org
Filtering with startsWith() and endsWith()
Filter elements based on text prefixes or suffixes:
const cheerio = require('cheerio');

const html = `
<div class="errors">
<p>Error: Invalid input</p>
<p>Warning: Low disk space</p>
<p>Error: Connection failed</p>
<p>Info: Process completed</p>
</div>
`;

const $ = cheerio.load(html);

// Prefix that marks a paragraph as an error message.
const errorPrefix = 'Error:';

// Filter error messages: keep paragraphs whose text begins with the prefix.
const errorMessages = $('p').filter(function (index, paragraph) {
  const message = $(paragraph).text();
  return message.startsWith(errorPrefix);
});

// Print each error message on its own line.
errorMessages.each(function (index, paragraph) {
  console.log($(paragraph).text());
});
// Output:
// Error: Invalid input
// Error: Connection failed
Filtering Elements with Nested Content
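When an element contains child elements, `.text()` returns the combined text content of the element and all of its descendants, so a filter on the parent can match text that lives in any child: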
const cheerio = require('cheerio');

const html = `
<div class="cards">
<div class="card">
<h3>Product A</h3>
<span class="price">$19.99</span>
</div>
<div class="card">
<h3>Product B</h3>
<span class="price">$29.99</span>
</div>
</div>
`;

const $ = cheerio.load(html);

// Filter cards containing "$19.99". The price sits in a nested <span>, but
// the card's own .text() includes it, so the check runs on the card itself.
const cheapProducts = $('.card').filter(function (index, card) {
  const cardText = $(card).text();
  return cardText.includes('$19.99');
});

// Read the heading of the matching card(s).
const cheapProductName = cheapProducts.find('h3').text();
console.log(cheapProductName); // "Product A"
Arrow Function Syntax
Modern JavaScript syntax for cleaner code:
const cheerio = require('cheerio');

const html = `<ul><li>Apple</li><li>Banana</li><li>Cherry</li></ul>`;
const $ = cheerio.load(html);

// Using arrow functions: they have no `this` binding of their own, so the
// element is taken from the callback's second parameter instead of `this`.
const fruitsWithA = $('li').filter((i, el) => {
  // Lowercase first: String.prototype.includes() is case-sensitive, so a
  // plain includes('a') would skip "Apple", whose only "A" is uppercase.
  return $(el).text().toLowerCase().includes('a');
});

// Print each matching fruit on its own line.
fruitsWithA.each((i, el) => {
  console.log($(el).text());
});
// Output:
// Apple
// Banana
Multiple Filter Conditions
Combine multiple conditions using logical operators:
const cheerio = require('cheerio');

const html = `
<div class="items">
<div class="item active">Item 1</div>
<div class="item">Item 2</div>
<div class="item active">Special Item</div>
<div class="item">Item 4</div>
</div>
`;

const $ = cheerio.load(html);

// Filter elements with both "active" class and specific text: an item is
// kept only if it has the class AND its text contains "Special".
const activeSpecialItems = $('.item').filter((index, element) => {
  const item = $(element);
  if (!item.hasClass('active')) {
    return false;
  }
  return item.text().includes('Special');
});

console.log(activeSpecialItems.length); // 1
Key Points to Remember
- `.text()` retrieves the combined text content of an element and all its descendants
- Always use `.trim()` to handle whitespace issues
- `.filter()` returns a new Cheerio object containing only matching elements
- Filter functions receive the current index and element as parameters
- Use appropriate string methods (`.includes()`, `.startsWith()`, etc.) for different matching needs
- Regular expressions provide powerful pattern matching capabilities