Navigating through child nodes in Html Agility Pack is essential for DOM traversal and data extraction. This guide covers different methods to access and iterate through child nodes effectively.
Basic Child Node Navigation
Html Agility Pack provides several properties and methods to navigate child nodes:
- ChildNodes - Returns all direct child nodes (including text nodes)
- Elements() - Returns only element child nodes (filters out text/comment nodes)
- FirstChild / LastChild - Access the first/last child node
- NextSibling / PreviousSibling - Navigate to adjacent nodes
Installation
First, install Html Agility Pack via NuGet:
Install-Package HtmlAgilityPack
Complete Example
using HtmlAgilityPack;
using System;
using System.Linq;
/// <summary>
/// Console demo that parses a small HTML fragment and walks the children of its
/// <c>div</c> with <c>id='parent'</c> in three different ways.
/// </summary>
class Program
{
    /// <summary>
    /// Parses the sample markup, locates the parent <c>div</c> and, if it was found,
    /// runs each navigation demo against it.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var htmlDoc = new HtmlDocument();
        // Load HTML content
        htmlDoc.LoadHtml(@"
<html>
<body>
<div id='parent'>
<h2>Title</h2>
<p class='content'>First paragraph</p>
<p class='content'>Second paragraph</p>
<span>Additional info</span>
<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>
</div>
</body>
</html>");
        var parentNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='parent']");
        // Skip the demos when the XPath query found no matching div.
        if (parentNode != null)
        {
            NavigateAllChildren(parentNode);
            NavigateElementsOnly(parentNode);
            NavigateSpecificElements(parentNode);
        }
    }

    // Method 1: Navigate all child nodes (including text nodes)
    /// <summary>
    /// Prints every direct child of <paramref name="parentNode"/>: element nodes with
    /// their name and trimmed text, and text nodes that are not pure whitespace.
    /// </summary>
    /// <param name="parentNode">Node whose direct children are listed.</param>
    static void NavigateAllChildren(HtmlNode parentNode)
    {
        Console.WriteLine("=== All Child Nodes ===");
        foreach (var childNode in parentNode.ChildNodes)
        {
            if (childNode.NodeType == HtmlNodeType.Element)
            {
                Console.WriteLine($"Element: {childNode.Name} = '{childNode.InnerText.Trim()}'");
            }
            // Whitespace-only text nodes (the line breaks between tags) are skipped.
            else if (childNode.NodeType == HtmlNodeType.Text && !string.IsNullOrWhiteSpace(childNode.InnerText))
            {
                Console.WriteLine($"Text: '{childNode.InnerText.Trim()}'");
            }
        }
    }

    // Method 2: Navigate only element nodes (recommended)
    /// <summary>
    /// Prints each direct child element of <paramref name="parentNode"/> with its
    /// trimmed text, followed by one line per attribute when the element has any.
    /// </summary>
    /// <param name="parentNode">Node whose child elements are listed.</param>
    static void NavigateElementsOnly(HtmlNode parentNode)
    {
        Console.WriteLine("\n=== Element Nodes Only ===");
        foreach (var element in parentNode.Elements())
        {
            Console.WriteLine($"{element.Name}: {element.InnerText.Trim()}");
            // Access attributes if they exist
            if (element.HasAttributes)
            {
                foreach (var attr in element.Attributes)
                {
                    Console.WriteLine($" @{attr.Name} = '{attr.Value}'");
                }
            }
        }
    }

    // Method 3: Navigate specific child elements
    /// <summary>
    /// Prints the names of the first and last child elements of
    /// <paramref name="parentNode"/>, then the trimmed text of each direct
    /// <c>p</c> child.
    /// </summary>
    /// <param name="parentNode">Node whose child elements are inspected.</param>
    static void NavigateSpecificElements(HtmlNode parentNode)
    {
        Console.WriteLine("\n=== Specific Element Navigation ===");
        // Get first and last child elements.
        // Materialize once so the lazy sequence is not walked separately for each lookup.
        var childElements = parentNode.Elements().ToList();
        var firstChild = childElements.FirstOrDefault();
        var lastChild = childElements.LastOrDefault();
        Console.WriteLine($"First child: {firstChild?.Name}");
        Console.WriteLine($"Last child: {lastChild?.Name}");
        // Navigate specific elements by tag name.
        // ToList avoids enumerating the children twice (once to count, once to print).
        var paragraphs = parentNode.Elements("p").ToList();
        Console.WriteLine($"Found {paragraphs.Count} paragraph elements:");
        foreach (var p in paragraphs)
        {
            Console.WriteLine($" - {p.InnerText.Trim()}");
        }
    }
}
Advanced Navigation Techniques
Using XPath for Targeted Selection
// XPath queries evaluated relative to parentNode (assumed to be a non-null HtmlNode declared by the caller).
// NOTE(review): Html Agility Pack is documented to return null, not an empty collection,
// from SelectNodes/SelectSingleNode when nothing matches - confirm and null-check before use.
// Select direct children only (not descendants)
var directParagraphs = parentNode.SelectNodes("./p");
// Select all paragraph descendants
var allParagraphs = parentNode.SelectNodes(".//p");
// Select children with specific attributes (here: class equal to 'content')
var contentParagraphs = parentNode.SelectNodes("./p[@class='content']");
// Select by position: [1] is the first direct <p> child, [last()] the final one
var firstParagraph = parentNode.SelectSingleNode("./p[1]");
var lastParagraph = parentNode.SelectSingleNode("./p[last()]");
Conditional Navigation
// Navigate with conditions: dispatch on the tag name of each direct child element.
foreach (var child in parentNode.Elements())
{
    // ToLowerInvariant keeps the tag-name match independent of the current culture
    // (culture-sensitive ToLower can map characters differently, e.g. under tr-TR).
    switch (child.Name.ToLowerInvariant())
    {
        case "p":
            Console.WriteLine($"Paragraph: {child.InnerText}");
            break;
        case "ul":
            Console.WriteLine("Found list with items:");
            // Only direct <li> children of the list are printed.
            foreach (var li in child.Elements("li"))
            {
                Console.WriteLine($" - {li.InnerText}");
            }
            break;
        // h1-h3 share one handler; other tags fall through the switch unhandled.
        case "h1":
        case "h2":
        case "h3":
            Console.WriteLine($"Heading: {child.InnerText}");
            break;
    }
}
Recursive Navigation
/// <summary>
/// Prints <paramref name="node"/> and all of its descendant elements as an indented
/// outline, one line per element: the element name followed by its own direct text.
/// Nodes that are not elements produce no output and are not descended into.
/// </summary>
/// <param name="node">Node to print; only element nodes are reported.</param>
/// <param name="depth">Nesting level; each level adds two spaces of indentation.</param>
static void NavigateRecursively(HtmlNode node, int depth = 0)
{
    // Two spaces per nesting level.
    string padding = new string(' ', depth * 2);

    if (node.NodeType != HtmlNodeType.Element)
    {
        return;
    }

    // Direct text only, so text belonging to nested elements is not repeated here.
    string ownText = node.GetDirectInnerText().Trim();
    Console.WriteLine($"{padding}{node.Name}: {ownText}");

    // Descend into child elements one level deeper.
    foreach (HtmlNode nested in node.Elements())
    {
        NavigateRecursively(nested, depth + 1);
    }
}
Loading HTML from Different Sources
// Four independent ways to load markup. htmlDoc is assumed to be an existing
// HtmlDocument and htmlString a string of HTML, both declared by the caller.
// From string
htmlDoc.LoadHtml(htmlString);
// From file
htmlDoc.Load("path/to/file.html");
// From URL (HtmlWeb.Load returns a new document rather than filling htmlDoc)
var web = new HtmlWeb();
var doc = web.Load("https://example.com");
// From stream (File lives in System.IO; the using block disposes the stream when it exits)
using (var stream = File.OpenRead("file.html"))
{
htmlDoc.Load(stream);
}
Common Patterns and Best Practices
- Always check for null: SelectSingleNode and SelectNodes return null when nothing matches, so use null-conditional operators or explicit null checks before using the result
- Filter node types: Use Elements() instead of ChildNodes to avoid text nodes
- Handle whitespace: Trim text content to remove formatting whitespace
- Use XPath wisely: Direct child selection (./tag) vs descendant selection (.//tag)
- Performance: Cache frequently accessed nodes rather than re-selecting them
This comprehensive approach gives you full control over child node navigation in Html Agility Pack, whether you need simple iteration or complex DOM traversal patterns.