The lxml library provides powerful methods to modify XML documents. This guide covers updating element content, attributes, and removing elements entirely.
Installation
First, install lxml using pip:
pip install lxml
Updating Elements
Updating Element Text Content
from lxml import etree
# Parse XML from file or string
tree = etree.parse('example.xml')
root = tree.getroot()

# Method 1: Update using iteration
# Visit every <product> element and filter on the id attribute in Python.
for elem in root.iter('product'):
    if elem.get('id') == '123':
        elem.text = 'Updated Product Name'

# Method 2: Update using XPath
# The attribute filter is part of the expression, so only matches come back.
products = root.xpath('//product[@id="123"]')
for product in products:
    product.text = 'Updated Product Name'

# Method 3: Update first matching element
# find() yields None when nothing matches, so guard before touching .text.
elem = root.find('.//product[@id="123"]')
if elem is not None:
    elem.text = 'Updated Product Name'
Updating Element Attributes
from lxml import etree
tree = etree.parse('example.xml')
root = tree.getroot()

# Every attribute operation below needs a real element, so all of them live
# under a single None guard: find() returns None when no product matches.
elem = root.find('.//product[@id="123"]')
if elem is not None:
    # Update single attribute
    elem.set('price', '29.99')
    elem.set('category', 'electronics')

    # Update multiple attributes at once
    elem.attrib.update({
        'price': '29.99',
        'category': 'electronics',
        'in_stock': 'true',
    })

    # Remove an attribute
    if 'old_attribute' in elem.attrib:
        del elem.attrib['old_attribute']
Adding New Child Elements
from lxml import etree
tree = etree.parse('example.xml')
root = tree.getroot()

# find() returns None when the document has no <products> descendant;
# guard so SubElement is never handed a missing parent.
parent = root.find('.//products')
if parent is not None:
    # Add new child element
    new_product = etree.SubElement(parent, 'product')
    new_product.set('id', '456')
    new_product.text = 'New Product'

    # Add child with attributes and text in one step
    etree.SubElement(parent, 'product', id='789', price='19.99').text = 'Another Product'
Removing Elements
Basic Element Removal
from lxml import etree
tree = etree.parse('example.xml')
root = tree.getroot()

# Method 1: Remove by finding parent
# Removal always goes through the parent; a matched root element has no
# parent and is therefore skipped.
products_to_remove = root.xpath('//product[@discontinued="true"]')
for product in products_to_remove:
    parent = product.getparent()
    if parent is not None:
        parent.remove(product)

# Method 2: Remove from known parent
# Guard against a document without a <products> descendant before looping.
parent = root.find('.//products')
if parent is not None:
    for product in parent.findall('product'):
        if product.get('id') == '123':
            parent.remove(product)
Advanced Removal Techniques
from lxml import etree
tree = etree.parse('example.xml')
root = tree.getroot()

# Remove elements based on text content
for elem in root.xpath('//product[text()="Discontinued Item"]'):
    parent = elem.getparent()
    if parent is not None:
        parent.remove(elem)

# Remove empty elements
# The expression can also match the root element when it is empty; the root
# has no parent, so it is left in place instead of raising AttributeError.
for elem in root.xpath('//*[not(node())]'):
    parent = elem.getparent()
    if parent is not None:
        parent.remove(elem)

# Remove elements and preserve children (unwrap)
wrapper = root.find('.//wrapper')
if wrapper is not None:
    parent = wrapper.getparent()
    # Move children to parent before removing wrapper.
    # Iterate over a snapshot because each insert() takes the child out of
    # the wrapper; re-reading the wrapper's index keeps the original order.
    for child in list(wrapper):
        parent.insert(parent.index(wrapper), child)
    parent.remove(wrapper)
Complete Example
Here's a comprehensive example working with a sample XML document:
Sample XML (books.xml)
<?xml version="1.0" encoding="UTF-8"?>
<library>
<book id="1" category="fiction" available="true">
<title>The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<price>12.99</price>
<stock>5</stock>
</book>
<book id="2" category="science" available="false">
<title>A Brief History of Time</title>
<author>Stephen Hawking</author>
<price>15.99</price>
<stock>0</stock>
</book>
<book id="3" category="fiction" available="true">
<title>To Kill a Mockingbird</title>
<author>Harper Lee</author>
<price>10.99</price>
<stock>3</stock>
</book>
</library>
Python Script
from lxml import etree
def modify_xml_document():
    """Apply a series of edits to ``books.xml`` and save the result.

    Steps, in order: raise every price by 10%, mark books whose stock is 0
    as unavailable, add a ``<status>`` child to books with stock between 0
    and 3 (exclusive), remove every book marked unavailable, and write the
    tree to ``updated_books.xml``. The input file itself is not modified.
    """
    # Parse the XML
    tree = etree.parse('books.xml')
    root = tree.getroot()

    # Update: Increase all prices by 10%
    for price_elem in root.xpath('//price'):
        raw_price = price_elem.text
        if raw_price is None or not raw_price.strip():
            # An empty <price/> has nothing to scale; float(None) would fail.
            continue
        new_price = float(raw_price) * 1.1
        # Fixed two-decimal formatting keeps trailing zeros ("11.00"),
        # which str(round(x, 2)) would drop ("11.0").
        price_elem.text = f"{new_price:.2f}"

    # Update: Mark out-of-stock books as unavailable
    for book in root.xpath('//book[stock="0"]'):
        book.set('available', 'false')

    # Add: New element for books with low stock
    for book in root.xpath('//book[stock<3 and stock>0]'):
        low_stock = etree.SubElement(book, 'status')
        low_stock.text = 'Low Stock'

    # Remove: Books that are unavailable
    for book in root.xpath('//book[@available="false"]'):
        parent = book.getparent()
        if parent is not None:
            parent.remove(book)

    # Save with pretty formatting
    tree.write('updated_books.xml',
               pretty_print=True,
               xml_declaration=True,
               encoding='UTF-8')
    print("XML document updated successfully!")


if __name__ == "__main__":
    modify_xml_document()
Error Handling and Best Practices
from lxml import etree
import os
def safe_xml_modification(xml_file):
    """Update matching elements in ``xml_file`` with backup and atomic save.

    The file is first copied to ``<xml_file>.backup``. Elements selected by
    ``//element[@condition="update"]`` get new text and a ``modified``
    attribute. The result is written to ``<xml_file>.tmp`` and then moved
    over the original with ``os.replace``.

    Args:
        xml_file: Path of the XML file to modify in place.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions
        are caught here rather than propagated to the caller.
    """
    import shutil

    backup_file = f"{xml_file}.backup"
    temp_file = f"{xml_file}.tmp"
    # Only a backup completed during this call is trusted for restoring;
    # a stale or partially written backup must never replace the original.
    backup_created = False

    try:
        # Create backup
        if os.path.exists(xml_file):
            shutil.copy2(xml_file, backup_file)
            backup_created = True

        # Parse XML
        tree = etree.parse(xml_file)
        root = tree.getroot()

        # Perform modifications
        elements_to_update = root.xpath('//element[@condition="update"]')
        if not elements_to_update:
            print("No elements found to update")
            return

        for elem in elements_to_update:
            # Safely update element
            elem.text = 'Updated content'
            elem.set('modified', 'true')

        # Validate before saving (if you have a schema)
        # xmlschema_doc = etree.parse('schema.xsd')
        # xmlschema = etree.XMLSchema(xmlschema_doc)
        # if not xmlschema.validate(tree):
        #     raise ValueError("XML validation failed")

        # Save with atomic write: the original is only replaced once the
        # new content has been written out completely.
        tree.write(temp_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')
        os.replace(temp_file, xml_file)
        print("XML modification completed successfully")
    except etree.XMLSyntaxError as e:
        # Parsing failed before anything was written; nothing to restore.
        print(f"XML parsing error: {e}")
    except Exception as e:
        print(f"Error modifying XML: {e}")
        # Restore backup if something went wrong
        if backup_created and os.path.exists(backup_file):
            os.replace(backup_file, xml_file)
    finally:
        # Do not leave a half-written temp file behind after a failure.
        if os.path.exists(temp_file):
            os.remove(temp_file)
Key Tips
- Always create backups before modifying XML files
- Use XPath expressions for precise element selection
- Check for None when using find() methods to avoid AttributeError
- Validate XML structure after modifications when possible
- Use pretty_print=True for human-readable output
- Handle encoding explicitly to avoid character issues
- Consider memory usage with large XML files - use iterative parsing when needed
The lxml library provides robust tools for XML manipulation. Combine these techniques based on your specific use case and always test modifications on sample data first.