After modifying an HTML or XML tree with `lxml`, you can save it back to a file using the `write()` method. This guide covers the different approaches for XML and HTML documents.
Basic XML Example
from lxml import etree

# Load the source document and grab its root element.
tree = etree.parse('example.xml')
root = tree.getroot()

# Append a child that carries an "id" attribute and some text.
new_element = etree.SubElement(root, "new_element")
new_element.set("id", "123")
new_element.text = "This is a new element"

# Serialize the modified tree, indented and with an XML declaration.
write_options = {
    "pretty_print": True,
    "xml_declaration": True,
    "encoding": "UTF-8",
}
tree.write('modified_example.xml', **write_options)
Basic HTML Example
# etree is needed for SubElement below; importing only `html` would raise
# NameError at the SubElement call.
from lxml import etree, html

# Parse the HTML file
tree = html.parse('example.html')
root = tree.getroot()

# Modify the tree - add a new div to body
body = root.find('.//body')
new_div = etree.SubElement(body, "div")
new_div.text = "This is a new div"
new_div.set("class", "new-content")

# Save the modified tree; method="html" selects HTML serialization rules
# instead of the default XML ones.
tree.write('modified_example.html',
           pretty_print=True,
           method="html",
           encoding="UTF-8")
Advanced Modification Examples
Modifying Existing Elements
from lxml import etree

tree = etree.parse('data.xml')
root = tree.getroot()

# Find and modify existing elements
for element in root.xpath('//item[@id="1"]'):
    element.text = "Updated content"
    element.set("status", "modified")

# Remove elements.
# The matches are collected first so the tree is not mutated while the
# XPath result is being produced.
elements_to_remove = root.xpath('//item[@status="obsolete"]')
for element in elements_to_remove:
    parent = element.getparent()
    # The root element has no parent and cannot be removed this way.
    if parent is not None:
        # NOTE: remove() also drops the element's tail text (the text that
        # follows its closing tag). Copy element.tail elsewhere first if it
        # must be kept.
        parent.remove(element)

# Specify the encoding explicitly so the declaration and the bytes agree.
tree.write('updated_data.xml',
           pretty_print=True,
           xml_declaration=True,
           encoding="UTF-8")
Working with Namespaces
from lxml import etree

# Parse XML with namespaces
tree = etree.parse('namespaced.xml')
root = tree.getroot()

# Define namespace map: the "ns" prefix is only an alias used by the XPath
# expression below; it does not have to match the prefix used in the file.
nsmap = {'ns': 'http://example.com/namespace'}

# Modify elements with namespaces
for element in root.xpath('//ns:item', namespaces=nsmap):
    element.text = "Modified namespace content"

# Preserve namespaces when writing
tree.write('modified_namespaced.xml',
           pretty_print=True,
           xml_declaration=True,
           encoding="UTF-8")
Write Method Options
The `write()` method accepts several important parameters:
| Parameter | Description | Default |
|-----------|-------------|---------|
| `pretty_print` | Format output for readability | `False` |
| `xml_declaration` | Add XML declaration (XML only) | `False` |
| `encoding` | Output file encoding | `"ascii"` |
| `method` | Serialization method (`"xml"`, `"html"`, `"text"`) | `"xml"` |
| `doctype` | Custom DOCTYPE declaration | `None` |
| `with_tail` | Include tail text | `True` |
Alternative: Using tostring()
For more control, use `etree.tostring()`:
from lxml import etree

tree = etree.parse('example.xml')
# ... make modifications ...

# Convert to string first. With a byte encoding such as "UTF-8",
# tostring() returns bytes, not str.
xml_string = etree.tostring(tree,
                            pretty_print=True,
                            xml_declaration=True,
                            encoding="UTF-8")

# Write to file manually; binary mode because xml_string is bytes.
with open('output.xml', 'wb') as f:
    f.write(xml_string)
Working with Fragments
from lxml import etree, html

# Create new XML document from scratch
root = etree.Element("root")
child = etree.SubElement(root, "child")
child.text = "Hello World"

# Save as new document; wrap the root in an ElementTree to get write().
tree = etree.ElementTree(root)
tree.write('new_document.xml',
           pretty_print=True,
           xml_declaration=True,
           encoding="UTF-8")

# For HTML fragments
html_fragment = html.fromstring('<div><p>Content</p></div>')
html_fragment.append(html.Element('span'))
html_fragment[-1].text = "Added span"

# Convert back to string; encoding='unicode' yields a str rather than bytes.
result = html.tostring(html_fragment, pretty_print=True, encoding='unicode')

# Text mode with an explicit encoding, so the output does not depend on the
# platform's default locale encoding.
with open('fragment.html', 'w', encoding='utf-8') as f:
    f.write(result)
Error Handling
from lxml import etree

try:
    tree = etree.parse('input.xml')
    # ... make modifications ...
    tree.write('output.xml',
               pretty_print=True,
               xml_declaration=True,
               encoding="UTF-8")
    print("File saved successfully")
except etree.XMLSyntaxError as e:
    # Raised by parse() when the input is not well-formed XML.
    print(f"XML parsing error: {e}")
except OSError as e:
    # Missing or unreadable input, or an unwritable output path.
    # IOError is an alias of OSError in Python 3.
    print(f"File I/O error: {e}")
Best Practices
- Always specify encoding explicitly when writing files
- Use pretty_print=True for human-readable output
- Validate modifications before saving to avoid malformed documents
- Handle exceptions when working with file I/O operations
- Preserve original formatting when possible by using appropriate parser settings
- Test with sample data before processing large files