...
Code Block |
---|
import xml.sax
class XML(xml.sax.handler.ContentHandler):
    """SAX content handler that assembles an ``etree`` element tree.

    Elements are appended beneath a placeholder ``root`` element, and
    ``self.current`` always points at the element currently being filled.
    ``self.nsmap`` collects every ``xmlns:prefix`` declaration seen so far
    and is passed to each new element as its ``nsmap``.

    NOTE(review): ``etree`` is not imported in this snippet; the use of
    ``getparent()`` and the ``nsmap=`` keyword suggests ``lxml.etree`` --
    confirm against the rest of the file.
    """

    def __init__(self):
        # Initialise the base handler state as well; this was previously skipped.
        super().__init__()
        self.current = etree.Element("root")
        self.nsmap = {}
        # Raw character chunks received since the last element boundary.
        self._pending_text = []

    def _flush_text(self):
        """Store the buffered character data on ``self.current`` if non-blank."""
        text = "".join(self._pending_text).strip()
        self._pending_text.clear()
        if text:
            # NOTE: as before, text that follows a child element replaces the
            # parent's earlier text rather than becoming the child's tail.
            self.current.text = text

    def startElement(self, name, attrs):
        """Open a new child of the current element and descend into it.

        Args:
            name: Raw element name as reported by SAX, possibly ``prefix:local``.
            attrs: SAX attributes object; only ``items()`` is used.
        """
        # Text seen before this child belongs to the enclosing element.
        self._flush_text()

        attributes = {}
        for key, value in attrs.items():
            parts = key.split(':')
            if len(parts) == 2 and parts[0] == 'xmlns':
                # ``xmlns:prefix="uri"`` declares a namespace prefix.
                self.nsmap[parts[-1]] = value
            else:
                # Everything else is kept as an attribute under its last
                # colon-separated segment, so any prefix is dropped. A bare
                # ``xmlns`` attribute also ends up here.
                attributes[parts[-1]] = value

        parts = name.split(':')
        if len(parts) == 2:
            # Resolve the prefix to its declared URI; an undeclared prefix is
            # used verbatim as the namespace.
            namespace = self.nsmap.get(parts[0], parts[0])
            tag = f"{{{namespace}}}{parts[-1]}"
        else:
            tag = parts[-1]

        self.current = etree.SubElement(
            self.current, tag, attributes, nsmap=self.nsmap
        )

    def endElement(self, name):
        """Close the current element and step back up to its parent."""
        self._flush_text()
        self.current = self.current.getparent()

    def characters(self, data):
        """Buffer a chunk of character data for the current element.

        SAX parsers may deliver one run of text in several calls, so chunks
        are joined and stripped as a whole at the next element boundary
        instead of each chunk overwriting the previous one.
        """
        self._pending_text.append(data)
def parse_xml(io, base_url=None):
    """Parse XML from *io* with SAX and return it as an ``etree.ElementTree``.

    Args:
        io: Source handed directly to the SAX parser's ``parse()`` method.
        base_url: Accepted but not used by this function.

    Returns:
        An ``etree.ElementTree`` whose root is the first child of the
        handler's placeholder ``root`` element.
    """
    tree_builder = XML()
    sax_reader = xml.sax.make_parser()
    sax_reader.setContentHandler(tree_builder)
    sax_reader.parse(io)

    # The handler hangs the document under a synthetic "root" wrapper, so the
    # real document element is its first child.
    document_element = tree_builder.current[0]
    return etree.ElementTree(document_element)
...