...
Code Block |
---|
import xml.sax


# NOTE(review): `etree` is not imported in this snippet. The calls used below
# (`getparent()`, the `nsmap=` keyword) look like the lxml.etree API, so it is
# presumably provided by `from lxml import etree` elsewhere -- confirm.
class XML(xml.sax.handler.ContentHandler):
    """SAX content handler that builds an ``etree`` element tree.

    Elements are attached beneath a synthetic ``root`` element. Prefixed
    element and attribute names (``prefix:local``) are rewritten to Clark
    notation (``{uri}local``) using the ``xmlns:prefix`` declarations seen
    so far.
    """

    def __init__(self):
        super().__init__()
        # Synthetic container; the parsed document element becomes its
        # first child.
        self.current = etree.Element("root")
        # prefix -> namespace URI. A single dict is shared by the whole
        # parse, so declarations accumulate and are never popped.
        self.nsmap = {'xml': 'http://www.w3.org/XML/1998/namespace'}
        # Character data received since the last element boundary.
        self._text_chunks = []

    def _qualify(self, name):
        """Return *name* in Clark notation if it has the form ``prefix:local``.

        An unknown prefix is used verbatim as the namespace. Names with no
        colon, or with more than one, collapse to the text after the last
        colon.
        """
        parts = name.split(':')
        if len(parts) == 2:
            prefix, local = parts
            return f"{{{self.nsmap.get(prefix, prefix)}}}{local}"
        return parts[-1]

    def _flush_text(self):
        """Store buffered character data as the current element's text.

        The joined text is stripped; whitespace-only runs are discarded and
        leave any existing text untouched.
        """
        text = "".join(self._text_chunks).strip()
        self._text_chunks.clear()
        if text:
            self.current.text = text

    def startElement(self, name, attrs):
        """Open a new child element of the current element and descend into it."""
        # Text seen before this child belongs to the enclosing element.
        self._flush_text()

        # First pass: register namespace declarations so that prefixes
        # resolve regardless of the order the attributes are reported in.
        # NOTE(review): a default-namespace declaration (`xmlns="..."`) has
        # no colon, so it is kept as a plain attribute named `xmlns` rather
        # than being registered -- confirm this is intended.
        regular = []
        for key, value in attrs.items():
            parts = key.split(':')
            if len(parts) == 2 and parts[0] == 'xmlns':
                self.nsmap[parts[1]] = value
            else:
                regular.append((key, value))

        # Second pass: qualify the remaining attribute names.
        attributes = {self._qualify(key): value for key, value in regular}

        self.current = etree.SubElement(
            self.current, self._qualify(name), attributes, nsmap=self.nsmap
        )

    def endElement(self, name):
        """Close the current element and return to its parent."""
        self._flush_text()
        self.current = self.current.getparent()

    def characters(self, data):
        """Buffer a chunk of character data.

        SAX parsers may deliver one run of text in several calls, so the
        chunks are collected here and joined at the next element boundary
        instead of overwriting the element text on every call.
        """
        self._text_chunks.append(data)


def parse_xml(io, base_url=None):
    """Parse *io* with a SAX parser and return the result as an ElementTree.

    Args:
        io: Source handed straight to the SAX parser's ``parse()`` method.
        base_url: Accepted but currently unused.

    Returns:
        An ``etree.ElementTree`` wrapping the first child of the handler's
        synthetic root, i.e. the parsed document element.
    """
    parser = xml.sax.make_parser()
    handler = XML()
    parser.setContentHandler(handler)
    parser.parse(io)
    return etree.ElementTree(handler.current[0])
...