...
Code Block |
---|
# Create pickled datafile
#
# The pickle bytes are carried through a *text* file: decoding them as latin1
# maps every byte to exactly one code point, and encoding as latin1 on the
# reading side restores the original bytes.
#
# Both files are opened in a `with` block so the pickle is flushed and closed
# before pyFF tries to read it.  The sink pins its encoding and disables
# newline translation so the stored text does not depend on the platform.
# NOTE(review): assumes pyFF decodes the file as UTF-8 without newline
# translation before handing it to parse_xml -- confirm against the loader.
with open("edugain.xml", "r", encoding="utf-8") as source, \
        open("edugain.pkl", "w", encoding="utf-8", newline="") as sink:
    t = objectify.parse(source)
    p = pickle.dumps(t).decode('latin1')
    sink.write(p)


# Read pickled object back in pyFF
def parse_xml(io, base_url=None):
    """Rebuild the pickled tree from the latin1-smuggled text in ``io``.

    ``io`` is the text content of the ``.pkl`` file written above.
    ``base_url`` is ignored.
    NOTE(review): the parameter list is assumed to mirror the pyFF function
    being replaced -- the original snippet only said ``def parse_xml``.
    """
    # SECURITY: unpickling executes arbitrary code embedded in the data.
    # Only feed this files you generated yourself, never downloaded content.
    return pickle.loads(io.encode('latin1'))


# In the pyFF metadata parser, call:
#     t = parse_xml(content)  # instead of parse_xml(unicode_stream(content))
xml.sax etree.ElementTree parser
This code uses the event-based xml.sax parser to create an etree.ElementTree object for pyFF. At the time of writing, pyFF refuses to validate the result, although it appears to produce correct metadata.
Code Block |
---|
import xml.sax
class XML(xml.sax.handler.ContentHandler):
    """SAX content handler that builds an ``etree`` element tree.

    The handler targets a parser running in its default, non-namespace
    mode: ``startElement`` receives raw qualified names and the
    ``xmlns`` / ``xmlns:*`` declarations arrive as ordinary attributes, so
    prefixes are resolved here.  ``etree`` is presumably ``lxml.etree``
    (``getparent()`` and the ``nsmap=`` keyword are used) -- confirm against
    the file's imports.

    Attributes:
        current: Element currently being populated.  It starts as a
            placeholder ``root`` element; after parsing, the document
            element is its first child.
        nsmap: Prefix -> namespace URI bindings in scope for ``current``.
            The default namespace is stored under the key ``None``.
    """

    # Namespace permanently bound to the reserved ``xml`` prefix
    # (``xml:lang`` etc.); documents normally do not declare it.
    XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"

    def __init__(self):
        super().__init__()
        self.current = etree.Element("root")
        self.nsmap = {}
        # Namespace scopes of the enclosing elements, restored on endElement.
        self._scopes = []
        # Character data received since the last start/end tag.
        self._chunks = []
        # Child closed most recently inside ``current``; pending character
        # data belongs to its tail.  None right after a start tag.
        self._closed = None

    def _flush_text(self):
        """Attach the buffered character data to the tree and clear it.

        Whitespace-only data is discarded and surrounding whitespace is
        stripped.
        """
        data = "".join(self._chunks).strip()
        self._chunks.clear()
        if not data:
            return
        if self._closed is None:
            self.current.text = data
        else:
            self._closed.tail = data

    def startElement(self, name, attrs):
        """Create a child of ``current`` for ``name`` and descend into it."""
        self._flush_text()

        # First pass: separate namespace declarations from real attributes,
        # so that a prefix may be used before its declaration in attribute
        # order.
        declared = {}
        plain = []
        for key, value in attrs.items():
            if key == 'xmlns':
                declared[None] = value
                continue
            parts = key.split(':')
            if len(parts) == 2 and parts[0] == 'xmlns':
                declared[parts[1]] = value
            else:
                plain.append((key, value))

        # Open a new scope; the parent's mapping is never mutated so it can
        # be restored unchanged when this element ends.
        self._scopes.append(self.nsmap)
        if declared:
            scope = {**self.nsmap, **declared}
            if scope.get(None) == "":
                # xmlns="" removes the default namespace for this subtree.
                del scope[None]
            self.nsmap = scope

        # Second pass: qualify prefixed attributes.  Unprefixed attributes
        # never take the default namespace.  An unknown prefix is dropped,
        # leaving the bare local name.
        attributes = {}
        for key, value in plain:
            prefix, sep, local = key.rpartition(':')
            uri = self.nsmap.get(prefix) if sep else None
            if uri is None and prefix == 'xml':
                uri = self.XML_NAMESPACE
            attributes[f"{{{uri}}}{local}" if uri else local] = value

        parts = name.split(':')
        if len(parts) == 2:
            # An undeclared prefix falls back to being used as the
            # namespace itself.
            uri = self.nsmap.get(parts[0], parts[0])
        else:
            uri = self.nsmap.get(None)
        tag = f"{{{uri}}}{parts[-1]}" if uri else parts[-1]

        self.current = etree.SubElement(
            self.current, tag, attributes, nsmap=self.nsmap)
        self._closed = None

    def endElement(self, name):
        """Finish ``current`` and step back up to its parent."""
        self._flush_text()
        self._closed = self.current
        self.current = self.current.getparent()
        self.nsmap = self._scopes.pop()

    def characters(self, data):
        """Buffer one chunk of character data.

        A single text node may be delivered in several chunks, so the data
        is only attached to the tree at the next start or end tag.
        """
        self._chunks.append(data)
def parse_xml(io, base_url=None):
    """Parse ``io`` with the event-based SAX parser and return a tree.

    Args:
        io: Source handed unchanged to the SAX parser's ``parse`` method.
        base_url: Accepted but not used by this implementation.

    Returns:
        An ``etree.ElementTree`` whose root is the first child of the
        handler's placeholder ``root`` element, i.e. the document element.
    """
    builder = XML()
    reader = xml.sax.make_parser()
    reader.setContentHandler(builder)
    reader.parse(io)
    document_element = builder.current[0]
    return etree.ElementTree(document_element)