This page will contain the activity log of the pyFF+ experiments and endeavours.
Memory usage investigation
# Memory-usage investigation with guppy's heapy.
# NOTE(review): `log` is not defined in this snippet; it is presumably the
# logger of the module the snippet is pasted into — confirm before use.
import code

from guppy import hpy

hp = hpy()
...  # (code elided in the original notes)
# Presumably marks the reference point so later heap() calls report only
# objects allocated after it — see the guppy documentation to confirm.
hp.setrelheap()
...  # (code elided in the original notes)

# Just print the heap somewhere:
h = hp.heap()
log.debug(f"\nheapy: {h}")

# Or possibly interrupt the code execution and inspect the hp object.
# globals() and locals() are merged so every name visible at this point is
# available in the interactive console.
code.interact(local=dict(globals(), **locals()))
Un/Pickling etree.ElementTree object
# --- Create the pickled data file (one-off helper script) -------------------
# NOTE(review): `pickle` and `objectify` (presumably lxml.objectify) must be
# imported by the surrounding script; the original notes do not show imports.
with open("edugain.xml", "r", encoding="utf-8") as source:
    t = objectify.parse(source)

# pickle.dumps() returns bytes.  latin1 maps every byte value 0-255 to the
# code point with the same number, so decode('latin1') followed later by
# encode('latin1') reproduces the original bytes exactly.
p = pickle.dumps(t).decode("latin1")

# The encoding is pinned and newline translation disabled so the file content
# does not depend on the platform's locale or line-ending convention.
# NOTE(review): whatever loads this file must decode it as UTF-8 without
# newline translation, otherwise the pickle stream is corrupted — confirm
# against the way pyFF reads the resource.
with open("edugain.pkl", "w", encoding="utf-8", newline="") as sink:
    sink.write(p)


# --- Read the pickled object back in pyFF ------------------------------------
def parse_xml(io, base_url=None):
    """Return the object unpickled from the text *io*.

    :param io: ``str`` holding a pickle stream that was decoded as latin1
        (as written by the helper script above).
    :param base_url: accepted for signature compatibility; not used.
    :return: whatever object was pickled.

    SECURITY: ``pickle.loads`` can execute arbitrary code while loading.
    Only feed it files you generated yourself, never downloaded content.
    """
    return pickle.loads(io.encode("latin1"))


# --- In the metadata parser ---------------------------------------------------
t = parse_xml(content)  # instead of parse_xml(unicode_stream(content))
xml.sax etree.ElementTree parser
This code uses the event-based xml.sax parser to create an etree.ElementTree object for pyFF. At the time of writing, pyFF refuses to validate the result, although the metadata it produces appears to be correct.
import xml.sax


class XML(xml.sax.handler.ContentHandler):
    """SAX content handler that builds an element tree from parse events.

    New elements are appended below a synthetic ``root`` element.
    ``self.current`` always points at the element that is currently open;
    after a complete parse it is the synthetic root again and its first
    child is the document element.

    ``etree`` must provide ``getparent()`` and the ``nsmap=`` keyword of
    ``SubElement`` (both are used below), i.e. it is expected to be
    ``lxml.etree``.

    Two SAX modes are supported:

    * Namespace mode (``feature_namespaces`` enabled, which is what
      ``parse_xml`` does): the parser resolves prefixes itself and calls
      ``startPrefixMapping`` / ``startElementNS`` / ``endElementNS``.
      Default namespaces, namespaced attributes (``xml:lang``, ...) and
      namespace scoping are handled correctly.
    * Plain mode: ``startElement`` / ``endElement`` resolve element prefixes
      by hand.  This path is kept for backward compatibility; it does not
      understand the default namespace (``xmlns="..."`` ends up as an
      ordinary attribute), drops attribute prefixes, and collects all
      declarations in one document-wide map.
    """

    def __init__(self):
        super().__init__()
        self.current = etree.Element("root")
        # Every prefix -> URI declaration seen so far (document-wide).
        self.nsmap = {}
        # Declarations reported for the element that is about to start
        # (namespace mode only).
        self._new_prefixes = {}
        # Character data collected since the last element boundary.
        self._text_chunks = []

    def _flush_text(self):
        """Store the buffered character data on the current element.

        The parser may deliver one text node in several ``characters()``
        calls, so the chunks are joined before stripping.  Whitespace-only
        text is discarded.  As before, text following a child element
        replaces the parent's ``text`` instead of becoming the child's tail.
        """
        text = "".join(self._text_chunks).strip()
        self._text_chunks.clear()
        if text:
            self.current.text = text

    # -- namespace mode ------------------------------------------------------

    def startPrefixMapping(self, prefix, uri):
        """Remember a namespace declaration for the next element.

        *prefix* is ``None`` for the default namespace.
        """
        # An empty URI un-declares a namespace; it cannot be put into an
        # nsmap, and the affected elements are reported without a URI anyway.
        if uri:
            self._new_prefixes[prefix] = uri
            self.nsmap[prefix] = uri

    def startElementNS(self, name, qname, attrs):
        """Open a new element; *name* is a ``(uri, localname)`` pair."""
        self._flush_text()
        uri, local = name
        tag = f"{{{uri}}}{local}" if uri else local
        attributes = {}
        for (attr_uri, attr_local), value in attrs.items():
            key = f"{{{attr_uri}}}{attr_local}" if attr_uri else attr_local
            attributes[key] = value
        # Only the declarations made on this element are passed on; the
        # ones of the ancestors are inherited through the tree.
        self.current = etree.SubElement(
            self.current, tag, attributes, nsmap=self._new_prefixes
        )
        self._new_prefixes = {}

    def endElementNS(self, name, qname):
        """Close the current element and step back to its parent."""
        self._flush_text()
        self.current = self.current.getparent()

    # -- plain (non-namespace) mode, kept for backward compatibility ---------

    def startElement(self, name, attrs):
        """Open a new element from a raw ``prefix:local`` name."""
        self._flush_text()
        attributes = {}
        for key, value in attrs.items():
            parts = key.split(":")
            if len(parts) == 2 and parts[0] == "xmlns":
                # xmlns:prefix="uri" declaration.
                self.nsmap[parts[-1]] = value
            else:
                attributes[parts[-1]] = value
        parts = name.split(":")
        if len(parts) == 2:
            prefix, local = parts
            # Unknown prefixes fall back to the prefix text itself.
            tag = f"{{{self.nsmap.get(prefix, prefix)}}}{local}"
        else:
            tag = parts[-1]
        self.current = etree.SubElement(
            self.current, tag, attributes, nsmap=self.nsmap
        )

    def endElement(self, name):
        """Close the current element and step back to its parent."""
        self._flush_text()
        self.current = self.current.getparent()

    # -- both modes ----------------------------------------------------------

    def characters(self, data):
        """Buffer a chunk of character data until the next element boundary."""
        self._text_chunks.append(data)


def parse_xml(io, base_url=None):
    """Parse *io* with the event-based SAX parser and return an ElementTree.

    :param io: anything ``xml.sax`` parsers accept as a source (a file name
        or URL string, a file-like object, or an ``InputSource``).
    :param base_url: accepted for signature compatibility; not used.
    :return: ``etree.ElementTree`` wrapping the document element.  The
        element is still attached to the handler's synthetic ``root``
        element.
    :raises xml.sax.SAXParseException: if the input is not well-formed.
    """
    parser = xml.sax.make_parser()
    # Let the parser resolve prefixes so default namespaces, namespaced
    # attributes and scoping are handled correctly.
    parser.setFeature(xml.sax.handler.feature_namespaces, True)
    # Never fetch external entities referenced by the document.
    parser.setFeature(xml.sax.handler.feature_external_ges, False)
    handler = XML()
    parser.setContentHandler(handler)
    parser.parse(io)
    return etree.ElementTree(handler.current[0])