开发者

How do I write a large XML string to a file? (Python xml.dom.minidom)

I'm currently building large XML files with xml.dom.minidom and then writing them out to a file via toprettyxml. Is there a way to stream the XML to the file instead? I'm hitting memory errors.

def run(self):
    """Worker loop: archive each queued SimpleDB domain to S3 as an XML file.

    Repeatedly takes a domain name from ``self.queue``, builds a minidom
    document with one ``<item>`` element per item in the domain, writes it
    to ``<domain>.xml`` in the current directory, uploads that file to the
    "simpledbbu" S3 bucket under ``YYYYMMDD/<domain name>.xml``, and then
    removes the local file.

    The whole document is held in memory and serialised in one call to
    ``toprettyxml``, so memory use grows with the size of the domain.

    Failures for a domain are printed and the loop moves on;
    ``task_done()`` is always called for each queue entry.
    """
    while True:
        domain = self.queue.get()
        try:
            conn = boto.connect_sdb(awsa, awss)
            sdbdomain = conn.get_domain(domain)
            s3conn = boto.connect_s3(awsa, awss)
            archbucket = s3conn.get_bucket("simpledbbu")
            doc = Document()
            root = doc.createElement("items")
            doc.appendChild(root)
            for item in sdbdomain:
                node = doc.createElement("item")
                node.setAttribute("itemName", item.name)
                for k, v in item.items():
                    if not isinstance(v, basestring):
                        # Multi-valued attribute: emit one XML attribute per
                        # value, suffixed with its index ("name::0", ...).
                        for i, val in enumerate(v):
                            node.setAttribute("{0}::{1}".format(k, i), val)
                    else:
                        node.setAttribute(k, v)
                root.appendChild(node)
            k = Key(archbucket)
            k.key = "{0}/{1}.xml".format(datetime.date.today().strftime("%Y%m%d"), sdbdomain.name)
            filename = domain + ".xml"
            # Mode "w" already truncates; the with-block guarantees the file
            # is closed before upload even if serialisation fails.
            with open(filename, "w") as f:
                f.write(doc.toprettyxml(indent="  "))
            k.set_contents_from_filename(filename)
            os.remove(os.path.join(os.getcwd(), filename))
        except Exception:
            # Best effort: report the failure and keep the worker alive.
            print("failed to load domain: {0}".format(domain))
            print(formatExceptionInfo())
        finally:
            self.queue.task_done()


building large xml files with xml.dom.minidom and then writing them out to file via the toprettyxml.

If you run out of memory you should probably stop doing that.

You can build XML with simple string manipulation.

from xml.sax.saxutils import quoteattr

# Stream the XML straight to disk, one <item> element at a time, so that no
# DOM for the whole domain is ever held in memory.
with open(domain + ".xml", "w") as f:
    f.write('<?xml version="1.0" ?>\n')
    f.write("<items>\n")
    for item in sdbdomain:
        # Attribute strings for this single item only; discarded after writing.
        buffer = ['itemName={0}'.format(quoteattr(item.name))]
        for k, v in item.items():
            if not isinstance(v, basestring):
                # Multi-valued attribute: one XML attribute per value,
                # each appended inside the loop so none are lost.
                for i, val in enumerate(v):
                    buffer.append('{0}::{1}={2}'.format(k, i, quoteattr(val)))
            else:
                buffer.append('{0}={1}'.format(k, quoteattr(v)))
        # quoteattr() escapes &, <, > and quotes and adds the surrounding
        # quote characters, so the values cannot break the markup.
        f.write("  <item {0}/>\n".format(" ".join(buffer)))
    f.write("</items>\n")
# The file is closed here; upload it under the same key scheme as before.
k = Key(archbucket)
k.key = "{0}/{1}.xml".format(datetime.date.today().strftime("%Y%m%d"), sdbdomain.name)
k.set_contents_from_filename(f.name)

Something like that ought to allow you to write the XML to a temporary file without making a large DOM object in memory.

0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜