pythonでjson（仮）

pythonでXMLからJSONへのトランスレータを書いたので載せてみます。
クライアント側のjavascriptがとても重くて、サーバ側のパワーがあまりまくってるので作りました。
一応調べてみたけど単純なやつはさっと出てこなかったので作りました。
移植性が目的でPythonで書きました。lispとかはだめです！絶対に犯罪です！
Pythonは初めて書いたので、うまく書けているか自信がありませんが、
Pythonistaさんたちがみて「こんなんありえねーよww」とかがあったら生殺しにしないでぜひ突っ込みお願いします。
厳密にテストしたわけではありませんが、JKL.ParseXML(http://www.kawa.net/works/js/jkl/parsexml.html)フォーマットにあわせてあります。
端末の文字コードをUTF-8にしないと、日本語が使えません。
後でもうちょっと綺麗にする予定。
しました -> d:id:nagayoru:20110203:1296698416

# -*- coding: utf-8 -*-
import json
import re
import string
import sys
import timeit
import xml.sax

class XMLtoJSON_ContentHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that accumulates a document and writes it as JSON.

    Conversion rules implemented by this class:

    * attributes and child elements become keys of the element's object;
    * text is stored under the ``"#text"`` key; an element holding nothing
      but a single text value collapses to a plain string;
    * consecutive text chunks are concatenated, while text chunks separated
      by a child element are collected into a list;
    * repeated keys (same-named children, or an attribute sharing a child's
      name) are merged into a list;
    * elements without attributes, text or non-empty children are dropped.

    The JSON text is written to ``output`` when the document ends.
    """

    # Text type(s) for both Python 2 (``basestring``) and Python 3 (``str``).
    try:
        _STRING_TYPES = (basestring,)  # noqa: F821 - Python 2 only
    except NameError:
        _STRING_TYPES = (str,)

    # Drops leading whitespace and at most one trailing newline.  DOTALL makes
    # a chunk with interior newlines match as a whole instead of being lost.
    _TEXT_PATTERN = re.compile(r'\s*(.*?)\n?\Z', re.DOTALL)

    def __init__(self, output):
        """Create a handler that writes its JSON to *output*.

        Args:
            output: object with a ``write(text)`` method.
        """
        # Explicit base call: ContentHandler is an old-style class on
        # Python 2, so ``super()`` is not usable there.
        xml.sax.handler.ContentHandler.__init__(self)

        self.output = output
        self.data = {}                  # root of the converted tree
        self.last_called = "__init__"   # name of the last event that ran
        self.p_data = [self.data]       # stack of dicts for open elements
        self.continuations = []         # stack of "attach to parent" closures
        self.push_text_node = []        # stack of "add text" closures

        self.indent = 2
        self.indent_space = ' ' * self.indent
        self.pretty_print = 1

    def __del__(self):
        """No-op finalizer, kept so the class surface stays unchanged."""
        pass

    def startDocument(self):
        """Nothing to do at document start."""
        pass

    def endDocument(self):
        """Write the accumulated tree as JSON once parsing is complete."""
        self.print_json()

    def characters(self, content):
        """Record a chunk of character data for the innermost open element.

        Chunks that are empty after stripping leading whitespace are ignored
        and do not interrupt a run of consecutive text chunks.
        """
        text = self._TEXT_PATTERN.match(content).group(1)
        if not text or not self.push_text_node:
            # Whitespace-only chunk, or text outside of any element.
            return
        # Directly consecutive chunks are one logical text node.
        self.push_text_node[-1](text, self.last_called == "characters")
        self.last_called = "characters"

    def ignorableWhitespace(self, w):
        """Ignorable whitespace is discarded."""
        pass

    def startElement(self, name, attr):
        """Open element *name*, seeding its data with its attributes."""
        parent = self.p_data[-1]
        data = dict((key, attr.getValue(key)) for key in attr.getNames())

        def push_text(node, continued_p):
            # Add text to this element; continued_p means "append to the
            # previous chunk" rather than "start a new text entry".
            existing = data.get('#text')
            if not existing:
                data['#text'] = node
            elif isinstance(existing, list):
                if continued_p:
                    existing[-1] = existing[-1] + node
                else:
                    existing.append(node)
            elif continued_p:
                data['#text'] = existing + node
            else:
                data['#text'] = [existing, node]

        def cont():
            # Runs when the element closes: attach its value to the parent.
            if not data:
                return  # empty elements are dropped
            value = data
            only_text = len(data) == 1 and '#text' in data
            if only_text and not isinstance(data['#text'], list):
                # Text-only element collapses to the bare string.
                value = data['#text']

            existing = parent.get(name)
            if not existing:
                parent[name] = value
            elif isinstance(existing, list):
                existing.append(value)
            else:
                parent[name] = [existing, value]

        self.push_text_node.append(push_text)
        self.continuations.append(cont)
        self.p_data.append(data)
        self.last_called = "startElement"

    def endElement(self, name):
        """Close the innermost element and attach it to its parent."""
        self.p_data.pop()
        self.push_text_node.pop()
        attach = self.continuations.pop()
        attach()
        self.last_called = "endElement"

    def print_json(self):
        """Write ``self.data`` to ``self.output`` as JSON text.

        Every object starts on a new line; when ``pretty_print`` is truthy it
        is indented by nesting depth, and its following members are aligned
        one column past the opening brace.  Keys and string values are
        escaped with ``json.dumps`` so quotes, backslashes and control
        characters yield valid JSON; non-ASCII characters are written as-is.
        """
        write = self.output.write
        pretty = self.pretty_print
        pad = self.indent_space

        def quote(text):
            return json.dumps(text, ensure_ascii=False)

        def emit(node, nesting=0):
            if isinstance(node, dict):
                write("\n")
                if pretty:
                    write(pad * nesting)
                write("{")
                for position, (key, value) in enumerate(node.items()):
                    if position:
                        write(",")
                        if pretty:
                            write("\n " + pad * nesting)
                    write(quote(key) + ':')
                    emit(value, nesting + 1)
                write("}")
            elif isinstance(node, list):
                write("[")
                for position, item in enumerate(node):
                    if position:
                        write(',')
                    emit(item, nesting + 1)
                write("]")
            elif isinstance(node, self._STRING_TYPES):
                write(quote(node))

        emit(self.data)


def xml_parse_string(string, output=None):
    """Parse the XML document *string* and write its JSON form to *output*.

    Args:
        string: the XML document to parse.
        output: object with a ``write(text)`` method.  Defaults to the
            ``sys.stdout`` in effect when the function is called, so that
            redirecting ``sys.stdout`` after import is honoured.
    """
    if output is None:
        output = sys.stdout
    xml.sax.parseString(string, XMLtoJSON_ContentHandler(output))

例）

// 例1,2,3は JKL.ParseXML(http://www.kawa.net/works/js/jkl/parsexml.html) から頂きました。
// 1（基本）
// xml /////////////
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<items>
  <item>
    <jcity>Chiyoda-ku</jcity>
    <jlocal>Chiyoda</jlocal>
    <jpref>TOKYO</jpref>
    <pref_cd>13</pref_cd>
    <zip_cd>1000001</zip_cd>
  </item>
</items>

// json /////////////
{"items":
     {"item":
          {"jlocal":"Chiyoda",
           "zip_cd":"1000001",
           "jpref":"TOKYO",
           "pref_cd":"13",
           "jcity":"Chiyoda-ku"}}}

// 2（結合）
// xml /////////////
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<children>
  <girl>HANAKO</girl>
  <boy>TARO</boy>
  <boy>JIRO</boy>
</children>

// json /////////////
{"children":
  {"boy":["TARO","JIRO"],
   "girl":"HANAKO"}}

// 3（長文）
// xml /////////////
<?xml version="1.0" encoding="UTF-8" ?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel rdf:about="http://www.kawa.net/xp/index-j.html">
<title>Kawa.net xp - Ajax＆Perl技術情報（川崎有亮）</title>
<link>http://www.kawa.net/xp/index-j.html</link>
<dc:date>2010-10-31T20:57:00+09:00</dc:date>
<dc:language>ja</dc:language>
<dc:rights>Copyright 1995-2010 Yusuke Kawasaki. All rights reserved.</dc:rights>
<description>川崎有亮の制作したプログラムのご紹介・技術情報など。ajax/JavaScript/Perl/CGI/...</description>
<image rdf:resource="http://www.kawa.net/xp/images/xp-title-128x32.gif" />
<items>
<rdf:Seq>
<rdf:li rdf:resource="http://kawa.at.webry.info/201101/article_3.html" />
<rdf:li rdf:resource="http://kawa.at.webry.info/201101/article_2.html" />
................................以下略

// json /////////////
{"rdf:RDF":
  {"xmlns:rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
   "xmlns":"http://purl.org/rss/1.0/",
   "image":
    {"url":"http://www.kawa.net/xp/images/xp-title-128x32.gif",
     "link":"http://www.kawa.net/xp/index-j.html",
     "rdf:about":"http://www.kawa.net/xp/images/xp-title-128x32.gif",
     "title":"Kawa.net xp - Ajax＆Perl技術情報（川崎有亮）"},
   "item":[
      {"dc:date":"2011-01-12T02:55:00+09:00",
       "description":"openFrameworks では、Windows・Mac 向けアプリに限らず、ofxiPhone アドオンでiPhone・iPod touch・iPad 上で稼働するアプリケーションを開発することができます。OpenCV など openFrameworks のライブラリを手軽に iPhone 上で使えるらしい。",
       "dc:creator":"Kawanet Tech Blog",
       "title":"openFrameworks＋ofxiPhoneでiPhone・iPad用Hello, world!",
       "link":"http://kawa.at.webry.info/201101/article_3.html",
       "rdf:about":"http://kawa.at.webry.info/201101/article_3.html",
       "dc:subject":["Xcode","openFrameworks","iPhone"]},
................................以下略

// 4 (複合)
// xml /////////////
<?xml version='1.0' encoding='UTF-8'?>
<abc version='0.1'>
  auuauauuaau
  byaaaaaaaaaaaaaaaou
  <version><moe>gyoeee</moe></version>
  <list>
    uiuiueoooo
    <file permisssion='0777' size='10485760'><![CDATA[/var/test.dat]]></file>
    <file><![CDATA[/var/test0.dat]]></file>
    <file permisssion='0644' modifiedTime='1296081769'><![CDATA[/var/test1.dat]]></file>
    <file></file>
    <file permisssion='0744' owner='0' group='0' checksum='0'><![CDATA[/var/test2.dat]]></file>
    myaoooou
    <file><![CDATA[%2f%2f%2f"\\""%2f%2f%2f]]></file>
  </list>
  hieeeeeeeeeeeee
</abc>

// json /////////////
{"abc":
  {"#text":["auuauauuaaubyaaaaaaaaaaaaaaaou","hieeeeeeeeeeeee"],
   "version":["0.1",
      {"moe":"gyoeee"}],
   "list":
    {"#text":["uiuiueoooo","myaoooou"],
     "file":[
        {"permisssion":"0777",
         "#text":"/var/test.dat",
         "size":"10485760"},"/var/test0.dat",
        {"permisssion":"0644",
         "#text":"/var/test1.dat",
         "modifiedTime":"1296081769"},
        {"owner":"0",
         "permisssion":"0744",
         "#text":"/var/test2.dat",
         "group":"0",
         "checksum":"0"},"%2f%2f%2f\"\\"\"%2f%2f%2f"]}}}

// *連続したテキストは結合する。
// *attributeとnodeに同じものがあったら結合する。
// *空ノードは捨てる。
// *CDATAは普通のテキストにする。

辞書のキー順が保持されず出力の順番が変わってしまうので、3番は正確に出来ているのかわかりにくいですが……。