[ Index ] |
|
Code source de Kupu-1.3.5 |
"""Simple script to generate .pox files

parses XML for i18n attrs and JS files for _() calls and generates an
XML .pox template document (.poxt file)

(c) Guido Wesdorp 2005

"""

from xml.dom.minidom import parseString, getDOMImplementation
import sys
import re
import os

stderr = sys.stderr

warn_on_broken_xml = True

# matches a run of two or more spaces, used to collapse whitespace in msgids
_MULTISPACE = re.compile(' {2,}')


def _normalize_whitespace(text):
    """Strip text, turn newlines/tabs into spaces and collapse space runs."""
    text = text.strip().replace('\n', ' ').replace('\t', ' ')
    return _MULTISPACE.sub(' ', text)


class POX:
    """container for the results

    Builds a DOM document with a 'catalog' root that holds one 'message'
    element per unique (whitespace-normalized) message id.
    """

    def __init__(self):
        impl = getDOMImplementation()
        self.doc = impl.createDocument(None, 'catalog', None)
        self.root = self.doc.documentElement
        self.processed = {}  # mapping from msgid to ([filenames], node)

    def add(self, msgid, filename):
        """Register msgid as found in filename.

        The msgid is whitespace-normalized first.  A msgid that was seen
        before only gets the filename added to the 'filenames' attribute
        of its existing message node (once per file); a new msgid gets a
        message node with msgid and msgstr children, both holding the
        msgid text.
        """
        # strip and reduce whitespace
        msgid = _normalize_whitespace(msgid)
        if msgid in self.processed:
            filenames, node = self.processed[msgid]
            if filename not in filenames:
                filenames.append(filename)
                node.setAttribute('filenames', ' '.join(filenames))
            return
        doc = self.doc
        # add the nodes
        msgnode = doc.createElement('message')
        msgnode.setAttribute('filenames', filename)
        msgidnode = doc.createElement('msgid')
        msgidnode.appendChild(doc.createTextNode(msgid))
        msgnode.appendChild(msgidnode)
        msgstrnode = doc.createElement('msgstr')
        msgstrnode.appendChild(doc.createTextNode(msgid))
        msgstrnode.setAttribute('i18n:translate', '')
        msgnode.appendChild(msgstrnode)
        self.root.appendChild(msgnode)
        self.processed[msgid] = ([filename], msgnode)

    def get_result(self):
        """Return the catalog document as pretty-printed XML."""
        return self.doc.toprettyxml()


class XMLParser:
    """scans XML files (or well-formed HTML files, obviously) for i18 attrs"""

    def __init__(self, files, pox):
        self._current = None
        for filename in files:
            self.parse_file(filename, pox)

    def parse_file(self, filename, pox):
        """Parse filename and add every message id found to pox.

        A file that cannot be parsed is skipped; when the module-level
        warn_on_broken_xml flag is true a message is written to stderr.
        Raises AttributeError when i18n:attributes names an attribute the
        element does not have.
        """
        # read bytes so the XML parser can honour the declared encoding
        with open(filename, 'rb') as fp:
            data = fp.read()
        try:
            dom = parseString(data)
        except Exception:
            exc, e = sys.exc_info()[:2]
            if warn_on_broken_xml:
                stderr.write('Error parsing %s: %s - %s\n' %
                             (filename, exc, e))
            return
        # walk through all the nodes and scan for i18n: stuff
        self._current = None
        while 1:
            node = self.next_node(dom)
            if node is None:
                break
            if node.nodeType != 1:
                continue
            attrs = node.attributes
            translate = attrs.getNamedItem('i18n:translate')
            if translate is not None:
                msgid = translate.value
                if not msgid.strip():
                    # no explicit id: the element content is the msgid
                    msgid = self.extract_text(node)
                pox.add(msgid, filename)
            attributes = attrs.getNamedItem('i18n:attributes')
            if attributes is not None:
                for attr in attributes.value.split(';'):
                    attr = attr.strip()
                    if not attr:
                        # tolerate a trailing or doubled ';'
                        continue
                    attritem = attrs.getNamedItem(attr)
                    if attritem is None:
                        raise AttributeError(
                            'No %s on %s in %s' % (
                                attr, node.nodeName, filename))
                    pox.add(attritem.value, filename)

    def extract_text(self, node):
        """Return the serialized children of node with whitespace reduced.

        The children are serialized together before normalizing, so the
        whitespace between a text node and a neighbouring element is
        kept as a single space instead of being dropped.
        """
        xml = ''.join([child.toxml() for child in node.childNodes])
        return _normalize_whitespace(xml)

    def next_node(self, dom):
        """Return the node following the current one in document order.

        Starts at the document element when there is no current node or
        the current node belongs to another document; returns None when
        the walk is finished.
        """
        cur = self._current
        if cur is None or cur.ownerDocument is not dom:
            self._current = dom.documentElement
        elif cur.hasChildNodes():
            self._current = cur.firstChild
        else:
            # climb until some ancestor (or the node itself) has a next
            # sibling; stopping after one level would skip the rest of
            # the document below a deeply nested last child
            while cur is not None and cur.nextSibling is None:
                cur = cur.parentNode
            if cur is None:
                self._current = None
            else:
                self._current = cur.nextSibling
        return self._current


class JSParser:
    """scans JS files for _() calls"""

    def __init__(self, files, pox):
        for filename in files:
            self.parse_file(filename, pox)

    _startfuncreg = re.compile(r'.*?[^a-zA-Z0-9_]_\(')
    _startfuncreg_2 = re.compile(r'^_\(')

    def parse_file(self, filename, pox):
        """Scan filename line by line and add each _() literal to pox.

        Only the first _() call on a line is picked up.  Raises
        ValueError when a call has empty content or content that
        _get_func_content does not recognize.
        """
        with open(filename) as fp:
            lines = fp.readlines()
        more = False
        chunks = []
        lineno = 0
        for line in lines:
            lineno += 1
            starts_call = (self._startfuncreg.search(line) or
                           self._startfuncreg_2.search(line))
            if not more and not starts_call:
                continue
            chunk, more = self._get_func_content(line, filename,
                                                 lineno, more)
            chunks.append(chunk)
            if more:
                # literal continues on a following line
                continue
            literal = ''.join(chunks).strip()
            chunks = []
            if not literal:
                raise ValueError('Unrecognized function content -- '
                                 'file %s, line %s' % (
                                     filename, lineno))
            pox.add(_normalize_whitespace(literal), filename)
        if more:
            stderr.write('Warning: unterminated _() call at end of %s\n' %
                         filename)

    def _get_func_content(self, line, filename, lineno, more=False):
        """return the content of the _() call in line

        if more is True, this will assume the function is already opened
        and continue adding to the result from the start of the line
        without searching for '[^a-zA-Z_]_(' first

        returns a tuple (content, more) where more is True if the end of
        the function body is not reached, in that case this method should
        be called again with the 'more' argument set to True

        a line with nothing left to read (blank continuation line, or
        nothing after the opening '_(') returns ('', True) so the caller
        moves on to the next line

        raises ValueError if the line holds no _() call, the content does
        not start with a quote, the string is not terminated on the line
        or a '+' is followed by anything on the same line
        """
        line = line.strip()
        if not more:
            match = self._startfuncreg.search(line) or \
                        self._startfuncreg_2.search(line)
            if match is None:
                raise ValueError('no _() call found -- (file %s, line %s)' %
                                 (filename, lineno))
            # drop everything up to and including the opening '_('
            line = line[match.end():].strip()
        if not line:
            return '', True
        quote = line[0]
        line = line[1:]
        if quote not in ('"', "'"):
            raise ValueError('beginning of function body not a recognized '
                             'quote character: %s -- (file %s, line %s)' % (
                                 quote, filename, lineno))
        ret = []
        escaped = False
        end = -1
        for index, char in enumerate(line):
            if escaped:
                # the previous backslash escapes this character
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                end = index
                break
            # backslashes are kept in the result as they were written
            ret.append(char)
        if end < 0:
            raise ValueError('unterminated string literal -- '
                             '(file %s, line %s)' % (filename, lineno))

        # find out if we should continue after this (do we have a '+'
        # or a ');'?)
        more = False
        line = line[end + 1:].strip()
        if line and line[0] == '+':
            line = line[1:].strip()
            if line:
                raise ValueError('string concatenation only allowed for '
                                 'multiline strings, not for variable '
                                 'interpolation (use ${} instead) -- '
                                 '(file %s, line %s)' % (
                                     filename, lineno))
            more = True
        return ''.join(ret), more


def main(files):
    """Extract messages from files and write the catalog to stdout.

    Files ending in '.js' are scanned by JSParser, all others by
    XMLParser; progress messages go to stderr.
    """
    stderr.write('POX extract v0.1\n')
    stderr.write('(c) Guido Wesdorp 2004\n')
    stderr.write('Going to parse files %s\n' % ', '.join(files))
    pox = POX()
    xml = [f for f in files if not f.endswith('.js')]
    js = [f for f in files if f.endswith('.js')]
    XMLParser(xml, pox)
    JSParser(js, pox)
    pres = pox.get_result()
    pres = pres.replace('<catalog>',
                ('<catalog xmlns:i18n="http://xml.zope.org/namespaces/i18n" '
                    'i18n:domain="kupu">'))
    sys.stdout.write(pres + '\n')
    stderr.write('Done\n')


if __name__ == '__main__':
    main(sys.argv[1:])
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 15:30:41 2007 | par Balluche grâce à PHPXref 0.7 |