Kupu-1.3.5 : /tools/makepox.py source

[Sommaire] [Imprimer]
   1  """Simple script to generate .pox files
   2  
   3      parses XML for i18n attrs and JS files for _() calls and generates an
   4      XML .pox template document (.poxt file)
   5  
   6      (c) Guido Wesdorp 2005
   7  
   8  """
   9  
  10  from xml.dom.minidom import parseString, getDOMImplementation
  11  import sys, re, os
  12  
# Diagnostics and progress messages are written to this stream, so that
# stdout carries only the generated catalog.
stderr = sys.stderr

# When true, XMLParser.parse_file reports files that could not be parsed as
# XML on stderr; such files are skipped either way.
warn_on_broken_xml = True
  16  
  17  class POX:
  18      """container for the results"""
  19      def __init__(self):
  20          impl = getDOMImplementation()
  21          self.doc = impl.createDocument(None, 'catalog', None)
  22          self.root = self.doc.documentElement
  23          self.processed = {} # mapping from mid to ([filenames], node)
  24  
  25      def add(self, msgid, filename):
  26          # strip and reduce whitespace
  27          msgid = msgid.strip().replace('\n', ' ').replace('\t', ' ')
  28          while msgid.find('  ') > -1:
  29              msgid.replace('  ', ' ')
  30          if self.processed.has_key(msgid):
  31              filenames, node = self.processed[msgid]
  32              if not filename in filenames:
  33                  filenames.append(filename)
  34                  node.setAttribute('filenames', 
  35                      '%s %s' % (node.getAttribute('filenames'), filename))
  36              return
  37          doc = self.doc
  38          root = self.root
  39          # add the nodes
  40          msgnode = doc.createElement('message')
  41          msgnode.setAttribute('filenames', filename)
  42          root.appendChild(msgnode)
  43          msgidnode = doc.createElement('msgid')
  44          msgidnode.appendChild(doc.createTextNode(msgid))
  45          msgnode.appendChild(msgidnode)
  46          msgstrnode = doc.createElement('msgstr')
  47          msgstrnode.appendChild(doc.createTextNode(msgid))
  48          msgnode.appendChild(msgstrnode)
  49          msgstrnode.setAttribute('i18n:translate', '')
  50          root.appendChild(msgnode)
  51          self.processed[msgid] = ([filename], msgnode)
  52  
  53      def get_result(self):
  54          return self.doc.toprettyxml()
  55  
  56  class XMLParser:
  57      """scans XML files (or well-formed HTML files, obviously) for i18 attrs"""
  58      def __init__(self, files, pox):
  59          self._current = None
  60          for file in files:
  61              self.parse_file(file, pox)
  62  
  63      def parse_file(self, filename, pox):
  64          fp = open(filename)
  65          try:
  66              dom = parseString(fp.read())
  67          except:
  68              exc, e, tb = sys.exc_info()
  69              del tb
  70              if warn_on_broken_xml:
  71                  print >>stderr, 'Error parsing %s: %s - %s' % (filename, exc, e)
  72              return
  73          # walk through all the nodes and scan for i18n: stuff
  74          while 1:
  75              node = self.next_node(dom)
  76              if not node:
  77                  break
  78              if node.nodeType == 1:
  79                  attrs = node.attributes
  80                  translate = attrs.getNamedItem('i18n:translate')
  81                  if translate:
  82                      msgid = translate.value
  83                      if not msgid.strip():
  84                          msgid = self.extract_text(node)
  85                      pox.add(msgid, filename)
  86                  attributes = attrs.getNamedItem('i18n:attributes')
  87                  if attributes:
  88                      attributes = [a.strip() for a in 
  89                                          attributes.value.split(';')]
  90                      for attr in attributes:
  91                          attritem = attrs.getNamedItem(attr)
  92                          if not attritem:
  93                              raise AttributeError, \
  94                                  'No %s on %s in %s' % (
  95                                      attr, node.nodeName, filename)
  96                          msgid = attritem.value;
  97                          pox.add(msgid, filename)
  98  
  99      def extract_text(self, node):
 100          xml = ''
 101          for child in node.childNodes:
 102              xml += child.toxml().strip().replace('\n', ' ').replace('\t', ' ')
 103          while xml.find('  ') > -1:
 104              xml = xml.replace('  ', ' ')
 105          return xml
 106  
 107      def next_node(self, dom):
 108          if not self._current or self._current.ownerDocument != dom:
 109              self._current = dom.documentElement
 110          else:
 111              cur = self._current
 112              if cur.hasChildNodes():
 113                  self._current = cur.childNodes[0]
 114              elif cur != cur.parentNode.lastChild:
 115                  self._current = cur.nextSibling
 116              else:
 117                  self._current = cur.parentNode.nextSibling
 118          return self._current
 119  
 120  class JSParser:
 121      """scans JS files for _() calls"""
 122      def __init__(self, files, pox):
 123          for file in files:
 124              self.parse_file(file, pox)
 125  
 126      _startfuncreg = re.compile('.*?[^a-zA-Z0-9_]_\(')
 127      _startfuncreg_2 = re.compile('^_\(')
 128      def parse_file(self, filename, pox):
 129          lines = open(filename).readlines()
 130          lineno = 0
 131          more = False
 132          chunks = []
 133          for line in lines:
 134              lineno += 1
 135              if more is True or self._startfuncreg.search(line):
 136                  chunk, more = self._get_func_content(line, filename, 
 137                                                          lineno, more)
 138                  chunks.append(chunk)
 139              if chunks and more is False:
 140                  literal = ''.join(chunks).strip()
 141                  if not literal:
 142                      raise ValueError, ('Unrecognized function content -- ' 
 143                                          'file %s, line %s' % (
 144                                              filename, lineno))
 145                  literal = literal.replace('\t', ' ').replace('\n', ' ')
 146                  while literal.find('  ') > -1:
 147                      literal = literal.replace('  ', ' ')
 148                  more = False
 149                  chunks = []
 150                  pox.add(literal, filename)
 151                  
 152      def _get_func_content(self, line, filename, lineno, more=False):
 153          """return the content of the _() call in line
 154  
 155              if more is True, this will assume the function is already opened
 156              and continue adding to the result from the start of the line 
 157              without searching for '[^a-zA-Z_]_(' first
 158  
 159              returns a tuple (content, more) where more is True if the end of
 160              the function body is not reached, in that case this method should
 161              be called again with the 'more' argument set to True
 162          """
 163          line = line.strip()
 164          if not more:
 165              match = self._startfuncreg.search(line) or \
 166                          self._startfuncreg_2.search(line)
 167              line = line.replace(match.group(0), '')
 168          line = line.strip()
 169          quote = line[0]
 170          line = line[1:]
 171          if not quote in ['"', "'"]:
 172              raise ValueError, ('beginning of function body not a recognized '
 173                                  'quote character: %s -- (file %s, line %s)' % (
 174                                      quote, filename, lineno))
 175          ret = []
 176          previous_char = None
 177          while 1:
 178              new_char = line[0]
 179              line = line[1:]
 180              if new_char == quote:
 181                  if previous_char != '\\':
 182                      break
 183              ret.append(new_char)
 184              previous_char = new_char
 185          
 186          # find out if we should continue after this (do we have a '+' 
 187          # or a ');'?)
 188          more = False
 189          line = line.strip()
 190          if line and line[0] == '+':
 191              line = line[1:].strip()
 192              if line:
 193                  raise ValueError, ('string concatenation only allowed for '
 194                                      'multiline strings, not for variable '
 195                                      'interpolation (use ${} instead) -- '
 196                                      '(file %s, line %s)' % (
 197                                          filename, lineno))
 198              more = True
 199          return ''.join(ret), more
 200  
# Command line entry point: every argument is a file to scan. The catalog is
# printed to stdout, everything else (banner, progress) goes to stderr.
if __name__ == '__main__':
    print >>stderr, 'POX extract v0.1'
    print >>stderr, '(c) Guido Wesdorp 2004'
    files = sys.argv[1:]
    print >>stderr, 'Going to parse files', ', '.join(files)
    pox = POX()
    # files ending in '.js' are scanned for _() calls, all other files are
    # treated as XML
    xml = [f for f in files if not f.endswith('.js')]
    js = [f for f in files if f.endswith('.js')]
    # both parsers do their work in the constructor and fill pox
    XMLParser(xml, pox)
    JSParser(js, pox)
    pres = pox.get_result()
    # the i18n namespace declaration and the domain are patched into the
    # serialized text of the root start tag
    # NOTE(review): presumably an empty catalog serializes as '<catalog/>'
    # and is then left unpatched -- confirm
    pres = pres.replace('<catalog>',
        ('<catalog xmlns:i18n="http://xml.zope.org/namespaces/i18n" '
        'i18n:domain="kupu">'))
    print pres
    print >>stderr, 'Done'
Code source de Kupu-1.3.5

/tools/ -> makepox.py (source)