Kupu-1.3.5 : /python/nationalizer.py source

[Sommaire] [Imprimer]
   1  #!/usr/bin/python2.3
   2  
   3  """Return the Kupu .html file with i18n applied"""
   4  
   5  from xml.dom.minidom import parseString
   6  import os
   7  
# Symbolic markers used while parsing .po files; presumably ID stands for
# a "msgid" line and STR for a "msgstr" line (TODO confirm intended use).
ID = 0
STR = 1

# Namespace URI under which the i18n attributes ('translate' and
# 'attributes') are looked up on elements and removed again.
I18NNS = 'http://xml.zope.org/namespaces/i18n'
  12  
  13  def ustr(i):
  14      if type(i) == unicode:
  15          return i
  16      else:
  17          return unicode(str(i), 'UTF-8')
  18  
  19  def get_locale():
  20      if os.environ.has_key('HTTP_ACCEPT_LANGUAGE'):
  21          charsets = [l.strip() for l in 
  22                  os.environ['HTTP_ACCEPT_LANGUAGE'].split(';')[0].split(',')]
  23          return charsets
  24  
  25  class Nationalizer:
  26      """Translates string in an HTML or XML file using i18n: directives"""
  27  
  28      not_single = ['a', 'abbr', 'acronym', 'address', 'applet', 
  29                      'b', 'bdo', 'big', 'blink', 'blockquote', 
  30                      'button', 'caption', 'center', 'cite', 
  31                      'comment', 'del', 'dfn', 'dir', 'div',
  32                      'dl', 'dt', 'em', 'embed', 'fieldset',
  33                      'font', 'form', 'frameset', 'h1', 'h2',
  34                      'h3', 'h4', 'h5', 'h6', 'i', 'iframe',
  35                      'ins', 'kbd', 'label', 'legend', 'li',
  36                      'listing', 'map', 'marquee', 'menu',
  37                      'multicol', 'nobr', 'noembed', 'noframes',
  38                      'noscript', 'object', 'ol', 'optgroup',
  39                      'option', 'p', 'pre', 'q', 's', 'script',
  40                      'select', 'small', 'span', 'strike', 
  41                      'strong', 'style', 'sub', 'sup', 'table',
  42                      'tbody', 'td', 'textarea', 'tfoot',
  43                      'th', 'thead', 'title', 'tr', 'tt', 'u',
  44                      'ul', 'xmp']
  45  
  46      def __init__(self, htmlfile, locale):
  47          self.htmlfile = htmlfile
  48          self.locale = locale
  49  
  50      def translate(self):
  51          """load and translate everything"""
  52          popath = self.get_po_file_path(self.locale)
  53          if popath is not None:
  54              pofp = open(popath)
  55              try:
  56                  msgcat = self.parse_po_file(pofp)
  57              finally:
  58                  pofp.close()
  59          else:
  60              # if no pofile, parse anyway to get rid of those nasty i18n:
  61              # attributes (obviously not very fast, perhaps we need to either
  62              # cache a parsed version and send that back or just remove the
  63              # attributes here)
  64              msgcat = {}
  65          xmlfp = open(self.htmlfile)
  66          try:
  67              xml = xmlfp.read()
  68          finally:
  69              xmlfp.close()
  70          dom = parseString(xml)
  71          self.apply_i18n(dom, msgcat)
  72          return self.serialize(dom.documentElement)
  73  
  74      def parse_po_file(self, pofp):
  75          """parse the .po file, create a mapping msgid->msgstr"""
  76          cat = {}
  77          state = None
  78          msgid = None
  79          msgstr = None
  80          for line in pofp.readlines():
  81              line = line.strip()
  82              if line.startswith('#') or not line:
  83                  continue
  84              if line.startswith('msgid'):
  85                  if msgid and msgstr:
  86                      cat[msgid] = msgstr
  87                  msgid = line[7:-1]
  88                  state = ID
  89              elif line.startswith('msgstr'):
  90                  msgstr = line[8:-1]
  91              else:
  92                  # ignore for now, might be a multiline msgstr, if we
  93                  # want to support those we should add some code here...
  94                  pass
  95          if msgid and msgstr:
  96              cat[msgid] = msgstr
  97          return cat
  98  
  99      def apply_i18n(self, dom, msgcat):
 100          """apply nationalization of the full dom"""
 101          nodes = dom.documentElement.getElementsByTagName('*')
 102          for node in nodes:
 103              if node.hasAttributeNS(I18NNS, 'translate'):
 104                  self.apply_translate(node, msgcat)
 105              if node.hasAttributeNS(I18NNS, 'attributes'):
 106                  self.apply_attributes(node, msgcat)
 107  
 108      def apply_translate(self, node, msgcat):
 109          """handle Zope-style i18n:translate"""
 110          buf = []
 111          msgid = msgstr = node.getAttributeNS(I18NNS, 'translate').strip()
 112          if not msgid:
 113              # no msgid in the attribute, use the node value
 114              for child in node.childNodes:
 115                  if child.nodeType == 3:
 116                      buf.append(child.nodeValue)
 117                  else:
 118                      raise TypeError, \
 119                          ('illegal element %s in i18n:translate element' % 
 120                              child.nodeName)
 121              msgid = msgstr = self.reduce_whitespace(u''.join(buf).strip())
 122          if msgcat.has_key(msgid):
 123              msgstr = msgcat[msgid]
 124          # now replace the contents of the node with the new contents
 125          while node.hasChildNodes():
 126              node.removeChild(node.firstChild)
 127          node.removeAttributeNS(I18NNS, 'translate')
 128          node.appendChild(node.ownerDocument.createTextNode(msgstr))
 129  
 130      def apply_attributes(self, node, msgcat):
 131          """handle Zope-style i18n:attributes"""
 132          attrnames = node.getAttributeNS(I18NNS, 'attributes').split(' ')
 133          for attr in attrnames:
 134              value = node.getAttribute(attr)
 135              if value and msgcat.has_key(value):
 136                  node.setAttribute(attr, unicode(msgcat[value], 'UTF-8'))
 137          node.removeAttributeNS(I18NNS, 'attributes')
 138  
 139      def reduce_whitespace(self, string):
 140          for char in ['\n', '\t', '\r']:
 141              string  = string.replace(char, ' ')
 142          while string.find('  ') > -1:
 143              string = string.replace('  ', ' ')
 144          return string
 145  
 146      def get_po_file_path(self, locale):
 147          for language in locale:
 148              startdir = '../i18n'
 149              language = language.split('-')
 150              pathstart = '%s/kupu-%s' % (startdir, language[0])
 151              paths = []
 152              if len(language) == 2:
 153                  paths.append('%s-%s.po' % (pathstart, language[1]))
 154              paths += [
 155                  '%s-default.po' % pathstart,
 156                  '%s.po' % pathstart,
 157                  ]
 158              for path in paths:
 159                  if os.path.isfile(path):
 160                      return path
 161  
 162      def serialize(self, el):
 163          buf = []
 164          if el.nodeType == 1:
 165              buf.append('<%s' % el.nodeName)
 166              if len(el.attributes):
 167                  for attr, value in el.attributes.items():
 168                      if value is not None:
 169                          buf.append(' %s="%s"' % (attr, self.entitize(value)))
 170              if el.hasChildNodes() or el.nodeName in self.not_single:
 171                  buf.append('>')
 172                  for child in el.childNodes:
 173                      buf += self.serialize(child)
 174                  buf.append('</%s>' % el.nodeName)
 175              else:
 176                  buf.append(' />')
 177          elif el.nodeType == 3:
 178              buf.append(el.nodeValue)
 179          else:
 180              print 'ignoring node of type', node.nodeType
 181          return ''.join([ustr(b) for b in buf])
 182  
 183      def entitize(self, string):
 184          string = string.replace('&', '&amp;')
 185          string = string.replace('<', '&lt;')
 186          string = string.replace('>', '&gt;')
 187          string = string.replace('"', '&quot;')
 188          return string
 189          
 190  if __name__ == '__main__':
 191      # test code
 192      os.chdir(os.path.abspath(os.path.dirname(__file__)))
 193      i = Nationalizer('../common/kupu.html', ['nl'])
 194      print i.translate().encode('UTF-8')
Code source de Kupu-1.3.5

/python/ -> nationalizer.py (source)