195 lines
7.1 KiB
Python
195 lines
7.1 KiB
Python
#!/usr/bin/python2.3
|
|
|
|
"""Return the Kupu .html file with i18n applied"""
|
|
|
|
from xml.dom.minidom import parseString
|
|
import os
|
|
|
|
ID = 0
|
|
STR = 1
|
|
|
|
I18NNS = 'http://xml.zope.org/namespaces/i18n'
|
|
|
|
def ustr(i):
|
|
if type(i) == unicode:
|
|
return i
|
|
else:
|
|
return unicode(str(i), 'UTF-8')
|
|
|
|
def get_locale():
|
|
if os.environ.has_key('HTTP_ACCEPT_LANGUAGE'):
|
|
charsets = [l.strip() for l in
|
|
os.environ['HTTP_ACCEPT_LANGUAGE'].split(';')[0].split(',')]
|
|
return charsets
|
|
|
|
class Nationalizer:
|
|
"""Translates string in an HTML or XML file using i18n: directives"""
|
|
|
|
not_single = ['a', 'abbr', 'acronym', 'address', 'applet',
|
|
'b', 'bdo', 'big', 'blink', 'blockquote',
|
|
'button', 'caption', 'center', 'cite',
|
|
'comment', 'del', 'dfn', 'dir', 'div',
|
|
'dl', 'dt', 'em', 'embed', 'fieldset',
|
|
'font', 'form', 'frameset', 'h1', 'h2',
|
|
'h3', 'h4', 'h5', 'h6', 'i', 'iframe',
|
|
'ins', 'kbd', 'label', 'legend', 'li',
|
|
'listing', 'map', 'marquee', 'menu',
|
|
'multicol', 'nobr', 'noembed', 'noframes',
|
|
'noscript', 'object', 'ol', 'optgroup',
|
|
'option', 'p', 'pre', 'q', 's', 'script',
|
|
'select', 'small', 'span', 'strike',
|
|
'strong', 'style', 'sub', 'sup', 'table',
|
|
'tbody', 'td', 'textarea', 'tfoot',
|
|
'th', 'thead', 'title', 'tr', 'tt', 'u',
|
|
'ul', 'xmp']
|
|
|
|
def __init__(self, htmlfile, locale):
|
|
self.htmlfile = htmlfile
|
|
self.locale = locale
|
|
|
|
def translate(self):
|
|
"""load and translate everything"""
|
|
popath = self.get_po_file_path(self.locale)
|
|
if popath is not None:
|
|
pofp = open(popath)
|
|
try:
|
|
msgcat = self.parse_po_file(pofp)
|
|
finally:
|
|
pofp.close()
|
|
else:
|
|
# if no pofile, parse anyway to get rid of those nasty i18n:
|
|
# attributes (obviously not very fast, perhaps we need to either
|
|
# cache a parsed version and send that back or just remove the
|
|
# attributes here)
|
|
msgcat = {}
|
|
xmlfp = open(self.htmlfile)
|
|
try:
|
|
xml = xmlfp.read()
|
|
finally:
|
|
xmlfp.close()
|
|
dom = parseString(xml)
|
|
self.apply_i18n(dom, msgcat)
|
|
return self.serialize(dom.documentElement)
|
|
|
|
def parse_po_file(self, pofp):
|
|
"""parse the .po file, create a mapping msgid->msgstr"""
|
|
cat = {}
|
|
state = None
|
|
msgid = None
|
|
msgstr = None
|
|
for line in pofp.readlines():
|
|
line = line.strip()
|
|
if line.startswith('#') or not line:
|
|
continue
|
|
if line.startswith('msgid'):
|
|
if msgid and msgstr:
|
|
cat[msgid] = msgstr
|
|
msgid = line[7:-1]
|
|
state = ID
|
|
elif line.startswith('msgstr'):
|
|
msgstr = line[8:-1]
|
|
else:
|
|
# ignore for now, might be a multiline msgstr, if we
|
|
# want to support those we should add some code here...
|
|
pass
|
|
if msgid and msgstr:
|
|
cat[msgid] = msgstr
|
|
return cat
|
|
|
|
def apply_i18n(self, dom, msgcat):
|
|
"""apply nationalization of the full dom"""
|
|
nodes = dom.documentElement.getElementsByTagName('*')
|
|
for node in nodes:
|
|
if node.hasAttributeNS(I18NNS, 'translate'):
|
|
self.apply_translate(node, msgcat)
|
|
if node.hasAttributeNS(I18NNS, 'attributes'):
|
|
self.apply_attributes(node, msgcat)
|
|
|
|
def apply_translate(self, node, msgcat):
|
|
"""handle Zope-style i18n:translate"""
|
|
buf = []
|
|
msgid = msgstr = node.getAttributeNS(I18NNS, 'translate').strip()
|
|
if not msgid:
|
|
# no msgid in the attribute, use the node value
|
|
for child in node.childNodes:
|
|
if child.nodeType == 3:
|
|
buf.append(child.nodeValue)
|
|
else:
|
|
raise TypeError, \
|
|
('illegal element %s in i18n:translate element' %
|
|
child.nodeName)
|
|
msgid = msgstr = self.reduce_whitespace(u''.join(buf).strip())
|
|
if msgcat.has_key(msgid):
|
|
msgstr = msgcat[msgid]
|
|
# now replace the contents of the node with the new contents
|
|
while node.hasChildNodes():
|
|
node.removeChild(node.firstChild)
|
|
node.removeAttributeNS(I18NNS, 'translate')
|
|
node.appendChild(node.ownerDocument.createTextNode(msgstr))
|
|
|
|
def apply_attributes(self, node, msgcat):
|
|
"""handle Zope-style i18n:attributes"""
|
|
attrnames = node.getAttributeNS(I18NNS, 'attributes').split(' ')
|
|
for attr in attrnames:
|
|
value = node.getAttribute(attr)
|
|
if value and msgcat.has_key(value):
|
|
node.setAttribute(attr, unicode(msgcat[value], 'UTF-8'))
|
|
node.removeAttributeNS(I18NNS, 'attributes')
|
|
|
|
def reduce_whitespace(self, string):
|
|
for char in ['\n', '\t', '\r']:
|
|
string = string.replace(char, ' ')
|
|
while string.find(' ') > -1:
|
|
string = string.replace(' ', ' ')
|
|
return string
|
|
|
|
def get_po_file_path(self, locale):
|
|
for language in locale:
|
|
startdir = '../i18n'
|
|
language = language.split('-')
|
|
pathstart = '%s/kupu-%s' % (startdir, language[0])
|
|
paths = []
|
|
if len(language) == 2:
|
|
paths.append('%s-%s.po' % (pathstart, language[1]))
|
|
paths += [
|
|
'%s-default.po' % pathstart,
|
|
'%s.po' % pathstart,
|
|
]
|
|
for path in paths:
|
|
if os.path.isfile(path):
|
|
return path
|
|
|
|
def serialize(self, el):
|
|
buf = []
|
|
if el.nodeType == 1:
|
|
buf.append('<%s' % el.nodeName)
|
|
if len(el.attributes):
|
|
for attr, value in el.attributes.items():
|
|
if value is not None:
|
|
buf.append(' %s="%s"' % (attr, self.entitize(value)))
|
|
if el.hasChildNodes() or el.nodeName in self.not_single:
|
|
buf.append('>')
|
|
for child in el.childNodes:
|
|
buf += self.serialize(child)
|
|
buf.append('</%s>' % el.nodeName)
|
|
else:
|
|
buf.append(' />')
|
|
elif el.nodeType == 3:
|
|
buf.append(el.nodeValue)
|
|
else:
|
|
print 'ignoring node of type', node.nodeType
|
|
return ''.join([ustr(b) for b in buf])
|
|
|
|
def entitize(self, string):
|
|
string = string.replace('&', '&')
|
|
string = string.replace('<', '<')
|
|
string = string.replace('>', '>')
|
|
string = string.replace('"', '"')
|
|
return string
|
|
|
|
if __name__ == '__main__':
|
|
# test code
|
|
os.chdir(os.path.abspath(os.path.dirname(__file__)))
|
|
i = Nationalizer('../common/kupu.html', ['nl'])
|
|
print i.translate().encode('UTF-8')
|