
217 lines
8.1 KiB
Raw Normal View History

"""Simple script to generate .pox files
parses XML for i18n attrs and JS files for _() calls and generates an
XML .pox template document (.poxt file)
(c) Guido Wesdorp 2005
"""
from xml.dom.minidom import parseString, getDOMImplementation
import sys, re, os
# progress and error messages are written here; the generated catalog
# itself is printed on standard output by the script entry point
stderr = sys.stderr
# when true, an input file that fails to parse as XML is reported on stderr
warn_on_broken_xml = True
class POX:
    """container for the results

    Collects message ids into an XML document whose root element is
    <catalog>.  Every distinct message id gets one <message> element that
    remembers, in its 'filenames' attribute, each file the id was seen in.
    """

    def __init__(self):
        impl = getDOMImplementation()
        self.doc = impl.createDocument(None, 'catalog', None)
        self.root = self.doc.documentElement
        self.processed = {}  # mapping from msgid to ([filenames], node)

    def add(self, msgid, filename):
        """register msgid as found in filename

        Leading/trailing whitespace is stripped and every run of spaces,
        tabs and newlines inside the id is reduced to a single space before
        the id is looked up, so differently wrapped occurrences of the same
        text end up in the same <message>.

        A msgid that was already registered only gets filename appended to
        its 'filenames' attribute (once per file); no second <message> is
        created.
        """
        # strip and reduce whitespace
        msgid = msgid.strip().replace('\n', ' ').replace('\t', ' ')
        while '  ' in msgid:
            # str.replace returns a new string; the result has to be kept
            # or the loop never terminates
            msgid = msgid.replace('  ', ' ')

        if msgid in self.processed:
            filenames, node = self.processed[msgid]
            if filename not in filenames:
                filenames.append(filename)
                node.setAttribute(
                    'filenames',
                    '%s %s' % (node.getAttribute('filenames'), filename))
            return

        doc = self.doc
        root = self.root
        # add the nodes
        msgnode = doc.createElement('message')
        msgnode.setAttribute('filenames', filename)
        root.appendChild(msgnode)

        msgidnode = doc.createElement('msgid')
        msgidnode.appendChild(doc.createTextNode(msgid))
        msgnode.appendChild(msgidnode)

        # NOTE(review): the statements that filled and attached these nodes
        # are missing from the reviewed text.  The msgstr is given the msgid
        # as its text because its i18n:translate attribute is empty, which
        # elsewhere in this file means "the element content is the message".
        # Confirm against the upstream script.
        msgstrnode = doc.createElement('msgstr')
        msgstrnode.setAttribute('i18n:translate', '')
        msgstrnode.appendChild(doc.createTextNode(msgid))
        msgnode.appendChild(msgstrnode)

        self.processed[msgid] = ([filename], msgnode)

    def get_result(self):
        """return the catalog document serialized as indented XML"""
        return self.doc.toprettyxml()
class XMLParser:
    """scans XML files (or well-formed HTML files, obviously) for i18 attrs

    Instantiating the class parses every file in 'files' and hands each
    message id that is found to pox.add(msgid, filename).
    """

    def __init__(self, files, pox):
        self._current = None  # node most recently returned by next_node()
        for filename in files:
            self.parse_file(filename, pox)

    def parse_file(self, filename, pox):
        """feed all i18n message ids of one XML file to pox

        For every element:
          * i18n:translate -- the attribute value is the msgid; when the
            value is blank the serialized element content is used instead
          * i18n:attributes -- lists attribute names; the value of each
            listed attribute is a msgid

        A file that cannot be parsed is skipped; it is reported on stderr
        when warn_on_broken_xml is true.  Raises AttributeError when
        i18n:attributes names an attribute the element does not have.
        """
        # read as bytes so the XML parser can honour the document's own
        # encoding declaration; the handle is closed before parsing starts
        with open(filename, 'rb') as fp:
            data = fp.read()
        try:
            dom = parseString(data)
        except Exception:
            exc, e = sys.exc_info()[:2]
            if warn_on_broken_xml:
                stderr.write(
                    'Error parsing %s: %s - %s\n' % (filename, exc, e))
            return

        # walk through all the nodes and scan for i18n: stuff
        while 1:
            node = self.next_node(dom)
            if node is None:
                break
            if node.nodeType != 1:  # only element nodes carry attributes
                continue
            attrs = node.attributes

            translate = attrs.getNamedItem('i18n:translate')
            if translate is not None:
                msgid = translate.value
                if not msgid.strip():
                    msgid = self.extract_text(node)
                pox.add(msgid, filename)

            attributes = attrs.getNamedItem('i18n:attributes')
            if attributes is not None:
                # NOTE(review): the end of this expression is missing from
                # the reviewed text; ';' is the list separator of Zope's
                # i18n:attributes -- confirm against the upstream script.
                names = [a.strip() for a in attributes.value.split(';')]
                for attr in names:
                    if not attr:
                        # empty entry, e.g. after a trailing ';'
                        continue
                    attritem = attrs.getNamedItem(attr)
                    if attritem is None:
                        raise AttributeError(
                            'No %s on %s in %s' % (
                                attr, node.nodeName, filename))
                    pox.add(attritem.value, filename)

    def extract_text(self, node):
        """return the serialized children of node with whitespace reduced

        Each child is serialized, stripped and has newlines and tabs turned
        into spaces; the pieces are concatenated and runs of spaces are
        reduced to one space.
        """
        pieces = []
        for child in node.childNodes:
            pieces.append(
                child.toxml().strip().replace('\n', ' ').replace('\t', ' '))
        xml = ''.join(pieces)
        while '  ' in xml:
            xml = xml.replace('  ', ' ')
        return xml

    def next_node(self, dom):
        """return the node following the previously returned one

        Nodes are produced in document order (parent before children),
        starting at dom.documentElement.  Returns None once the whole
        document has been visited; the call after that starts over.
        """
        cur = self._current
        if cur is None or cur.ownerDocument is not dom:
            # first call, previous walk finished, or a different document
            self._current = dom.documentElement
            return self._current

        if cur.hasChildNodes():
            self._current = cur.childNodes[0]
            return self._current

        # leaving a subtree: climb until some ancestor has a next sibling.
        # Climbing a single level is not enough -- it ends the walk early
        # whenever the parent is itself the last child of its parent.
        while cur is not None and cur.nextSibling is None:
            cur = cur.parentNode
        if cur is None:
            self._current = None
        else:
            self._current = cur.nextSibling
        return self._current
class JSParser:
    """scans JS files for _() calls

    Instantiating the class parses every file in 'files' and hands the
    string literal of each _() call to pox.add(literal, filename).  Only
    plain string literals are understood; a literal may be continued on the
    following line(s) with a trailing '+'.  One call per line is picked up.
    """

    # '_(' preceded by a character that cannot be part of an identifier
    _startfuncreg = re.compile(r'.*?[^a-zA-Z0-9_]_\(')
    # '_(' at the very start of the line
    _startfuncreg_2 = re.compile(r'^_\(')

    def __init__(self, files, pox):
        for filename in files:
            self.parse_file(filename, pox)

    def _find_call(self, line):
        """return a match that ends just past the first '_(' in line, or None"""
        # the start-of-line form is tried first: when it matches it is
        # necessarily the first call on the line
        return (self._startfuncreg_2.search(line) or
                self._startfuncreg.search(line))

    def parse_file(self, filename, pox):
        """feed the literal of every _() call in one JS file to pox

        Raises ValueError when a call has an empty literal or when the
        call's argument is not a plain (optionally multi-line) string.
        """
        with open(filename) as fp:
            lines = fp.readlines()
        more = False  # true while a literal continues on the next line
        chunks = []
        for lineno, line in enumerate(lines, 1):
            if more or self._find_call(line):
                chunk, more = self._get_func_content(line, filename,
                                                     lineno, more)
                chunks.append(chunk)
            if chunks and not more:
                literal = ''.join(chunks).strip()
                if not literal:
                    raise ValueError('Unrecognized function content -- '
                                     'file %s, line %s' % (
                                         filename, lineno))
                literal = literal.replace('\t', ' ').replace('\n', ' ')
                while '  ' in literal:
                    literal = literal.replace('  ', ' ')
                chunks = []
                pox.add(literal, filename)

    def _get_func_content(self, line, filename, lineno, more=False):
        """return the content of the _() call in line

        if more is True, this will assume the function is already opened
        and continue adding to the result from the start of the line
        without searching for '[^a-zA-Z_]_(' first

        returns a tuple (content, more) where more is True if the end of
        the function body is not reached, in that case this method should
        be called again with the 'more' argument set to True

        Backslash escapes are copied to the content unchanged.  Raises
        ValueError when no call is found, when the argument does not start
        with a quote, when the string is not terminated on this line, or
        when '+' is followed by anything on the same line.
        """
        line = line.strip()
        if not more:
            match = self._find_call(line)
            if match is None:
                raise ValueError('no _() call found -- (file %s, line %s)' % (
                    filename, lineno))
            # drop everything up to and including the opening '_('
            line = line[match.end():].strip()
        if not line:
            raise ValueError('missing function body -- (file %s, line %s)' % (
                filename, lineno))

        quote = line[0]
        line = line[1:]
        if quote not in ('"', "'"):
            raise ValueError('beginning of function body not a recognized '
                             'quote character: %s -- (file %s, line %s)' % (
                                 quote, filename, lineno))

        ret = []
        escaped = False  # true when the previous char was an active backslash
        end = None
        for pos, char in enumerate(line):
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote:
                end = pos
                break
            ret.append(char)
        if end is None:
            raise ValueError('unterminated string literal -- '
                             '(file %s, line %s)' % (filename, lineno))

        # find out if we should continue after this (do we have a '+'
        # or a ');'?)
        more = False
        rest = line[end + 1:].strip()
        if rest.startswith('+'):
            if rest[1:].strip():
                raise ValueError('string concatenation only allowed for '
                                 'multiline strings, not for variable '
                                 'interpolation (use ${} instead) -- '
                                 '(file %s, line %s)' % (
                                     filename, lineno))
            more = True
        return ''.join(ret), more
def main(argv=None):
    """extract message ids from the given files and print the catalog

    argv is the list of file names to process (defaults to sys.argv[1:]).
    Names ending in '.js' are scanned as JavaScript, all others as XML.
    Progress messages go to stderr, the resulting XML to standard output.
    """
    if argv is None:
        argv = sys.argv[1:]
    stderr.write('POX extract v0.1\n')
    stderr.write('(c) Guido Wesdorp 2004\n')
    files = list(argv)
    stderr.write('Going to parse files %s\n' % ', '.join(files))

    pox = POX()
    xml = [f for f in files if not f.endswith('.js')]
    js = [f for f in files if f.endswith('.js')]
    XMLParser(xml, pox)
    JSParser(js, pox)

    pres = pox.get_result()
    # the document is built without namespace support, so the declaration
    # for the i18n: prefix is patched into the serialized root element.
    # A prefix may not be bound to an empty namespace name, hence the URI.
    # NOTE(review): the original literal ends with a trailing space, so a
    # further attribute (presumably i18n:domain) followed on a line that is
    # missing from the reviewed text -- restore it once its value is known.
    pres = pres.replace(
        '<catalog>',
        '<catalog xmlns:i18n="http://xml.zope.org/namespaces/i18n">')
    sys.stdout.write(pres + '\n')
    stderr.write('Done\n')


if __name__ == '__main__':
    main()