[rkward-cvs] SF.net SVN: rkward-code:[4982] trunk/rkward/scripts/extract_plugin_messages.py
tfry at users.sf.net
tfry at users.sf.net
Sat Nov 1 16:35:22 UTC 2014
Revision: 4982
http://sourceforge.net/p/rkward/code/4982
Author: tfry
Date: 2014-11-01 16:35:21 +0000 (Sat, 01 Nov 2014)
Log Message:
-----------
Support extraction from inlined <script>s, and avoid bailing out on &nbsp; and other character entities that are defined in (X)HTML, but not in XML.
Modified Paths:
--------------
trunk/rkward/scripts/extract_plugin_messages.py
Modified: trunk/rkward/scripts/extract_plugin_messages.py
===================================================================
--- trunk/rkward/scripts/extract_plugin_messages.py 2014-10-31 13:40:23 UTC (rev 4981)
+++ trunk/rkward/scripts/extract_plugin_messages.py 2014-11-01 16:35:21 UTC (rev 4982)
@@ -39,6 +39,15 @@
if (len (toplevel_sources) < 1):
usage ()
+# For crying out loud! So we are not strictly using XML, because we allow the use of (X)HTML entities, esp. inside <text>-elements,
+# without formally declaring these entities. Python seems to make a point of making it real hard to deal with this. So what we do is
+# escaping all entities before parsing, then passing all through HTMLParser.unescape () before writing the output.
+def parseFile (filename):
+ f = codecs.open (filename, 'r', 'utf-8')
+ content = f.read ()
+ f.close ()
+ return minidom.parseString (content.replace ("&", "&amp;"))
+
# Where available, include the labels of parent elements. Particularly helpful for radio-options
def getElementShort (element, dot_attribute=""):
ret = "<" + element.tagName
@@ -56,13 +65,12 @@
ret += "i18n: ectx: "
if (infile["caption"] != ""):
ret += "(" + infile["caption"] + ") "
+ refer_to = ""
if ((element.tagName in referring_elements) and (element.hasAttribute ("id"))):
if (not (element.getAttribute ("id") in infile["id_labels"])):
sys.stderr.write ("WARNING in " + infile["infile"] + ": Reference to unknown element id '" + element.getAttribute ("id") + "'")
else:
refer_to = " (refers to element labelled " + quote (infile["id_labels"][element.getAttribute ("id")]) + ")"
- else:
- refer_to = ""
tag_stack = [getElementShort (element, attribute)]
while ((element.parentNode.nodeType != element.DOCUMENT_NODE)):
element = element.parentNode
@@ -92,13 +100,13 @@
for cn in element.childNodes:
if cn.nodeType != cn.COMMENT_NODE:
rc.append(cn.toxml ("utf-8"))
- return ''.join (rc).strip ()
+ return ''.join (rc).strip ().replace ("&amp;", "&")
# get the content of all text nodes inside this node (does not include xml tags)
def getText (node):
rc = []
for cn in node.childNodes:
- if cn.nodeType == cn.TEXT_NODE:
+ if cn.nodeType in [cn.TEXT_NODE, cn.CDATA_SECTION_NODE]:
rc.append(cn.data)
return ''.join (rc).strip ()
@@ -131,7 +139,9 @@
jsfile.close ()
else:
handleSubFile (filename, node.tagName == "component")
- if (node.tagName in text_containers):
+ if (node.tagName == "script"):
+ handleJSChunk (getText (node), infile["infile"], -1, infile["caption"])
+ elif (node.tagName in text_containers):
textchunks = getFullText (node).split ("\n\n")
for chunk in textchunks:
outfile.write (getI18nComment (node))
@@ -279,7 +289,7 @@
text += "i18n: file: " + filename
if (offset >= 0):
text += ":" + str (offset + line + 1)
- text += "\ni18n: ectx: " + caption + " */"
+ text += "\ni18n: ectx: (" + caption + ") */"
text += call
outfile.write (text)
@@ -294,7 +304,8 @@
if (not os.path.isfile (filename)):
sys.stderr.write (getFileContext (node) + " WARNING: File " + filename + " does not exist\n")
return
- xmldoc = minidom.parse (filename)
+ print filename
+ xmldoc = parseFile (filename)
if (xmldoc.documentElement.hasAttribute ("po_id") and (xmldoc.documentElement.getAttribute ("po_id") != current_po_id)):
toplevel_sources.append (filename)
#sys.stderr.write ("Added " + filename + " to toplevel\n")
@@ -327,7 +338,7 @@
# NOTE: toplevel_sources may grow, dynamically, but only at the end.
i = 0
while i < len (toplevel_sources):
- xmldoc = minidom.parse (toplevel_sources[i])
+ xmldoc = parseFile (toplevel_sources[i])
po_id = xmldoc.documentElement.getAttribute ("po_id")
if (po_id == ""):
po_id = default_po
More information about the rkward-tracker
mailing list