1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, getXMLspace, setXMLspace, namespaced
30 except ImportError, e:
31 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
32
33
35 """Yield a match object for each successive match of L{re_obj} in L{text}."""
# Each search resumes at the end of the previous match, so the yielded
# matches never overlap.
36 start = 0
# NOTE(review): `max` shadows the builtin of the same name in this scope.
37 max = len(text)
38 while start < max:
39 m = re_obj.search(text, start)
40 if not m:
41 break
42 yield m
# NOTE(review): a zero-length match leaves `start` where it is, so the same
# match would be yielded forever; presumably only patterns that consume at
# least one character are passed in -- confirm against callers.
43 start = m.end()
44
45
# Regular expressions (one capturing group each) for inline placeholders.
# All patterns are raw strings so that backslashes reach the regex engine
# unchanged.  The third pattern used to contain a bare `$`, which the regex
# engine reads as an end-of-string anchor, so it could never match a
# positional placeholder such as "%1$lx"; the dollar sign is now escaped.
# NOTE(review): "%1$lx" is now matched by the third pattern and its prefix
# "%1$l" is still matched by the fourth -- confirm that consumers which merge
# the per-pattern match lists cope with two matches at the same offset.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',  # plain printf conversions, e.g. %s, %d
    r'(\\+.?)',               # backslash escapes, e.g. \n
    r'(%[0-9]\$lx)',          # positional long-hex, e.g. %1$lx
    r'(%[0-9]\$[a-z])',       # positional conversions, e.g. %2$s
    r'(<.+?>)',               # tag-like markup, e.g. <b>
]
re_placeholders = [re.compile(ph) for ph in placeholders]
57
58
60 """
61 A single unit in the file. Provisional work is done to make several
62 languages possible.
63 """
64
65
# Tag name of the element that represents one unit; it is namespaced and used
# to create self.xmlelement.
66 rootNode = ""
67
# Tag name of the child elements that getlanguageNodes() returns.
68 languageNode = ""
69
# Tag name of the descendant that holds the text inside a language node; when
# empty, getNodeText() reads the text from the language node itself.
70 textNode = ""
71
# XML namespace handed to the namespaced() helper to build qualified tag names.
72 namespace = None
73 _default_xml_space = "preserve"
74 """The default handling of spacing in the absence of an xml:space attribute.
75
76 This is mostly for correcting XLIFF behaviour."""
77
def __init__(self, source, empty=False, **kwargs):
    """Build a unit around C{source}, or a bare shell when C{empty} is true.

    @param source: the source string, passed on to the base-class initialiser.
    @param empty: when true, only C{_rich_source} and C{_rich_target} are
    reset to None; no XML element is created and the base-class initialiser
    is not run.
    @param kwargs: accepted and ignored.
    """
    self._rich_source = None
    self._rich_target = None
    if not empty:
        # The element must exist before the base class runs, since it is
        # handed the source string to store.
        root_tag = self.namespaced(self.rootNode)
        self.xmlelement = etree.Element(root_tag)
        super(LISAunit, self).__init__(source)
87
103
105 """Returns name in Clark notation.
106
107 For example namespaced("source") in an XLIFF document might return::
108 {urn:oasis:names:tc:xliff:document:1.1}source
109 This is needed throughout lxml.
110 """
111 return namespaced(self.namespace, name)
112
# The first language node is the one that gets replaced here: swap it for
# dom_node when it exists, otherwise append dom_node as a new child of the
# unit element.
114 languageNodes = self.getlanguageNodes()
115 if len(languageNodes) > 0:
116 self.xmlelement.replace(languageNodes[0], dom_node)
117 else:
118 self.xmlelement.append(dom_node)
119
122 source_dom = property(get_source_dom, set_source_dom)
123
129
132 source = property(getsource, setsource)
133
135 languageNodes = self.getlanguageNodes()
# At least one language node must already exist before this runs.
# NOTE(review): because of this assert, the `len(languageNodes) == 0` test
# below can only be true when asserts are disabled (python -O).
136 assert len(languageNodes) > 0
137 if dom_node is not None:
138 if append or len(languageNodes) == 0:
139 self.xmlelement.append(dom_node)
140 else:
# Position 1 is a child index of the unit element, not an index into
# languageNodes.
141 self.xmlelement.insert(1, dom_node)
# When not appending, the language node that was second before this call is
# removed.  NOTE(review): the nesting of this statement is not visible in
# this listing -- confirm whether it also runs when dom_node is None.
142 if not append and len(languageNodes) > 1:
143 self.xmlelement.remove(languageNodes[1])
144
150 target_dom = property(get_target_dom)
151
152 - def settarget(self, text, lang='xx', append=False):
177
182 target = property(gettarget, settarget)
183
185 """Returns a xml Element setup with given parameters to represent a
186 single language entry. Has to be overridden."""
187 return None
188
217
219 """Returns a list of all nodes that contain per language information."""
220 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
221
223 """Retrieves a languageNode either by language or by index"""
# At least one of `lang` and `index` must be given; KeyError otherwise.
224 if lang is None and index is None:
225 raise KeyError("No criterea for languageNode given")
226 languageNodes = self.getlanguageNodes()
# A truthy `lang` takes precedence over `index`: the first node whose XML
# language equals `lang` is returned.
# NOTE(review): the loop variable `set` shadows the builtin of the same name.
227 if lang:
228 for set in languageNodes:
229 if getXMLlang(set) == lang:
230 return set
231 else:
# NOTE(review): a falsy but non-None `lang` (e.g. '') with index=None also
# reaches this branch, with `index` still None -- confirm that is intended.
# An index past the end yields None rather than IndexError.
232 if index >= len(languageNodes):
233 return None
234 else:
235 return languageNodes[index]
# Nothing matched the requested language.
236 return None
237
def getNodeText(self, languageNode, xml_space="preserve"):
    """Retrieves the term from the given languageNode.

    @param languageNode: the element holding one language's data, or None.
    @param xml_space: spacing mode passed through to getText().
    @return: the text of the first C{self.textNode} descendant of
    languageNode, or of languageNode itself when C{self.textNode} is empty.
    None is returned when languageNode is None or when the expected text
    node is missing.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode, xml_space)
    terms = languageNode.iterdescendants(self.namespaced(self.textNode))
    if terms is None:
        return None
    # Only the first matching descendant is used.  Looping (rather than
    # calling terms.next()) means a node without the expected child yields
    # None instead of leaking StopIteration, and it works on any Python
    # version.
    for term in terms:
        return getText(term, xml_space)
    return None
250
252 return etree.tostring(self.xmlelement, pretty_print=True, encoding='utf-8')
253
256
257 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
258 lambda self, value: self._set_property(self.namespaced('xid'), value))
259
260 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
261 lambda self, value: self._set_property(self.namespaced('rid'), value))
262
# Build the unit with empty=True so that __init__ returns before creating a
# fresh root element, then attach the already-parsed element to it.
264 term = cls(None, empty=True)
265 term.xmlelement = element
266 return term
# Wrapped explicitly as a classmethod, so `cls` above is the class it is
# called on.
267 createfromxmlElement = classmethod(createfromxmlElement)
268
270 """A class representing a file store for one of the LISA file formats."""
271 UnitClass = LISAunit
272
273 rootNode = ""
274
275 bodyNode = ""
276
277 XMLskeleton = ""
278
279 namespace = None
280
281 - def __init__(self, inputfile=None, sourcelanguage='en', targetlanguage=None, unitclass=None):
294
296 """Method to be overridden to initialise headers, etc."""
297 pass
298
300 """Returns name in Clark notation.
301
302 For example namespaced("source") in an XLIFF document might return::
303 {urn:oasis:names:tc:xliff:document:1.1}source
304 This is needed throughout lxml.
305 """
306 return namespaced(self.namespace, name)
307
def initbody(self):
    """Cache the document's body element on C{self.body}.

    The default (prefix-less) namespace of the root element is stored on
    C{self.namespace} first, because the body lookup below builds its
    qualified tag name from it.
    """
    root = self.document.getroot()
    self.namespace = root.nsmap.get(None)
    body_path = '//%s' % self.namespaced(self.bodyNode)
    self.body = self.document.find(body_path)
312
314
315 """Adds and returns a new unit with the given string as first entry."""
316 newunit = self.UnitClass(source)
317 self.addunit(newunit)
318 return newunit
319
320 - def addunit(self, unit, new=True):
325
327 """Converts to a string containing the file's XML"""
328 return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8')
329
331 """Populates this object from the given xml string"""
# `xml` may be a string or a file-like object.  Unless a filename was already
# set, remember the object's `name` attribute (empty string if it has none).
332 if not hasattr(self, 'filename'):
333 self.filename = getattr(xml, 'name', '')
# File-like input: rewind and read it, so that `xml` holds the raw content
# from here on.
334 if hasattr(xml, "read"):
335 xml.seek(0)
336 posrc = xml.read()
337 xml = posrc
# From lxml 2.1.0 on, the parser is created with strip_cdata=False so that
# CDATA sections are kept instead of being replaced by plain text content.
338 if etree.LXML_VERSION >= (2, 1, 0):
339
340
341 parser = etree.XMLParser(strip_cdata=False)
342 else:
343 parser = etree.XMLParser()
344 self.document = etree.fromstring(xml, parser).getroottree()
345 self._encoding = self.document.docinfo.encoding
346 self.initbody()
# NOTE(review): the root tag is verified only by an assert, which is skipped
# under python -O, and only after initbody() has already run.
347 assert self.document.getroot().tag == self.namespaced(self.rootNode)
# Wrap every unit element found below the root and register it.  new=False is
# passed to addunit(), presumably because the element is already part of the
# parsed document -- confirm against addunit().
348 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
349 term = self.UnitClass.createfromxmlElement(entry)
350 self.addunit(term, new=False)
351