| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2006-2009 Zuza Software Foundation
5 #
6 # This file is part of the Translate Toolkit.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, see <http://www.gnu.org/licenses/>.
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import *
30 except ImportError, e:
31 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
32
33
def _findAllMatches(text, re_obj):
    """Generate match objects for all L{re_obj} matches in L{text}.

    Matches are yielded from left to right; each search resumes where the
    previous match ended, so the yielded matches never overlap.

    @param text: The string to search.
    @param re_obj: A compiled regular expression, i.e. an object with a
        C{search(string, pos)} method returning match objects or C{None}.
    @return: A generator of match objects.
    """
    pos = 0
    end = len(text)
    while pos < end:
        m = re_obj.search(text, pos)
        if not m:
            break
        yield m
        if m.end() > m.start():
            pos = m.end()
        else:
            # A zero-width match does not consume anything; step past it so
            # the same position is not searched (and matched) forever.
            pos = m.end() + 1
44
#TODO: we can now do better with our proper placeables support
# Patterns treated as placeholders, in order: printf-style conversions,
# backslash escapes, positional printf arguments and XML/HTML-like tags.
# Raw strings keep every backslash literal for the regex engine.
# NOTE(review): in the third pattern the "$" is unescaped, so it acts as an
# end-of-string anchor and the pattern cannot match anything; it is left
# unchanged here because the fourth pattern already picks up the "%N$l"
# prefix of such sequences -- confirm the intended behaviour before fixing.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',
    r'(\\+.?)',
    r'(%[0-9]$lx)',
    r'(%[0-9]\$[a-z])',
    r'(<.+?>)',
]
# Compiled once at import time so callers do not recompile per text.
re_placeholders = [re.compile(ph) for ph in placeholders]
def _getPhMatches(text):
    """Return a list of regexp match objects for all placeholders in L{text}.

    Every pattern in C{re_placeholders} is applied to the whole text and the
    combined matches are returned ordered by start position.  Matches that
    come from different patterns are not de-duplicated and may overlap.

    @param text: The string to scan for placeholders.
    @return: A list of match objects sorted by C{start()}.
    """
    matches = []
    for re_ph in re_placeholders:
        matches.extend(_findAllMatches(text, re_ph))

    # Sort them so they come sequentially.  A key function gives the same
    # stable ordering as a cmp-style comparator on start(), and unlike a
    # positional comparator it is also accepted by list.sort() on Python 3.
    matches.sort(key=lambda m: m.start())
    return matches
57
58
60 """
61 A single unit in the file. Provisional work is done to make several
62 languages possible.
63 """
64
65 #The name of the root element of this unit type:(termEntry, tu, trans-unit)
66 rootNode = ""
67 #The name of the per language element of this unit type:(termEntry, tu, trans-unit)
68 languageNode = ""
69 #The name of the innermost element of this unit type:(term, seg)
70 textNode = ""
71
72 namespace = None
73 _default_xml_space = "preserve"
74 """The default handling of spacing in the absence of an xml:space attribute.
75
76 This is mostly for correcting XLIFF behaviour."""
77
79 """Constructs a unit containing the given source string"""
80 if empty:
81 self._rich_source = None
82 self._rich_target = None
83 return
84 self.xmlelement = etree.Element(self.rootNode)
85 #add descrip, note, etc.
86 super(LISAunit, self).__init__(source)
87
89 """Compares two units"""
90 if not isinstance(other, LISAunit):
91 return super(LISAunit, self).__eq__(other)
92 languageNodes = self.getlanguageNodes()
93 otherlanguageNodes = other.getlanguageNodes()
94 if len(languageNodes) != len(otherlanguageNodes):
95 return False
96 for i in range(len(languageNodes)):
97 mytext = self.getNodeText(languageNodes[i], getXMLspace(self.xmlelement, self._default_xml_space))
98 othertext = other.getNodeText(otherlanguageNodes[i], getXMLspace(self.xmlelement, self._default_xml_space))
99 if mytext != othertext:
100 #TODO:^ maybe we want to take children and notes into account
101 return False
102 return True
103
105 """Returns name in Clark notation.
106
107 For example namespaced("source") in an XLIFF document might return::
108 {urn:oasis:names:tc:xliff:document:1.1}source
109 This is needed throughout lxml.
110 """
111 return namespaced(self.namespace, name)
112
114 languageNodes = self.getlanguageNodes()
115 if len(languageNodes) > 0:
116 self.xmlelement[0] = dom_node
117 else:
118 self.xmlelement.append(dom_node)
119
122 source_dom = property(get_source_dom, set_source_dom)
123
125 text = data.forceunicode(text)
126 self.source_dom = self.createlanguageNode(sourcelang, text, "source")
127
130 source = property(getsource, setsource)
131
133 languageNodes = self.getlanguageNodes()
134 assert len(languageNodes) > 0
135 if dom_node is not None:
136 if append or len(languageNodes) == 0:
137 self.xmlelement.append(dom_node)
138 else:
139 self.xmlelement.insert(1, dom_node)
140 if not append and len(languageNodes) > 1:
141 self.xmlelement.remove(languageNodes[1])
142
144 if lang:
145 return self.getlanguageNode(lang=lang)
146 else:
147 return self.getlanguageNode(lang=None, index=1)
148 target_dom = property(get_target_dom)
149
151 #XXX: we really need the language - can't really be optional, and we
152 # need to propagate it
153 """Sets the "target" string (second language), or alternatively appends to the list"""
154 text = data.forceunicode(text)
155 #Firstly deal with reinitialising to None or setting to identical string
156 if self.gettarget() == text:
157 return
158 languageNode = self.get_target_dom(None)
159 if not text is None:
160 if languageNode is None:
161 languageNode = self.createlanguageNode(lang, text, "target")
162 self.set_target_dom(languageNode, append)
163 else:
164 if self.textNode:
165 terms = languageNode.iter(self.namespaced(self.textNode))
166 try:
167 languageNode = terms.next()
168 except StopIteration, e:
169 pass
170 languageNode.text = text
171 else:
172 self.set_target_dom(None, False)
173
175 """retrieves the "target" text (second entry), or the entry in the
176 specified language, if it exists"""
177 return self.getNodeText(self.get_target_dom(lang), getXMLspace(self.xmlelement, self._default_xml_space))
178 target = property(gettarget, settarget)
179
181 """Returns a xml Element setup with given parameters to represent a
182 single language entry. Has to be overridden."""
183 return None
184
186 """Create the text node in parent containing all the ph tags"""
187 matches = _getPhMatches(text)
188 if not matches:
189 parent.text = text
190 return
191
192 # Now we know there will definitely be some ph tags
193 start = matches[0].start()
194 pretext = text[:start]
195 if pretext:
196 parent.text = pretext
197 lasttag = parent
198 for i, m in enumerate(matches):
199 #pretext
200 pretext = text[start:m.start()]
201 # this will never happen with the first ph tag
202 if pretext:
203 lasttag.tail = pretext
204 #ph node
205 phnode = etree.SubElement(parent, "ph")
206 phnode.set("id", str(i+1))
207 phnode.text = m.group()
208 lasttag = phnode
209 start = m.end()
210 #post text
211 if text[start:]:
212 lasttag.tail = text[start:]
213
215 """Returns a list of all nodes that contain per language information."""
216 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
217
219 """Retrieves a languageNode either by language or by index"""
220 if lang is None and index is None:
221 raise KeyError("No criterea for languageNode given")
222 languageNodes = self.getlanguageNodes()
223 if lang:
224 for set in languageNodes:
225 if getXMLlang(set) == lang:
226 return set
227 else:#have to use index
228 if index >= len(languageNodes):
229 return None
230 else:
231 return languageNodes[index]
232 return None
233
235 """Retrieves the term from the given languageNode"""
236 if languageNode is None:
237 return None
238 if self.textNode:
239 terms = languageNode.iterdescendants(self.namespaced(self.textNode))
240 if terms is None:
241 return None
242 else:
243 return getText(terms.next(), xml_space)
244 else:
245 return getText(languageNode, xml_space)
246
249
252
253 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
254 lambda self, value: self._set_property(self.namespaced('xid'), value))
255
256 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
257 lambda self, value: self._set_property(self.namespaced('rid'), value))
258
263 createfromxmlElement = classmethod(createfromxmlElement)
264
266 """A class representing a file store for one of the LISA file formats."""
267 UnitClass = LISAunit
268 #The root node of the XML document:
269 rootNode = ""
270 #The root node of the content section:
271 bodyNode = ""
272 #The XML skeleton to use for empty construction:
273 XMLskeleton = ""
274
275 namespace = None
276
278 super(LISAfile, self).__init__(unitclass=unitclass)
279 if inputfile is not None:
280 self.parse(inputfile)
281 assert self.document.getroot().tag == self.namespaced(self.rootNode)
282 else:
283 # We strip out newlines to ensure that spaces in the skeleton don't
284 # interfere with the pretty printing of lxml
285 self.parse(self.XMLskeleton.replace("\n", ""))
286 self.setsourcelanguage(sourcelanguage)
287 self.settargetlanguage(targetlanguage)
288 self.addheader()
289 self._encoding = "UTF-8"
290
294
296 """Returns name in Clark notation.
297
298 For example namespaced("source") in an XLIFF document might return::
299 {urn:oasis:names:tc:xliff:document:1.1}source
300 This is needed throughout lxml.
301 """
302 return namespaced(self.namespace, name)
303
305 """Initialises self.body so it never needs to be retrieved from the XML again."""
306 self.namespace = self.document.getroot().nsmap.get(None, None)
307 self.body = self.document.find('//%s' % self.namespaced(self.bodyNode))
308
310 #TODO: maybe this should rather be called addsourcestring or something?
311 """Adds and returns a new unit with the given string as first entry."""
312 newunit = self.UnitClass(source)
313 self.addunit(newunit)
314 return newunit
315
317 unit.namespace = self.namespace
318 super(LISAfile, self).addunit(unit)
319 if new:
320 self.body.append(unit.xmlelement)
321
323 """Converts to a string containing the file's XML"""
324 return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8')
325
327 """Populates this object from the given xml string"""
328 if not hasattr(self, 'filename'):
329 self.filename = getattr(xml, 'name', '')
330 if hasattr(xml, "read"):
331 xml.seek(0)
332 posrc = xml.read()
333 xml = posrc
334 if etree.LXML_VERSION >= (2, 1, 0):
335 #Since version 2.1.0 we can pass the strip_cdata parameter to
336 #indicate that we don't want cdata to be converted to raw XML
337 parser = etree.XMLParser(strip_cdata=False)
338 else:
339 parser = etree.XMLParser()
340 self.document = etree.fromstring(xml, parser).getroottree()
341 self._encoding = self.document.docinfo.encoding
342 self.initbody()
343 assert self.document.getroot().tag == self.namespaced(self.rootNode)
344 for entry in self.body.iterdescendants(self.namespaced(self.UnitClass.rootNode)):
345 term = self.UnitClass.createfromxmlElement(entry)
346 self.addunit(term, new=False)
347
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Mon Feb 1 16:44:42 2010 | http://epydoc.sourceforge.net |