| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2004-2008 Zuza Software Foundation
5 #
6 # This file is part of translate.
7 #
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
22 """This is a set of validation checks that can be performed on translation
23 units.
24
25 Derivatives of UnitChecker (like StandardUnitChecker) check translation units,
26 and derivatives of TranslationChecker (like StandardChecker) check
27 (source, target) translation pairs.
28
29 When adding a new test here, please document and explain the behaviour on the
30 U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}.
31 """
32
33 from translate.filters import helpers
34 from translate.filters import decoration
35 from translate.filters import prefilters
36 from translate.filters import spelling
37 from translate.lang import factory
38 from translate.lang import data
39 # The import of xliff could fail if the user doesn't have lxml installed. For
40 # now we try to continue gracefully to help users who aren't interested in
41 # support for XLIFF or other XML formats.
42 try:
43 from translate.storage import xliff
44 except ImportError, e:
45 xliff = None
46 # The import of xliff fails silently in the absence of lxml if another module
47 # already tried to import it unsuccessfully, so let's make 100% sure:
48 if not hasattr(xliff, "xliffunit"):
49 xliff = None
50 import re
51
# These are some regular expressions that are compiled for use in some tests

# printf syntax based on http://en.wikipedia.org/wiki/Printf, which doesn't
# cover everything.  We leave \w as the conversion type instead of specifying
# the exact letters, as this should capture printf types defined on other
# platforms.  Extended to support Python named format specifiers.
#   ord     - positional argument index, as in %1$s
#   key     - Python mapping key, as in %(name)s
#   fullvar - flags, width, precision, length modifier and conversion type
#   type    - the conversion type character
# The length modifiers must be a real alternation: with escaped pipes the
# group only matches the literal text "hh|h|l|ll", so "%ld" was captured as
# just "l".  "ll" is listed before "l" so that "%lld" is captured whole.
printf_pat = re.compile(r'%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh|h|ll|l)?(?P<type>[\w%])))')

# The name of the XML tag
tagname_re = re.compile(r"<[\s]*([\w\/]*)")

# We allow escaped quotes, probably for old escaping style of OOo helpcontent
#TODO: remove escaped strings once usage is audited
property_re = re.compile(" (\\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))")

# The whole tag
tag_re = re.compile("<[^>]+>")

# A double-quoted run of lowercase letters and underscores
gconf_attribute_re = re.compile('"[a-z_]+?"')

70
def tagname(string):
    """Return the name of the XML/HTML tag that string starts with.

    The name is whatever tagname_re captures after the opening "<" and any
    whitespace, so a closing tag such as "</a>" yields "/a".  string must
    start with "<"; otherwise the pattern does not match and the attribute
    access on None raises AttributeError.
    """
    found = tagname_re.match(string)
    captured = found.groups(1)
    return captured[0]

74
def intuplelist(pair, list):
    """Look pair == (a, b, c) up in list, treating None entries as wildcards.

    A pattern (x, y, z) from list matches when y equals b, x is either a or
    None, and z is either c or None.  The first matching pattern is returned.
    If nothing matches, or if pair is a bare tag name (b and c both None),
    pair itself is returned, so callers detect a hit by comparing the result
    with pair.
    """
    first, second, third = pair
    if (second, third) == (None, None):
        # Only a tag name was given; there is nothing to look up.
        return pair
    for candidate in list:
        tag, name, value = candidate
        head = (tag, name)
        head_matches = head == (first, second) or head == (None, second)
        # The value is only considered once tag and name have matched.
        if head_matches and (value is None or value == third):
            return candidate
    return pair

89
91 """Returns all the properties in the XML/HTML tag string as
92 (tagname, propertyname, propertyvalue), but ignore those combinations
93 specified in ignore."""
94 properties = []
95 for string in strings:
96 tag = tagname(string)
97 properties += [(tag, None, None)]
98 #Now we isolate the attribute pairs.
99 pairs = property_re.findall(string)
100 for property, value, a, b in pairs:
101 #Strip the quotes:
102 value = value[1:-1]
103
104 canignore = False
105 if (tag, property, value) in ignore or \
106 intuplelist((tag,property,value), ignore) != (tag,property,value):
107 canignore = True
108 break
109 if not canignore:
110 properties += [(tag, property, value)]
111 return properties
112
113
115 """This exception signals that a Filter didn't pass, and gives an explanation
116 or a comment"""
118 if not isinstance(messages, list):
119 messages = [messages]
120 assert isinstance(messages[0], unicode) # Assumption: all of same type
121 joined = u", ".join(messages)
122 Exception.__init__(self, joined)
123 # Python 2.3 doesn't have .args
124 if not hasattr(self, "args"):
125 self.args = joined
126
128 """This exception signals that a Filter didn't pass, and the bad translation
129 might break an application (so the string will be marked fuzzy)"""
130 pass
131
132 #(tag, attribute, value) specifies a certain attribute which can be changed/
133 #ignored if it exists inside tag. In the case where there is a third element
134 #in the tuple, it indicates a property value that can be ignored if present
135 #(like defaults, for example)
136 #If a certain item is None, it indicates that it is relevant for all values of
137 #the property/tag that is specified as None. A non-None value of "value"
138 #indicates that the value of the attribute must be taken into account.
139 common_ignoretags = [(None, "xml-lang", None)]
140 common_canchangetags = [("img", "alt", None), (None, "title", None)]
141 # Actually the title attribute is allowed on many tags in HTML (but probably not all)
142
144 """object representing the configuration of a checker"""
def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
             notranslatewords=None, musttranslatewords=None, validchars=None,
             punctuation=None, endpunctuation=None, ignoretags=None,
             canchangetags=None, criticaltests=None, credit_sources=None):
    """Build a checker configuration; every argument is optional.

    List options left as None become empty lists.  punctuation and
    endpunctuation default to the target language's values, and ignoretags
    and canchangetags default to the module-level common_* lists.  The
    source language is always 'en'.
    """
    normalize = data.normalized_unicode
    # List-valued options
    for option, supplied in (("accelmarkers", accelmarkers),
                             ("varmatches", varmatches),
                             ("criticaltests", criticaltests),
                             ("credit_sources", credit_sources)):
        setattr(self, option, self._init_list(supplied))
    # Language data: updatetargetlanguage() provides self.lang, which the
    # defaults below read.
    self.targetlanguage = targetlanguage
    self.updatetargetlanguage(targetlanguage)
    self.sourcelang = factory.getlanguage('en')
    language = self.lang
    # Options that fall back to a default when not supplied
    self.punctuation = self._init_default(normalize(punctuation), language.punctuation)
    self.endpunctuation = self._init_default(normalize(endpunctuation), language.sentenceend)
    self.ignoretags = self._init_default(ignoretags, common_ignoretags)
    self.canchangetags = self._init_default(canchangetags, common_canchangetags)
    # Word lists are kept as dictionaries keyed on the normalised word
    # TODO: allow user configuration of untranslatable words
    untranslatable = [normalize(word) for word in self._init_list(notranslatewords)]
    self.notranslatewords = dict.fromkeys(untranslatable)
    required = [normalize(word) for word in self._init_list(musttranslatewords)]
    self.musttranslatewords = dict.fromkeys(required)
    # Valid characters
    normalized_validchars = normalize(validchars)
    self.validcharsmap = {}
    self.updatevalidchars(normalized_validchars)

170
172 """initialise configuration paramaters that are lists
173
174 @type list: List
175 @param list: None (we'll initialise a blank list) or a list paramater
176 @rtype: List
177 """
178 if list is None:
179 list = []
180 return list
181
183 """initialise parameters that can have default options
184
185 @param param: the user supplied paramater value
186 @param default: default values when param is not specified
187 @return: the paramater as specified by the user of the default settings
188 """
189 if param is None:
190 return default
191 return param
192
194 """combines the info in otherconfig into this config object"""
195 self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
196 self.updatetargetlanguage(self.targetlanguage)
197 self.accelmarkers.extend([c for c in otherconfig.accelmarkers if not c in self.accelmarkers])
198 self.varmatches.extend(otherconfig.varmatches)
199 self.notranslatewords.update(otherconfig.notranslatewords)
200 self.musttranslatewords.update(otherconfig.musttranslatewords)
201 self.validcharsmap.update(otherconfig.validcharsmap)
202 self.punctuation += otherconfig.punctuation
203 self.endpunctuation += otherconfig.endpunctuation
204 #TODO: consider also updating in the following cases:
205 self.ignoretags = otherconfig.ignoretags
206 self.canchangetags = otherconfig.canchangetags
207 self.criticaltests.extend(otherconfig.criticaltests)
208 self.credit_sources = otherconfig.credit_sources
209
211 """updates the map that eliminates valid characters"""
212 if validchars is None:
213 return True
214 validcharsmap = dict([(ord(validchar), None) for validchar in data.normalized_unicode(validchars)])
215 self.validcharsmap.update(validcharsmap)
216
218 """Updates the target language in the config to the given target language"""
219 self.lang = factory.getlanguage(langcode)
220
def cache_results(f):
    """Wrap the one-argument method f so that its results are cached.

    The wrapper stores each result in self.results_cache under the key
    (f.__name__, param1) and returns the stored value on later calls with
    the same argument.  The instance must provide a results_cache
    dictionary, and param1 must be hashable.
    """
    def cached_f(self, param1):
        cache = self.results_cache
        key = (f.__name__, param1)
        try:
            return cache[key]
        except KeyError:
            pass
        # Not cached yet: compute once and remember the result.
        result = cache[key] = f(self, param1)
        return result
    return cached_f

232
234 """Parent Checker class which does the checking based on functions available
235 in derived classes."""
236 preconditions = {}
237
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Set up the checker's configuration, helper table and default filters.

    A fresh CheckerConfig is used when checkerconfig is None.  Every
    callable attribute named in dir(UnitChecker) is recorded in
    self.helperfunctions, so that getfilters() does not treat it as a test.
    """
    self.errorhandler = errorhandler
    if checkerconfig is None:
        checkerconfig = CheckerConfig()
    self.setconfig(checkerconfig)
    # exclude functions defined in UnitChecker from being treated as tests...
    inherited = self.helperfunctions = {}
    for name in dir(UnitChecker):
        attribute = getattr(self, name)
        if callable(attribute):
            inherited[name] = attribute
    self.defaultfilters = self.getfilters(excludefilters, limitfilters)

    self.results_cache = {}

253
255 """returns dictionary of available filters, including/excluding those in
256 the given lists"""
257 filters = {}
258 if limitfilters is None:
259 # use everything available unless instructed
260 limitfilters = dir(self)
261 if excludefilters is None:
262 excludefilters = {}
263 for functionname in limitfilters:
264 if functionname in excludefilters: continue
265 if functionname in self.helperfunctions: continue
266 if functionname == "errorhandler": continue
267 filterfunction = getattr(self, functionname, None)
268 if not callable(filterfunction): continue
269 filters[functionname] = filterfunction
270 return filters
271
273 """sets the accelerator list"""
274 self.config = config
275 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
276 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
277 for startmatch, endmatch in self.config.varmatches]
278 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone)
279 for startmatch, endmatch in self.config.varmatches]
280
282 """Sets the filename that a checker should use for evaluating suggestions."""
283 self.suggestion_store = store
284 if self.suggestion_store:
285 self.suggestion_store.require_index()
286
290 filtervariables = cache_results(filtervariables)
291
295 removevariables = cache_results(removevariables)
296
298 """filter out accelerators from str1"""
299 return helpers.multifilter(str1, self.accfilters, None)
300 filteraccelerators = cache_results(filteraccelerators)
301
303 """filter out accelerators from str1"""
304 return helpers.multifilter(str1, self.accfilters, acceptlist)
305
307 """replaces words with punctuation with their unpunctuated equivalents"""
308 return prefilters.filterwordswithpunctuation(str1)
309 filterwordswithpunctuation = cache_results(filterwordswithpunctuation)
310
314 filterxml = cache_results(filterxml)
315
317 """Runs the given test on the given unit.
318
319 Note that this can raise a FilterFailure as part of normal operation"""
320 return test(unit)
321
323 """run all the tests in this suite, return failures as testname, message_or_exception"""
324 self.results_cache = {}
325 failures = {}
326 ignores = self.config.lang.ignoretests[:]
327 functionnames = self.defaultfilters.keys()
328 priorityfunctionnames = self.preconditions.keys()
329 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
330 for functionname in priorityfunctionnames + otherfunctionnames:
331 if functionname in ignores:
332 continue
333 filterfunction = getattr(self, functionname, None)
334 # this filterfunction may only be defined on another checker if using TeeChecker
335 if filterfunction is None:
336 continue
337 filtermessage = filterfunction.__doc__
338 try:
339 filterresult = self.run_test(filterfunction, unit)
340 except FilterFailure, e:
341 filterresult = False
342 filtermessage = e.args[0]
343 except Exception, e:
344 if self.errorhandler is None:
345 raise ValueError("error in filter %s: %r, %r, %s" % \
346 (functionname, unit.source, unit.target, e))
347 else:
348 filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
349 if not filterresult:
350 # we test some preconditions that aren't actually a cause for failure
351 if functionname in self.defaultfilters:
352 failures[functionname] = filtermessage
353 if functionname in self.preconditions:
354 for ignoredfunctionname in self.preconditions[functionname]:
355 ignores.append(ignoredfunctionname)
356 self.results_cache = {}
357 return failures
358
360 """A checker that passes source and target strings to the checks, not the
361 whole unit.
362
363 This provides some speedup and simplifies testing."""
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
    """Initialise the checker by passing all four arguments, unchanged and
    in order, to the parent class constructor."""
    parent_init = super(TranslationChecker, self).__init__
    parent_init(checkerconfig, excludefilters, limitfilters, errorhandler)
366
368 """Runs the given test on the given unit.
369
370 Note that this can raise a FilterFailure as part of normal operation."""
371 if self.hasplural:
372 filtermessages = []
373 filterresult = True
374 for pluralform in unit.target.strings:
375 try:
376 if not test(self.str1, unicode(pluralform)):
377 filterresult = False
378 except FilterFailure, e:
379 filterresult = False
380 filtermessages.append( unicode(e.args) )
381 if not filterresult and filtermessages:
382 raise FilterFailure(filtermessages)
383 else:
384 return filterresult
385 else:
386 return test(self.str1, self.str2)
387
389 """Do some optimisation by caching some data of the unit for the benefit
390 of run_test()."""
391 self.str1 = data.normalized_unicode(unit.source) or u""
392 self.str2 = data.normalized_unicode(unit.target) or u""
393 self.hasplural = unit.hasplural()
394 self.locations = unit.getlocations()
395 return super(TranslationChecker, self).run_filters(unit)
396
398 """A Checker that controls multiple checkers."""
def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
        checkerclasses=None, errorhandler=None, languagecode=None):
    """Construct a TeeChecker that drives one checker per class given.

    checkerclasses defaults to [StandardChecker].  Each class is
    instantiated with the same config, filter lists and error handler.
    When languagecode is given, every checker's config is switched to that
    language, and the language's own checker is appended if it has one.
    """
    self.limitfilters = limitfilters
    if checkerclasses is None:
        checkerclasses = [StandardChecker]
    built = []
    for checkerclass in checkerclasses:
        built.append(checkerclass(checkerconfig=checkerconfig,
                                  excludefilters=excludefilters,
                                  limitfilters=limitfilters,
                                  errorhandler=errorhandler))
    self.checkers = built
    # NOTE(review): the listing lost its indentation; the language-checker
    # hook-up is assumed to sit inside this branch - confirm against the
    # original file.
    if languagecode:
        for checker in built:
            checker.config.updatetargetlanguage(languagecode)
        # Let's hook up the language specific checker
        lang_checker = built[0].config.lang.checker
        if lang_checker:
            built.append(lang_checker)

    self.combinedfilters = self.getfilters(excludefilters, limitfilters)
    if checkerconfig:
        self.config = checkerconfig
    else:
        self.config = self.checkers[0].config

416
418 """returns dictionary of available filters, including/excluding those in
419 the given lists"""
420 if excludefilters is None:
421 excludefilters = {}
422 filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
423 self.combinedfilters = {}
424 for filters in filterslist:
425 self.combinedfilters.update(filters)
426 # TODO: move this somewhere more sensible (a checkfilters method?)
427 if limitfilters is not None:
428 for filtername in limitfilters:
429 if not filtername in self.combinedfilters:
430 import sys
431 print >> sys.stderr, "warning: could not find filter %s" % filtername
432 return self.combinedfilters
433
435 """run all the tests in the checker's suites"""
436 failures = {}
437 for checker in self.checkers:
438 failures.update(checker.run_filters(unit))
439 return failures
440
442 """Sets the filename that a checker should use for evaluating suggestions."""
443 for checker in self.checkers:
444 checker.setsuggestionstore(store)
445
446
448 """The basic test suite for source -> target translations."""
450 """checks whether a string has been translated at all"""
451 str2 = prefilters.removekdecomments(str2)
452 return not (len(str1.strip()) > 0 and len(str2) == 0)
453
455 """checks whether a translation is basically identical to the original string"""
456 str1 = self.filteraccelerators(self.removevariables(str1)).strip()
457 str2 = self.filteraccelerators(self.removevariables(str2)).strip()
458 if len(str1) < 2:
459 return True
460 # If the whole string is upperase, or nothing in the string can go
461 # towards uppercase, let's assume there is nothing translatable
462 # TODO: reconsider
463 if (str1.isupper() or str1.upper() == str1) and str1 == str2:
464 return True
465 if self.config.notranslatewords:
466 words1 = str1.split()
467 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]:
468 #currently equivalent to:
469 # if len(words1) == 1 and words1[0] in self.config.notranslatewords:
470 #why do we only test for one notranslate word?
471 return True
472 # we could also check for things like str1.isnumeric(), but the test
473 # above (str1.upper() == str1) makes this unnecessary
474 if str1.lower() == str2.lower():
475 raise FilterFailure(u"please translate")
476 return True
477
479 """checks whether a translation only contains spaces"""
480 len1 = len(str1.strip())
481 len2 = len(str2.strip())
482 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
483
485 """checks whether a translation is much shorter than the original string"""
486 len1 = len(str1.strip())
487 len2 = len(str2.strip())
488 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
489
491 """checks whether a translation is much longer than the original string"""
492 len1 = len(str1.strip())
493 len2 = len(str2.strip())
494 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
495
497 """checks whether escaping is consistent between the two strings"""
498 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
499 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word])
500 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word])
501 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
502 else:
503 return True
504
506 """checks whether newlines are consistent between the two strings"""
507 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")):
508 raise FilterFailure(u"line endings in original don't match line endings in translation")
509 else:
510 return True
511
513 """checks whether tabs are consistent between the two strings"""
514 if not helpers.countmatch(str1, str2, "\t"):
515 raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
516 else:
517 return True
518
520 """checks whether singlequoting is consistent between the two strings"""
521 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1)))
522 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2)))
523 return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
524
526 """checks whether doublequoting is consistent between the two strings"""
527 str1 = self.filteraccelerators(self.filtervariables(str1))
528 str1 = self.filterxml(str1)
529 str1 = self.config.lang.punctranslate(str1)
530 str2 = self.filteraccelerators(self.filtervariables(str2))
531 str2 = self.filterxml(str2)
532 return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”"))
533
535 """checks for bad double-spaces by comparing to original"""
536 str1 = self.filteraccelerators(str1)
537 str2 = self.filteraccelerators(str2)
538 return helpers.countmatch(str1, str2, u" ")
539
541 """checks for bad spacing after punctuation"""
542 if str1.find(u" ") == -1:
543 return True
544 str1 = self.filteraccelerators(self.filtervariables(str1))
545 str1 = self.config.lang.punctranslate(str1)
546 str2 = self.filteraccelerators(self.filtervariables(str2))
547 for puncchar in self.config.punctuation:
548 plaincount1 = str1.count(puncchar)
549 plaincount2 = str2.count(puncchar)
550 if not plaincount1 or plaincount1 != plaincount2:
551 continue
552 spacecount1 = str1.count(puncchar + u" ")
553 spacecount2 = str2.count(puncchar + u" ")
554 if spacecount1 != spacecount2:
555 # handle extra spaces that are because of transposed punctuation
556 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1:
557 continue
558 return False
559 return True
560
562 """checks whether printf format strings match"""
563 count1 = count2 = plural = None
564 # self.hasplural only set by run_filters, not always available
565 if 'hasplural' in self.__dict__:
566 plural = self.hasplural
567 for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
568 count2 = var_num2 + 1
569 str2key = match2.group('key')
570 if match2.group('ord'):
571 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
572 count1 = var_num1 + 1
573 if int(match2.group('ord')) == var_num1 + 1:
574 if match2.group('fullvar') != match1.group('fullvar'):
575 return 0
576 elif str2key:
577 str1key = None
578 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
579 count1 = var_num1 + 1
580 if match1.group('key') and str2key == match1.group('key'):
581 str1key = match1.group('key')
582 # '%.0s' "placeholder" in plural will match anything
583 if plural and match2.group('fullvar') == '.0s':
584 continue
585 if match1.group('fullvar') != match2.group('fullvar'):
586 return 0
587 if str1key == None:
588 return 0
589 else:
590 for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
591 count1 = var_num1 + 1
592 # '%.0s' "placeholder" in plural will match anything
593 if plural and match2.group('fullvar') == '.0s':
594 continue
595 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
596 return 0
597
598 if count2 is None:
599 if list(printf_pat.finditer(str1)):
600 return 0
601
602 if (count1 or count2) and (count1 != count2):
603 return 0
604 return 1
605
607 """checks whether accelerators are consistent between the two strings"""
608 str1 = self.filtervariables(str1)
609 str2 = self.filtervariables(str2)
610 messages = []
611 for accelmarker in self.config.accelmarkers:
612 counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
613 counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
614 count1, countbad1 = counter1(str1)
615 count2, countbad2 = counter2(str2)
616 getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
617 accel2, bad2 = getaccel(str2)
618 if count1 == count2:
619 continue
620 if count1 == 1 and count2 == 0:
621 if countbad2 == 1:
622 messages.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
623 else:
624 messages.append(u"accelerator %s is missing from translation" % accelmarker)
625 elif count1 == 0:
626 messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker)
627 elif count1 == 1 and count2 > count1:
628 messages.append(u"accelerator %s is repeated in translation" % accelmarker)
629 else:
630 messages.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
631 if messages:
632 if "accelerators" in self.config.criticaltests:
633 raise SeriousFilterFailure(messages)
634 else:
635 raise FilterFailure(messages)
636 return True
637
638 # def acceleratedvariables(self, str1, str2):
639 # """checks that no variables are accelerated"""
640 # messages = []
641 # for accelerator in self.config.accelmarkers:
642 # for variablestart, variableend in self.config.varmatches:
643 # error = accelerator + variablestart
644 # if str1.find(error) >= 0:
645 # messages.append(u"original has an accelerated variable")
646 # if str2.find(error) >= 0:
647 # messages.append(u"translation has an accelerated variable")
648 # if messages:
649 # raise FilterFailure(messages)
650 # return True
651
653 """checks whether variables of various forms are consistent between the two strings"""
654 messages = []
655 mismatch1, mismatch2 = [], []
656 varnames1, varnames2 = [], []
657 for startmarker, endmarker in self.config.varmatches:
658 varchecker = decoration.getvariables(startmarker, endmarker)
659 if startmarker and endmarker:
660 if isinstance(endmarker, int):
661 redecorate = lambda var: startmarker + var
662 else:
663 redecorate = lambda var: startmarker + var + endmarker
664 elif startmarker:
665 redecorate = lambda var: startmarker + var
666 else:
667 redecorate = lambda var: var
668 vars1 = varchecker(str1)
669 vars2 = varchecker(str2)
670 if vars1 != vars2:
671 # we use counts to compare so we can handle multiple variables
672 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
673 # filter variable names we've already seen, so they aren't matched by more than one filter...
674 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
675 varnames1.extend(vars1)
676 varnames2.extend(vars2)
677 vars1 = map(redecorate, vars1)
678 vars2 = map(redecorate, vars2)
679 mismatch1.extend(vars1)
680 mismatch2.extend(vars2)
681 if mismatch1:
682 messages.append(u"do not translate: %s" % u", ".join(mismatch1))
683 elif mismatch2:
684 messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
685 if messages and mismatch1:
686 raise SeriousFilterFailure(messages)
687 elif messages:
688 raise FilterFailure(messages)
689 return True
690
692 """checks that function names are not translated"""
693 return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
694
696 """checks that emails are not translated"""
697 return helpers.funcmatch(str1, str2, decoration.getemails)
698
700 """checks that URLs are not translated"""
701 return helpers.funcmatch(str1, str2, decoration.geturls)
702
704 """checks whether numbers of various forms are consistent between the two strings"""
705 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
706
708 """checks whether whitespace at the beginning of the strings matches"""
709 return helpers.funcmatch(str1, str2, decoration.spacestart)
710
712 """checks whether whitespace at the end of the strings matches"""
713 str1 = self.config.lang.punctranslate(str1)
714 return helpers.funcmatch(str1, str2, decoration.spaceend)
715
717 """checks whether punctuation at the beginning of the strings match"""
718 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
719 str1 = self.config.lang.punctranslate(str1)
720 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
721 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
722
724 """checks whether punctuation at the end of the strings match"""
725 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1)))
726 str1 = self.config.lang.punctranslate(str1)
727 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2)))
728 str1 = str1.rstrip()
729 str2 = str2.rstrip()
730 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation + u":")
731
733 """checks that strings that are purely punctuation are not changed"""
734 # this test is a subset of startandend
735 if (decoration.ispurepunctuation(str1)):
736 return str1 == str2
737 else:
738 return not decoration.ispurepunctuation(str2)
739
741 """checks that the number of brackets in both strings match"""
742 str1 = self.filtervariables(str1)
743 str2 = self.filtervariables(str2)
744 messages = []
745 missing = []
746 extra = []
747 for bracket in (u"[", u"]", u"{", u"}", u"(", u")"):
748 count1 = str1.count(bracket)
749 count2 = str2.count(bracket)
750 if count2 < count1:
751 missing.append(u"'%s'" % bracket)
752 elif count2 > count1:
753 extra.append(u"'%s'" % bracket)
754 if missing:
755 messages.append(u"translation is missing %s" % u", ".join(missing))
756 if extra:
757 messages.append(u"translation has extra %s" % u", ".join(extra))
758 if messages:
759 raise FilterFailure(messages)
760 return True
761
763 """checks that the number of sentences in both strings match"""
764 str1 = self.filteraccelerators(str1)
765 str2 = self.filteraccelerators(str2)
766 sentences1 = len(self.config.sourcelang.sentences(str1))
767 sentences2 = len(self.config.lang.sentences(str2))
768 if not sentences1 == sentences2:
769 raise FilterFailure(u"The number of sentences differ: %d versus %d" % (sentences1, sentences2))
770 return True
771
773 """checks that options are not translated"""
774 str1 = self.filtervariables(str1)
775 for word1 in str1.split():
776 if word1 != u"--" and word1.startswith(u"--") and word1[-1].isalnum():
777 parts = word1.split(u"=")
778 if not parts[0] in str2:
779 raise FilterFailure(u"The option %s does not occur or is translated in the translation." % parts[0])
780 if len(parts) > 1 and parts[1] in str2:
781 raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": parts[0]})
782 return True
783
785 """checks that the message starts with the correct capitalisation"""
786 str1 = self.filteraccelerators(str1)
787 str2 = self.filteraccelerators(str2)
788 if len(str1) > 1 and len(str2) > 1:
789 return self.config.sourcelang.capsstart(str1) == self.config.lang.capsstart(str2)
790 if len(str1) == 0 and len(str2) == 0:
791 return True
792 if len(str1) == 0 or len(str2) == 0:
793 return False
794 return True
795
797 """checks the capitalisation of two strings isn't wildly different"""
798 str1 = self.removevariables(str1)
799 str2 = self.removevariables(str2)
800 # TODO: review this. The 'I' is specific to English, so it probably serves
801 # no purpose to get sourcelang.sentenceend
802 str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, u" i ", str1)
803 capitals1 = helpers.filtercount(str1, unicode.isupper)
804 capitals2 = helpers.filtercount(str2, unicode.isupper)
805 alpha1 = helpers.filtercount(str1, unicode.isalpha)
806 alpha2 = helpers.filtercount(str2, unicode.isalpha)
807 # Capture the all caps case
808 if capitals1 == alpha1:
809 return capitals2 == alpha2
810 # some heuristic tests to try and see that the style of capitals is vaguely the same
811 if capitals1 == 0 or capitals1 == 1:
812 return capitals2 == capitals1
813 elif capitals1 < len(str1) / 10:
814 return capitals2 <= len(str2) / 8
815 elif len(str1) < 10:
816 return abs(capitals1 - capitals2) < 3
817 elif capitals1 > len(str1) * 6 / 10:
818 return capitals2 > len(str2) * 6 / 10
819 else:
820 return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
821
def acronyms(self, str1, str2):
    """Check that acronyms that appear in the source are unchanged.

    A word of the source counts as an acronym when it is entirely upper
    case and longer than one character.  Variables found in the source and
    the keys of ``config.musttranslatewords`` are exempt.

    Raises FilterFailure listing every acronym that does not occur in the
    translation; returns True otherwise.
    """
    allowed = []
    for startmatch, endmatch in self.config.varmatches:
        allowed += decoration.getvariables(startmatch, endmatch)(str1)
    allowed.extend(self.config.musttranslatewords.keys())
    str1 = self.filteraccelerators(self.filtervariables(str1))
    str2 = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messsages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    missing = [word for word in self.config.lang.word_iter(str1)
               if word.isupper() and len(word) > 1
               and word not in allowed and word not in str2]
    if missing:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(missing))
    return True
843
def doublewords(self, str1, str2):
    """Check for repeated words in the translation.

    Variables, accelerators and full stops are removed from ``str2`` and the
    text is lower-cased before consecutive words are compared.  Words listed
    in ``config.lang.validdoublewords`` may be repeated.

    Raises FilterFailure naming the first repeated word; returns True
    otherwise.
    """
    # split() with no argument already breaks on newlines as well as spaces,
    # so no separate newline handling is needed.
    text = self.filteraccelerators(self.removevariables(str2))
    words = text.replace(u".", u"").lower().split()
    previous = u""
    for word in words:
        if word == previous and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
        previous = word
    return True
854
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable appear in the translation too.

    Passes immediately when ``config.notranslatewords`` is empty.

    Raises FilterFailure listing each configured word that occurs in the
    source but not in the translation; returns True otherwise.
    """
    if not self.config.notranslatewords:
        return True
    str1 = self.filtervariables(str1)
    str2 = self.filtervariables(str2)
    #The above is full of strange quotes and things in utf-8 encoding.
    #single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        str1 = str1.replace(separator, u" ")
        str2 = str2.replace(separator, u" ")
    words1 = self.filteraccelerators(str1).split()
    # Only membership is tested on the target words, so a set is enough.
    words2 = set(self.filteraccelerators(str2).split())
    stopwords = [word for word in words1
                 if word in self.config.notranslatewords and word not in words2]
    if stopwords:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(stopwords)))
    return True
872
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable don't appear in
    the translation.

    Passes immediately when ``config.musttranslatewords`` is empty.

    Raises FilterFailure listing each configured word that occurs in both
    the source and the translation; returns True otherwise.
    """
    if not self.config.musttranslatewords:
        return True
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    #The above is full of strange quotes and things in utf-8 encoding.
    #single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        str1 = str1.replace(separator, u" ")
        str2 = str2.replace(separator, u" ")
    words1 = self.filteraccelerators(str1).split()
    # Only membership is tested on the target words, so a set is enough.
    words2 = set(self.filteraccelerators(str2).split())
    stopwords = [word for word in words1
                 if word in self.config.musttranslatewords and word in words2]
    if stopwords:
        raise FilterFailure(u"please translate: %s" % (u", ".join(stopwords)))
    return True
891
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the translation.

    Both strings are passed through ``config.validcharsmap`` with
    ``translate``; whatever is left of the translation is reported unless
    the same character is also left over from the source.  Passes
    immediately when no map is configured.

    Raises FilterFailure listing the offending characters and their code
    points; returns True otherwise.
    """
    if not self.config.validcharsmap:
        return True
    invalid1 = str1.translate(self.config.validcharsmap)
    invalid2 = str2.translate(self.config.validcharsmap)
    invalidchars = [u"'%s' (\\u%04x)" % (invalidchar, ord(invalidchar))
                    for invalidchar in invalid2 if invalidchar not in invalid1]
    if invalidchars:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(invalidchars)))
    return True
902
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every whitespace-separated word of the accelerator-filtered source that
    starts with ``/`` is passed to ``helpers.countsmatch`` together with
    both strings; the test fails as soon as that call returns a false value.
    """
    for word1 in self.filteraccelerators(str1).split():
        if word1.startswith(u"/") and not helpers.countsmatch(str1, str2, (word1,)):
            return False
    return True
910
937
def kdecomments(self, str1, str2):
    """Check that no KDE style comments appear in the translation.

    Fails when the translation starts with ``_:`` or contains ``_:`` at the
    start of any later line.
    """
    return u"\n_:" not in str2 and not str2.startswith(u"_:")
941
def compendiumconflicts(self, str1, str2):
    """Check for Gettext compendium conflicts (#-#-#-#-#).

    Fails when the conflict marker occurs anywhere in the translation.
    """
    return u"#-#-#-#-#" not in str2
945
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) for you to review.

    Counts occurrences of ``(s)`` in both strings.  When the target language
    has a single plural form the translation must contain none; otherwise
    the counts must match.
    """
    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
961
def spellcheck(self, str1, str2):
    """Check for words in the translation that don't pass a spell check.

    Passes without checking when no target language is configured or when
    ``spelling.available`` is false.  A flagged word is not reported when it
    is listed in ``config.notranslatewords``, is also flagged in the source
    text, or occurs among its own suggestions.

    Raises FilterFailure with a list of messages, one per reported word;
    returns True otherwise.
    """
    if not self.config.targetlanguage:
        return True
    if not spelling.available:
        return True
    # TODO: filterxml?
    str1 = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    str2 = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    # Words already flagged in the source (checked as English) are ignored.
    ignore1 = set([word for word, index, suggestions in spelling.check(str1, lang="en")])
    messages = []
    for word, index, suggestions in spelling.check(str2, lang=self.config.targetlanguage):
        if word in self.config.notranslatewords:
            continue
        if word in ignore1:
            continue
        # hack to ignore hyphenisation rules
        if word in suggestions:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
987
def credits(self, str1, str2):
    """Check for messages containing translation credits instead of normal translations.

    Fails when the source string is one of ``config.credit_sources``.
    """
    return str1 not in self.config.credit_sources
991
# If the precondition filter is run and fails then the other tests listed are ignored
preconditions = {
    "untranslated": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
        "blank", "nplurals", "gconf"),
    "blank": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
        "gconf"),
    "credits": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "doublespacing",
        "sentencecount", "numbers",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options"),
    "purepunc": ("startcaps", "options"),
    # This is causing some problems since Python 2.6, as
    # startcaps is now seen as an important one to always execute
    # and could now be done before it is blocked by a failing
    # "untranslated" or "blank" test. This is probably happening
    # due to slightly different implementation of the internal
    # dict handling since Python 2.6. We should never have relied
    # on this ordering anyway.
    #"startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": (
        "accelerators", "brackets", "escapes",
        "numbers", "startpunc", "long", "variables",
        "startcaps", "sentencecount", "simplecaps",
        "doublespacing", "endpunc", "xmltags",
        "startwhitespace", "endwhitespace",
        "singlequoting", "doublequoting",
        "filepaths", "purepunc", "doublewords", "printf"),
}
1042
1043 # code to actually run the tests (use unittest?)
1044
# Settings merged into the checker configuration by OpenOfficeChecker.
# Each varmatches entry is a (start marker, end marker) pair.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
        ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
        ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)],
)
1051
class OpenOfficeChecker(StandardChecker):
    """StandardChecker with the ``openofficeconfig`` settings applied."""

    def __init__(self, **kwargs):
        """Merge ``openofficeconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        none is given (or it is None) a fresh CheckerConfig is created.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
1060
# Settings merged into the checker configuration by MozillaChecker.
# Each varmatches entry is a (start marker, end marker) pair.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1), ("$", "$"),
        ("$", None), ("#", 1), ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)
1066
class MozillaChecker(StandardChecker):
    """StandardChecker with the ``mozillaconfig`` settings applied."""

    def __init__(self, **kwargs):
        """Merge ``mozillaconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        none is given (or it is None) a fresh CheckerConfig is created.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)
1075
1082
# Settings merged into the checker configuration by DrupalChecker.
# Each varmatches entry is a (start marker, end marker) pair.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
)
1086
class DrupalChecker(StandardChecker):
    """StandardChecker with the ``drupalconfig`` settings applied."""

    def __init__(self, **kwargs):
        """Merge ``drupalconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        none is given (or it is None) a fresh CheckerConfig is created.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
1095
# Settings merged into the checker configuration by GnomeChecker.
# Each varmatches entry is a (start marker, end marker) pair.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
)
1101
class GnomeChecker(StandardChecker):
    """StandardChecker with the ``gnomeconfig`` settings applied."""

    def __init__(self, **kwargs):
        """Merge ``gnomeconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        none is given (or it is None) a fresh CheckerConfig is created.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies when one of ``self.locations`` contains ``schemas.in``.
        Each match of ``gconf_attribute_re`` in the source, with its first
        and last character removed, must occur in the translation.

        Raises FilterFailure listing the attributes that are missing;
        returns True otherwise.
        """
        for location in self.locations:
            if 'schemas.in' in location:
                gconf_attributes = gconf_attribute_re.findall(str1)
                stopwords = [word for word in gconf_attributes if word[1:-1] not in str2]
                if stopwords:
                    raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(stopwords)))
        return True
1121
# Settings merged into the checker configuration by KdeChecker.
# Each varmatches entry is a (start marker, end marker) pair.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
)
1127
class KdeChecker(StandardChecker):
    """StandardChecker with the ``kdeconfig`` settings applied."""

    def __init__(self, **kwargs):
        """Merge ``kdeconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        none is given (or it is None) a fresh CheckerConfig is created.
        """
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
1138
# Settings merged into the checker configuration by CCLicenseChecker.
# Each varmatches entry is a (start marker, end marker) pair.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])

class CCLicenseChecker(StandardChecker):
    """StandardChecker with the ``cclicenseconfig`` settings applied."""

    def __init__(self, **kwargs):
        """Merge ``cclicenseconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        none is given (or it is None) a fresh CheckerConfig is created.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
1148
# Maps a project style name to the checker class that implements it.
# Note that "wx" reuses the KDE checker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
}
1158
1159
1161 """The standard checks for common checks on translation units."""
1165
1169
def nplurals(self, unit):
    """Check for the correct number of noun forms for plural translations.

    Only units for which ``unit.hasplural()`` is true are checked, and only
    when the target language has a positive ``nplurals`` value; every other
    unit passes.
    """
    if unit.hasplural():
        # if we don't have a valid nplurals value, don't run the test
        expected = self.config.lang.nplurals
        if expected > 0:
            return len(unit.target.strings) == expected
    return True
1178
def hassuggestion(self, unit):
    """Check if there is at least one suggested translation for this unit.

    Suggestions are looked up by source text in ``self.suggestion_store``
    when one is set; otherwise, for XLIFF units, the unit's alternative
    translations are used.  The test fails when any suggestion is found.
    """
    # Make sure the attribute exists even if it was never configured.
    self.suggestion_store = getattr(self, 'suggestion_store', None)
    suggestions = []
    if self.suggestion_store:
        suggestions = self.suggestion_store.findunits(unit.source)
    elif xliff and isinstance(unit, xliff.xliffunit):
        # TODO: we probably want to filter them somehow
        suggestions = unit.getalttrans()
    return not suggestions
1189
1190
def runtests(str1, str2, ignorelist=()):
    """Verify that the tests pass for a pair of strings.

    Builds a translation unit from the normalised strings, runs a
    StandardChecker over it with the filters in ``ignorelist`` excluded and
    prints one line per failure.

    Returns the failures reported by ``run_filters``, which is empty when
    every test passed.
    """
    from translate.storage import base
    str1 = data.normalized_unicode(str1)
    str2 = data.normalized_unicode(str2)
    unit = base.TranslationUnit(str1)
    unit.target = str2
    checker = StandardChecker(excludefilters=ignorelist)
    failures = checker.run_filters(unit)
    for test in failures:
        print("failure: %s: %s\n %r\n %r" % (test, failures[test], str1, str2))
    return failures
1203
def batchruntests(pairs):
    """Run the tests on a batch of string pairs and print a pass summary.

    ``pairs`` is a sequence of (source, target) string tuples.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures, so a pair passes when it is empty.
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
1212
if __name__ == '__main__':
    # Sample (source, target) pairs run through the standard checker when
    # this module is executed as a script.
    testset = [
        (r"simple", r"somple"),
        (r"\this equals \that", r"does \this equal \that?"),
        (r"this \'equals\' that", r"this 'equals' that"),
        (r" start and end! they must match.", r"start and end! they must match."),
        (r"check for matching %variables marked like %this", r"%this %variable is marked"),
        (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
        (r"check for mismatching %variables% too", r"how many %variable% are marked"),
        (r"%% %%", r"%%"),
        (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
        (r"simple lowercase", r"it is all lowercase"),
        (r"simple lowercase", r"It Is All Lowercase"),
        (r"Simple First Letter Capitals", r"First Letters"),
        (r"SIMPLE CAPITALS", r"First Letters"),
        (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
        (r"forgot to translate", r" "),
    ]
    batchruntests(testset)
1231
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Mon Feb 1 16:45:02 2010 | http://epydoc.sourceforge.net |