[sdk/pology] /: [fr] Add a new sieve for french BFW translators
Johnny Jazeix
null at kde.org
Fri Oct 25 17:29:39 BST 2024
Git commit 33f3c459ac20f66403e6e8180ed6a1cb346bdcc0 by Johnny Jazeix, on behalf of Albéric Poinsard.
Committed on 25/10/2024 at 16:29.
Pushed by jjazeix into branch 'master'.
[fr] Add a new sieve for french BFW translators
Add a useful sieve that performs several automated corrections.
Designed for the French BFW (Battle for Wesnoth) PO files, but it can be used elsewhere too.
M +76 -0 doc/user/sieving.docbook
M +1 -0 lang/fr/sieve/CMakeLists.txt
A +240 -0 lang/fr/sieve/wesnoth.py
https://invent.kde.org/sdk/pology/-/commit/33f3c459ac20f66403e6e8180ed6a1cb346bdcc0
diff --git a/doc/user/sieving.docbook b/doc/user/sieving.docbook
index 8ffdeb9ee..1c6d407de 100644
--- a/doc/user/sieving.docbook
+++ b/doc/user/sieving.docbook
@@ -2583,6 +2583,82 @@ Note that percent characters in the <literal>plural-forms</literal> field are es
</sect2>
+<sect2 id="sv-fr:wesnoth">
+<title><command>fr:wesnoth</command></title>
+
+<para>A set of rules and corrections that can be applied automatically to the French translations of Battle for Wesnoth.</para>
+
+<para>There are three levels of correction:
+<itemizedlist>
+
+<listitem>
+<para>level 1: basic correction</para>
+<para>Use non-breaking spaces before » ; : ! ? % and after «, and remove double spaces.</para>
+</listitem>
+
+
+<listitem>
+<para>level 2: advanced correction (may break something)</para>
+<para>Replace ' with the typographic apostrophe ’, taking care of Pango/Cairo markup and escaped <literal>\\'</literal>, as in <literal><italic>text='...'</italic></literal>.
+It may need to be run several times to work correctly.</para>
+</listitem>
+
+<listitem>
+<para>level 3: very unstable correction (will surely break something)</para>
+<para>Remove spaces before . and ,</para>
+</listitem>
+
+</itemizedlist>
+
+You can also use special filters, each of which performs one specific task.
+
+</para>
+
+<para>Parameters:
+
+<variablelist>
+
+<varlistentry>
+<term><option>quiet</option></term>
+<listitem>
+<para>Don't show the summary at the end of the sieve. Useful for script usage.</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>level</option></term>
+<listitem>
+<para>Set the level(s) of correction (1, 2 or 3). To use multiple levels, write them together, as in <literal>level:12</literal> for levels 1 and 2.</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>extra_spaces</option></term>
+<listitem>
+<para>(Special filter) Replace extra spaces at the start and end of a message with punctuation spaces. You must give the numbers of the messages in which extra spaces should be replaced, as a comma-separated list. For example, <literal>extra_spaces:12,45,789</literal> replaces them in messages 12, 45 and 789.</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>ellipsis3points</option></term>
+<listitem>
+<para>(Special filter) Replace every Unicode ellipsis (…) with three dots (...).</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>ellipsisUnicode</option></term>
+<listitem>
+<para>(Special filter) Replace every run of three dots (...) with a Unicode ellipsis (…). May break something.</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+
+</para>
+
+</sect2>
+
<sect2 id="sv-ru:fill-doc-date-kde">
<title><command>ru:fill-doc-date-kde</command></title>
diff --git a/lang/fr/sieve/CMakeLists.txt b/lang/fr/sieve/CMakeLists.txt
index 9d4bb3c83..a55594b3a 100644
--- a/lang/fr/sieve/CMakeLists.txt
+++ b/lang/fr/sieve/CMakeLists.txt
@@ -1,6 +1,7 @@
set(sieves
setUbsp.py
setApostrophe.py
+ wesnoth.py
)
get_current_source_subdir(srcsubdir)
install(FILES ${sieves} DESTINATION ${DATA_INSTALL_DIR}/${srcsubdir})
diff --git a/lang/fr/sieve/wesnoth.py b/lang/fr/sieve/wesnoth.py
new file mode 100644
index 000000000..d89f6b90e
--- /dev/null
+++ b/lang/fr/sieve/wesnoth.py
@@ -0,0 +1,240 @@
+# -*- coding: UTF-8 -*-
+
+"""
+Auto-correct translations according to the rules established by the French
+BFW translation team.
+It covers non-breaking spaces, ellipsis (…), apostrophes and, optionally, extra spaces.
+
+Inspired by other sieves.
+@author: alberic89 <alberic89 at gmx.com>
+@license: GPLv3"""
+
+import re
+
+from typing import List
+from pology import _, n_
+from pology.report import report
+from pology.sieve import add_param_filter
+
+
+def setup_sieve(p):
+ """Describe the sieve and declare the parameters it accepts.
+
+ Calls ``p.set_desc`` once and ``p.add_param`` five times, registering
+ ``quiet`` (bool), ``level`` (str), ``extra_spaces`` (list),
+ ``ellipsis3points`` (bool) and ``ellipsisUnicode`` (bool).
+
+ NOTE(review): ``p`` is presumably the parameter parser supplied by the
+ Pology sieve runner -- confirm against the pology.sieve documentation.
+ """
+ p.set_desc(
+ _(
+ "@info sieve description",
+ "Correct message according to BFW french standard",
+ )
+ )
+
+ p.add_param(
+ "quiet",
+ bool,
+ defval=False,
+ desc=_(
+ "@info sieve parameter description",
+ "Do not show summary (for script usage)",
+ ),
+ )
+
+ # Kept as a string on purpose: Sieve.process iterates over its characters
+ # and uses each one ("1", "2", "3") as a key into the replacement tables.
+ p.add_param(
+ "level",
+ str,
+ defval="",
+ desc=_(
+ "@info sieve parameter description",
+ "Set level of correction (1, 2 and 3). You can use multiple levels, for example level:12",
+ ),
+ )
+
+ # Declared with the ``list`` type; Sieve.__init__ walks the resulting items
+ # one by one and rebuilds the message numbers from the digit items.
+ # NOTE(review): presumably the raw "12,45" string is turned into a list of
+ # single characters by ``list`` -- confirm how add_param converts values.
+ p.add_param(
+ "extra_spaces",
+ list,
+ defval=[],
+ desc=_(
+ "@info sieve parameter description",
+ "Replace all extra spaces by punctuation space on the message of the numero given. You can specify multiple messages with comma-separated list.",
+ ),
+ )
+
+ p.add_param(
+ "ellipsis3points",
+ bool,
+ defval=False,
+ desc=_(
+ "@info sieve parameter description",
+ "Replace all Unicode ellipsis (…) by three dots (...)",
+ ),
+ )
+
+ p.add_param(
+ "ellipsisUnicode",
+ bool,
+ defval=False,
+ desc=_(
+ "@info sieve parameter description",
+ "Replace all three dots (...) by Unicode ellipsis (…)",
+ ),
+ )
+
+
+class SpecialFilter:
+ """One optional, named text transformation applied to a message.
+
+ Bundles the value that switches the filter on, a per-message condition
+ and the function that rewrites each translation string.
+ """
+
+ def __init__(self, name, value, condition, action):
+ """Store the four components of the filter.
+
+ name: label of the filter, used by __repr__.
+ value: the parameter value that enables the filter; callers test it for
+ truthiness (Sieve passes booleans, and a list for "extra_spaces").
+ condition: callable taking the message and returning a truthy value
+ when the filter should be applied to that message.
+ action: callable taking one msgstr text and returning the new text."""
+ self.name = name
+ self.value = value
+ self.condition = condition
+ self.action = action
+
+ def process(self, msg):
+ """Apply ``action`` to every entry of ``msg.msgstr`` when
+ ``condition(msg)`` is truthy, then return ``msg`` (modified in place)."""
+ if self.condition(msg):
+ for i in range(len(msg.msgstr)):
+ msg.msgstr[i] = self.action(msg.msgstr[i])
+ return msg
+
+ # Equality compares the other operand with the stored ``value``, not with
+ # the filter object itself.
+ # NOTE(review): defining __eq__ without __hash__ makes instances
+ # unhashable; nothing in this file hashes them.
+ def __eq__(self, y):
+ return y == self.value
+
+ def __repr__(self):
+ return f"{self.name} : {self.value} : {self.action}"
+
+
+def _replace_group(match, group, replacement):
+ """Return the matched text with the text captured by ``group`` replaced.
+
+ Every occurrence of the captured text inside the whole match is replaced
+ by ``replacement`` (``str.replace`` is not limited to the group's own
+ position). When the group captured nothing (``None`` or an empty string),
+ the matched text is returned unchanged.
+ """
+ if groupn := match.group(group):
+ return match.group().replace(groupn, replacement)
+ else:
+ return match.group()
+
+
+class Sieve(object):
+ """Correct translations according to the BFW French standard.
+
+ Applies the regex replacements of each requested level to every msgstr,
+ then the enabled special filters, and counts the messages that changed.
+ """
+
+ # typographic apostrophe "’": \u2019
+ # no-break space: \u00A0
+ # narrow no-break space: \u202F
+
+ def __init__(self, params):
+ """Store the parameters, compile the patterns and build the filters."""
+ self.nmatch = 0
+ self.p = params
+ self.level = params.level
+ # Rebuild the message numbers from the items of params.extra_spaces:
+ # a digit item extends the number being built, any other item starts a
+ # new number at 0. Inside this method ``_`` is a plain local variable and
+ # shadows the module-level ``_`` imported from pology.
+ # NOTE(review): an empty value, or a trailing/doubled separator, leaves a
+ # 0 entry in the list.
+ nums = [0]
+ for _ in params.extra_spaces:
+ if _.isdigit():
+ nums.append(nums.pop() * 10 + int(_))
+ else:
+ nums.append(0)
+
+ self.spaces = nums
+ self.space_start = re.compile(r"^ +")
+ self.space_end = re.compile(r" +$")
+ # Level 1. In order: whitespace between a digit and a "%" that is
+ # followed by end of text, a space, "." or ","; whitespace before ":" or
+ # "»"; whitespace after "«"; whitespace before ";", "!" or "?"; and the
+ # double-space rule.
+ # NOTE(review): the last pattern is commented "double space" but shows a
+ # single space here, with an identical replacement; whitespace in this
+ # copy of the patch looks collapsed -- verify against the repository.
+ regex_replacements_1 = (
+ (re.compile(r"(?<=\d)(\s+)(?=%(?=$| |\.|,))"), "\u00A0"), # %
+ (re.compile(r"\b(\s+)(?=:|»)"), "\u00A0"), # : »
+ (re.compile(r"(?<=«)(\s+)\b"), "\u00A0"), # «
+ (re.compile(r"\b(\s+)(?=;|!|\?)"), "\u202F"), # ; ! ?
+ (re.compile(r"\b( )\b"), " "), # double space
+ )
+ # Level 2. The first pattern matches the text between ``='`` and the
+ # closing ``'`` and hands it to _replace_group, which swaps the captured
+ # ``\'`` (group 2) for \u2019. The second pattern replaces a plain
+ # apostrophe that follows a word boundary.
+ regex_replacements_2 = (
+ (
+ re.compile(r"(?<==')([^\\']*(\b\\'\b))*([^\\']*)(?=')"),
+ lambda m: _replace_group(m, 2, "\u2019"),
+ ),
+ (re.compile(r"\b(')(?=$|\b|\s[:;!?]|[.,])"), "\u2019"), # '
+ )
+
+ regex_replacements_3 = (
+ (re.compile(r"\b( )(?=\.|,)"), ""), # remove space before full stop and comma
+ )
+
+ # Keys are the characters accepted in the ``level`` parameter.
+ self.regex_replacements = {
+ "1": regex_replacements_1,
+ "2": regex_replacements_2,
+ "3": regex_replacements_3,
+ }
+
+ # Plain (non-regex) replacement pairs per level; all empty for now.
+ replacements_1 = ()
+ replacements_2 = ()
+ replacements_3 = ()
+
+ self.replacements = {
+ "1": replacements_1,
+ "2": replacements_2,
+ "3": replacements_3,
+ }
+
+ self.filters = (
+ SpecialFilter(
+ "extra_spaces",
+ params.extra_spaces,
+ lambda msg: msg.refentry in self.spaces,
+ self.replace_extra_spaces,
+ ),
+ SpecialFilter(
+ "ellipsis3points",
+ params.ellipsis3points,
+ lambda _: True,
+ lambda text: text.replace("\u2026", "..."),
+ ),
+ SpecialFilter(
+ "ellipsisUnicode",
+ params.ellipsisUnicode,
+ lambda _: True,
+ lambda text: text.replace("...", "\u2026"),
+ ),
+ ) # more special filters may be added here later
+ # Only the filters whose enabling value is truthy are kept.
+ self.used_filters = [_ for _ in self.filters if _.value]
+
+ def process(self, msg, cat):
+ """Correct every msgstr of ``msg`` and count the message if it changed.
+
+ ``cat`` is not used by this method.
+ """
+ oldcount = msg.modcount
+
+ # One pass per character of the ``level`` parameter, in the given order.
+ # NOTE(review): a character other than "1", "2" or "3" raises KeyError
+ # on the dictionary lookups below.
+ for nb in self.level:
+ for i in range(len(msg.msgstr)):
+ msg.msgstr[i] = self.correctTypo(
+ msg.msgstr[i],
+ self.replacements[nb],
+ self.regex_replacements[nb],
+ )
+
+ # NOTE(review): used_filters is already restricted to truthy values in
+ # __init__, so the ``if _.value`` test looks redundant.
+ for _ in self.used_filters:
+ if _.value:
+ msg = _.process(msg)
+
+ # NOTE(review): presumably modcount grows whenever the message is
+ # modified -- confirm against the Pology message API.
+ if oldcount < msg.modcount:
+ self.nmatch += 1
+
+ def finalize(self):
+ """Report the number of corrected messages, unless there were none or
+ the ``quiet`` parameter is set."""
+ if self.nmatch > 0 and not self.p.quiet:
+ report(
+ n_(
+ "@info",
+ "There was %(num)d corrected message.",
+ "There were %(num)d corrected messages.",
+ num=self.nmatch,
+ )
+ )
+
+ def correctTypo(self, text, replacements, regex_replacements):
+ """Return ``text`` after applying the plain ``replacements`` pairs and
+ then the ``regex_replacements`` (compiled pattern, replacement) pairs,
+ each in the order given."""
+
+ for _ in replacements:
+ text = text.replace(_[0], _[1])
+ for _ in regex_replacements:
+ text = _[0].sub(_[1], text)
+
+ return text
+
+ def replace_extra_spaces(self, text):
+ """Replace the spaces at the start and end of ``text`` by the same
+ number of punctuation spaces and return the result."""
+ # punctuation space: \u2008
+ # Both searches run on the original text, before any substitution.
+ match_start = re.search(self.space_start, text)
+ match_end = re.search(self.space_end, text)
+
+ if match_start:
+ text = re.sub(self.space_start, "\u2008" * len(match_start[0]), text)
+
+ if match_end:
+ text = re.sub(self.space_end, "\u2008" * len(match_end[0]), text)
+
+ return text
More information about the kde-doc-english
mailing list