[sdk/pology] /: [fr] Add a new sieve for french BFW translators

Fri Oct 25 17:29:39 BST 2024

Git commit 33f3c459ac20f66403e6e8180ed6a1cb346bdcc0 by Johnny Jazeix, on behalf of Albéric Poinsard.
Committed on 25/10/2024 at 16:29.
Pushed by jjazeix into branch 'master'.

[fr] Add a new sieve for french BFW translators

Add a useful sieve that performs several automated corrections.
It is designed for the French BFW (Battle for Wesnoth) PO files, but can be used elsewhere too.

M  +76   -0    doc/user/sieving.docbook
M  +1    -0    lang/fr/sieve/CMakeLists.txt
A  +240  -0    lang/fr/sieve/wesnoth.py

https://invent.kde.org/sdk/pology/-/commit/33f3c459ac20f66403e6e8180ed6a1cb346bdcc0

diff --git a/doc/user/sieving.docbook b/doc/user/sieving.docbook
index 8ffdeb9ee..1c6d407de 100644
--- a/doc/user/sieving.docbook
+++ b/doc/user/sieving.docbook
@@ -2583,6 +2583,82 @@ Note that percent characters in the <literal>plural-forms</literal> field are es
 
 </sect2>
 
+<sect2 id="sv-fr:wesnoth">
+<title><command>fr:wesnoth</command></title>
+
+<para>A set of rules and corrections that can be applied automatically to the French translations of Battle for Wesnoth.</para>
+
+<para>There are three levels of correction:
+<itemizedlist>
+
+<listitem>
+<para>level 1: basic correction</para>
+<para>Use a non-breaking space after « and before » ; : ! ? %, and collapse double spaces.</para>
+</listitem>
+
+
+<listitem>
+<para>level 2: advanced correction (may break something)</para>
+<para>Replace ' with the typographic apostrophe ’, while paying attention to Pango/Cairo markup tags and escaped <literal>\\'</literal>, as in <literal><italic>text='...'</italic></literal>.
+It may need to be run several times to work correctly.</para>
+</listitem>
+
+<listitem>
+<para>level 3: very unstable correction (will surely break something)</para>
+<para>Remove space before . and ,</para>
+</listitem>
+
+</itemizedlist>
+
+You can also use special filters, each of which performs one specific task.
+
+</para>
+
+<para>Parameters:
+
+<variablelist>
+
+<varlistentry>
+<term><option>quiet</option></term>
+<listitem>
+<para>Don't show the summary at the end of the sieve. Useful for script usage.</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>level</option></term>
+<listitem>
+<para>Set the level(s) of correction (1, 2 or 3). To use several levels, write them together, e.g. <literal>level:12</literal> for levels 1 and 2.</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>extra_spaces</option></term>
+<listitem>
+<para>(Special filter) Replace extra spaces at the start and end of a message with punctuation spaces. You must give the numbers of the messages in which to replace extra spaces, as a comma-separated list. For example, <literal>extra_spaces:12,45,789</literal> replaces them in messages 12, 45 and 789.</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>ellipsis3points</option></term>
+<listitem>
+<para>(Special filter) Replace every Unicode ellipsis (…) with three dots (...).</para>
+</listitem>
+</varlistentry>
+
+<varlistentry>
+<term><option>ellipsisUnicode</option></term>
+<listitem>
+<para>(Special filter) Replace every run of three dots (...) with a Unicode ellipsis (…). May break something.</para>
+</listitem>
+</varlistentry>
+
+</variablelist>
+
+</para>
+
+</sect2>
+
 <sect2 id="sv-ru:fill-doc-date-kde">
 <title><command>ru:fill-doc-date-kde</command></title>
 
diff --git a/lang/fr/sieve/CMakeLists.txt b/lang/fr/sieve/CMakeLists.txt
index 9d4bb3c83..a55594b3a 100644
--- a/lang/fr/sieve/CMakeLists.txt
+++ b/lang/fr/sieve/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(sieves
     setUbsp.py
     setApostrophe.py
+    wesnoth.py
 )
 get_current_source_subdir(srcsubdir)
 install(FILES ${sieves} DESTINATION ${DATA_INSTALL_DIR}/${srcsubdir})
diff --git a/lang/fr/sieve/wesnoth.py b/lang/fr/sieve/wesnoth.py
new file mode 100644
index 000000000..d89f6b90e
--- /dev/null
+++ b/lang/fr/sieve/wesnoth.py
@@ -0,0 +1,240 @@
+# -*- coding: UTF-8 -*-
+
+"""
+Automatically correct translations according to the rules established by the French
+BFW translation team.
+It covers non-breaking spaces, ellipsis (…), apostrophes and extra spaces (optional).
+
+inspired from other sieves
+@author: alberic89 <alberic89 at gmx.com>
+@license: GPLv3"""
+
+import re
+
+from typing import List
+from pology import _, n_
+from pology.report import report
+from pology.sieve import add_param_filter
+
+
+def setup_sieve(p):  # Register the sieve description and its five parameters on p.
+    p.set_desc(
+        _(
+            "@info sieve description",
+            "Correct message according to BFW french standard",
+        )
+    )
+
+    p.add_param(  # quiet: flag that suppresses the final summary
+        "quiet",
+        bool,
+        defval=False,
+        desc=_(
+            "@info sieve parameter description",
+            "Do not show summary (for script usage)",
+        ),
+    )
+
+    p.add_param(  # level: string of level digits such as "12"; empty by default
+        "level",
+        str,
+        defval="",
+        desc=_(
+            "@info sieve parameter description",
+            "Set level of correction (1, 2 and 3). You can use multiple levels, for example level:12",
+        ),
+    )
+
+    p.add_param(  # extra_spaces: numbers of the messages whose outer spaces are replaced
+        "extra_spaces",
+        list,
+        defval=[],
+        desc=_(
+            "@info sieve parameter description",
+            "Replace all extra spaces by punctuation space on the message of the numero given. You can specify multiple messages with comma-separated list.",
+        ),
+    )
+
+    p.add_param(  # ellipsis3points: flag, Unicode ellipsis -> three dots
+        "ellipsis3points",
+        bool,
+        defval=False,
+        desc=_(
+            "@info sieve parameter description",
+            "Replace all Unicode ellipsis (…) by three dots (...)",
+        ),
+    )
+
+    p.add_param(  # ellipsisUnicode: flag, three dots -> Unicode ellipsis
+        "ellipsisUnicode",
+        bool,
+        defval=False,
+        desc=_(
+            "@info sieve parameter description",
+            "Replace all three dots (...) by Unicode ellipsis (…)",
+        ),
+    )
+
+
+class SpecialFilter:
+    """Optional msgstr transformation that is switched on by a sieve parameter."""
+
+    def __init__(self, name, value, condition, action):
+        """name labels the filter; value is the parameter value, truthy when this filter must be used.
+        action should be a function taking one msgstr text and returning the replacement text.
+        condition is a function which takes msg in argument and returns a boolean (whether action is applied to it)."""
+        self.name = name
+        self.value = value
+        self.condition = condition
+        self.action = action
+
+    def process(self, msg):  # run action on every msgstr of msg when condition(msg) holds; returns msg
+        if self.condition(msg):
+            for i in range(len(msg.msgstr)):
+                msg.msgstr[i] = self.action(msg.msgstr[i])
+        return msg
+
+    def __eq__(self, y):  # compares the stored parameter value (not the filter object) with y
+        return y == self.value
+
+    def __repr__(self):  # "name : value : action", for debugging output
+        return f"{self.name} : {self.value} : {self.action}"
+
+
+def _replace_group(match, group, replacement):  # helper for re.sub callbacks: swap the text captured by one group
+    if groupn := match.group(group):
+        return match.group().replace(groupn, replacement)  # replaces every occurrence of that text in the whole match
+    else:
+        return match.group()  # group is empty or did not participate: return the match unchanged
+
+
+class Sieve(object):
+    """Sieve that rewrites each msgstr according to the French BFW typographic rules."""
+
+    # typographic apostrophe "’" : \u2019
+    # non-breaking space " " : \u00A0
+    # narrow non-breaking space " " : \u202F
+
+    def __init__(self, params):  # store the parameters, compile the per-level regex tables, build the special filters
+        self.nmatch = 0
+        self.p = params
+        self.level = params.level
+        nums = [0]  # extra_spaces is read character by character: digits build a number, anything else separates
+        for _ in params.extra_spaces:  # NOTE(review): "_" shadows the _ imported from pology inside this method
+            if _.isdigit():
+                nums.append(nums.pop() * 10 + int(_))  # append the digit to the number being built
+            else:
+                nums.append(0)  # separator: start a new number
+
+        self.spaces = nums  # message numbers, compared with msg.refentry by the extra_spaces filter
+        self.space_start = re.compile(r"^ +")  # run of spaces at the start of the text
+        self.space_end = re.compile(r" +$")  # run of spaces at the end of the text
+        regex_replacements_1 = (
+            (re.compile(r"(?<=\d)(\s+)(?=%(?=$| |\.|,))"), "\u00A0"),  # NBSP between a digit and %
+            (re.compile(r"\b(\s+)(?=:|»)"), "\u00A0"),  # NBSP before : and »
+            (re.compile(r"(?<=«)(\s+)\b"), "\u00A0"),  # NBSP after «
+            (re.compile(r"\b(\s+)(?=;|!|\?)"), "\u202F"),  # narrow NBSP before ; ! ?
+            (re.compile(r"\b(  )\b"), " "),  # double space between two words -> single space
+        )
+        regex_replacements_2 = (
+            (  # inside a ='...' value: turn an escaped \' standing between word characters into ’
+                re.compile(r"(?<==')([^\\']*(\b\\'\b))*([^\\']*)(?=')"),
+                lambda m: _replace_group(m, 2, "\u2019"),
+            ),
+            (re.compile(r"\b(')(?=$|\b|\s[:;!?]|[.,])"), "\u2019"),  # ' right after a word character -> ’
+        )
+
+        regex_replacements_3 = (
+            (re.compile(r"\b( )(?=\.|,)"), ""),  # remove a space between a word character and . or ,
+        )
+
+        self.regex_replacements = {
+            "1": regex_replacements_1,
+            "2": regex_replacements_2,
+            "3": regex_replacements_3,
+        }
+
+        replacements_1 = ()  # plain (non-regex) replacement pairs per level; all empty for now
+        replacements_2 = ()
+        replacements_3 = ()
+
+        self.replacements = {
+            "1": replacements_1,
+            "2": replacements_2,
+            "3": replacements_3,
+        }
+
+        self.filters = (
+            SpecialFilter(
+                "extra_spaces",
+                params.extra_spaces,
+                lambda msg: msg.refentry in self.spaces,
+                self.replace_extra_spaces,
+            ),
+            SpecialFilter(
+                "ellipsis3points",
+                params.ellipsis3points,
+                lambda _: True,
+                lambda text: text.replace("\u2026", "..."),
+            ),
+            SpecialFilter(
+                "ellipsisUnicode",
+                params.ellipsisUnicode,
+                lambda _: True,
+                lambda text: text.replace("...", "\u2026"),
+            ),
+        )  # further special filters can be added here in the future
+        self.used_filters = [_ for _ in self.filters if _.value]  # keep only filters whose parameter value is truthy
+
+    def process(self, msg, cat):  # apply the selected levels, then the enabled special filters, to every msgstr
+        oldcount = msg.modcount  # snapshot, compared below to detect a modification
+
+        for nb in self.level:  # one character per level; NOTE(review): a character other than 1/2/3 raises KeyError
+            for i in range(len(msg.msgstr)):
+                msg.msgstr[i] = self.correctTypo(
+                    msg.msgstr[i],
+                    self.replacements[nb],
+                    self.regex_replacements[nb],
+                )
+
+        for _ in self.used_filters:
+            if _.value:  # redundant: used_filters was already filtered on value in __init__
+                msg = _.process(msg)
+
+        if oldcount < msg.modcount:
+            self.nmatch += 1
+
+    def finalize(self):  # report the number of corrected messages, unless none were corrected or quiet is set
+        if self.nmatch > 0 and not self.p.quiet:
+            report(
+                n_(
+                    "@info",
+                    "There was %(num)d corrected message.",
+                    "There were %(num)d corrected messages.",
+                    num=self.nmatch,
+                )
+            )
+
+    def correctTypo(self, text, replacements, regex_replacements):
+        """Return text after applying the plain replacement pairs, then the (pattern, replacement) regex pairs, in order."""
+
+        for _ in replacements:
+            text = text.replace(_[0], _[1])
+        for _ in regex_replacements:
+            text = _[0].sub(_[1], text)
+
+        return text
+
+    def replace_extra_spaces(self, text):
+        """Return text with each leading and each trailing space replaced by one punctuation space."""
+        # punctuation space " " : \u2008
+        match_start = re.search(self.space_start, text)
+        match_end = re.search(self.space_end, text)
+
+        if match_start:
+            text = re.sub(self.space_start, "\u2008" * len(match_start[0]), text)
+
+        if match_end:
+            text = re.sub(self.space_end, "\u2008" * len(match_end[0]), text)
+
+        return text