[1469] Update Translation Scripting

Also runs the script through the Black formatter for parsability's sake.
2025-07-13 07:01:26 +03:00 · 2024-05-30 18:22:35 -04:00 · 2024-05-30 18:22:35 -04:00 · b6d4371d67
commit b6d4371d67
parent 3dd3652c59
2 changed files with 106 additions and 71 deletions
--- a/.github/workflows/push-checks.yml
+++ b/.github/workflows/push-checks.yml
@ -72,8 +72,4 @@ jobs:
    - name: translation checks
      run: |
-          output=$(python ./scripts/find_localised_strings.py --compare-lang L10n/en.template --directory . --ignore coriolis-data 2>&1)
+          python ./scripts/find_localised_strings.py --compare-lang L10n/en.template --directory . --ignore coriolis-data
          if [ -n "$output" ]; then
              echo $output
              exit 1
          fi
--- a/scripts/find_localised_strings.py
+++ b/scripts/find_localised_strings.py
@ -1,4 +1,5 @@
 """Search all given paths recursively for localised string calls."""
 from __future__ import annotations
 import argparse
@ -17,20 +18,20 @@ def get_func_name(thing: ast.AST) -> str:
    if isinstance(thing, ast.Attribute):
        return get_func_name(thing.value)
-    return ''
+    return ""
 def get_arg(call: ast.Call) -> str:
    """Extract the argument string to the translate function."""
    if len(call.args) > 1:
-        print('??? > 1 args', call.args, file=sys.stderr)
+        print("??? > 1 args", call.args, file=sys.stderr)
    arg = call.args[0]
    if isinstance(arg, ast.Constant):
        return arg.value
    if isinstance(arg, ast.Name):
-        return f'VARIABLE! CHECK CODE! {arg.id}'
+        return f"VARIABLE! CHECK CODE! {arg.id}"
-    return f'Unknown! {type(arg)=} {ast.dump(arg)} ||| {ast.unparse(arg)}'
+    return f"Unknown! {type(arg)=} {ast.dump(arg)} ||| {ast.unparse(arg)}"
 def find_calls_in_stmt(statement: ast.AST) -> list[ast.Call]:
@ -38,8 +39,14 @@ def find_calls_in_stmt(statement: ast.AST) -> list[ast.Call]:
    out = []
    for n in ast.iter_child_nodes(statement):
        out.extend(find_calls_in_stmt(n))
-    if isinstance(statement, ast.Call) and get_func_name(statement.func) in ('tr', 'translations'):
+    if isinstance(statement, ast.Call) and get_func_name(statement.func) in (
-        if ast.unparse(statement).find('.tl') != -1 or ast.unparse(statement).find('translate') != -1:
+        "tr",
        "translations",
    ):
        if (
            ast.unparse(statement).find(".tl") != -1
            or ast.unparse(statement).find("translate") != -1
        ):
            out.append(statement)
    return out
@ -53,11 +60,13 @@ COMMENT_OWN_LINE_RE is for a comment on its own line.
 The difference is necessary in order to tell if a 'above' LANG comment is for
 its own line (SAME_LINE), or meant to be for this following line (OWN_LINE).
 """
-COMMENT_SAME_LINE_RE = re.compile(r'^.*?(#.*)$')
+COMMENT_SAME_LINE_RE = re.compile(r"^.*?(#.*)$")
-COMMENT_OWN_LINE_RE = re.compile(r'^\s*?(#.*)$')
+COMMENT_OWN_LINE_RE = re.compile(r"^\s*?(#.*)$")
-def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> str | None:  # noqa: CCR001
+def extract_comments(  # noqa: CCR001
    call: ast.Call, lines: list[str], file: pathlib.Path
 ) -> str | None:
    """
    Extract comments from source code based on the given call.
@ -83,23 +92,23 @@ def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> st
        match = COMMENT_OWN_LINE_RE.match(above_line)
        if match:
            above_comment = match.group(1).strip()
-            if not above_comment.startswith('# LANG:'):
+            if not above_comment.startswith("# LANG:"):
-                bad_comment = f'Unknown comment for {file}:{call.lineno} {above_line}'
+                bad_comment = f"Unknown comment for {file}:{call.lineno} {above_line}"
                above_comment = None
            else:
-                above_comment = above_comment.replace('# LANG:', '').strip()
+                above_comment = above_comment.replace("# LANG:", "").strip()
    if current_line is not None:
        match = COMMENT_SAME_LINE_RE.match(current_line)
        if match:
            current_comment = match.group(1).strip()
-            if not current_comment.startswith('# LANG:'):
+            if not current_comment.startswith("# LANG:"):
-                bad_comment = f'Unknown comment for {file}:{call.lineno} {current_line}'
+                bad_comment = f"Unknown comment for {file}:{call.lineno} {current_line}"
                current_comment = None
            else:
-                current_comment = current_comment.replace('# LANG:', '').strip()
+                current_comment = current_comment.replace("# LANG:", "").strip()
    if current_comment is not None:
        out = current_comment
@ -109,13 +118,13 @@ def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> st
        print(bad_comment, file=sys.stderr)
    if out is None:
-        print(f'No comment for {file}:{call.lineno} {current_line}', file=sys.stderr)
+        print(f"No comment for {file}:{call.lineno} {current_line}", file=sys.stderr)
    return out
 def scan_file(path: pathlib.Path) -> list[ast.Call]:
    """Scan a file for ast.Calls."""
-    data = path.read_text(encoding='utf-8')
+    data = path.read_text(encoding="utf-8")
    lines = data.splitlines()
    parsed = ast.parse(data)
    out: list[ast.Call] = []
@ -125,13 +134,15 @@ def scan_file(path: pathlib.Path) -> list[ast.Call]:
    # see if we can extract any comments
    for call in out:
-        setattr(call, 'comment', extract_comments(call, lines, path))
+        setattr(call, "comment", extract_comments(call, lines, path))
    out.sort(key=lambda c: c.lineno)
    return out
-def scan_directory(path: pathlib.Path, skip: list[pathlib.Path] | None = None) -> dict[pathlib.Path, list[ast.Call]]:
+def scan_directory(
    path: pathlib.Path, skip: list[pathlib.Path] | None = None
 ) -> dict[pathlib.Path, list[ast.Call]]:
    """
    Scan a directory for expected callsites.
@ -145,7 +156,7 @@ def scan_directory(path: pathlib.Path, skip: list[pathlib.Path] | None = None) -
        if any(same_path.name == thing.name for same_path in skip):
            continue
-        if thing.is_file() and thing.suffix == '.py':
+        if thing.is_file() and thing.suffix == ".py":
            out[thing] = scan_file(thing)
        elif thing.is_dir():
            out.update(scan_directory(thing, skip))
@ -163,10 +174,10 @@ def parse_template(path) -> set[str]:
    """
    lang_re = re.compile(r'\s*"([^"]+)"\s*=\s*"([^"]+)"\s*;\s*$')
    out = set()
-    with open(path, encoding='utf-8') as file:
+    with open(path, encoding="utf-8") as file:
        for line in file:
            match = lang_re.match(line.strip())
-            if match and match.group(1) != '!Language':
+            if match and match.group(1) != "!Language":
                out.add(match.group(1))
    return out
@ -183,14 +194,16 @@ class FileLocation:
    line_end_col: int | None
    @staticmethod
-    def from_call(path: pathlib.Path, c: ast.Call) -> 'FileLocation':
+    def from_call(path: pathlib.Path, c: ast.Call) -> "FileLocation":
        """
        Create a FileLocation from a Call and Path.
        :param path: Path to the file this FileLocation is in
        :param c: Call object to extract line information from
        """
-        return FileLocation(path, c.lineno, c.col_offset, c.end_lineno, c.end_col_offset)
+        return FileLocation(
            path, c.lineno, c.col_offset, c.end_lineno, c.end_col_offset
        )
@dataclasses.dataclass
@ -238,95 +251,121 @@ def generate_lang_template(data: dict[pathlib.Path, list[ast.Call]]) -> str:
    entries: list[LangEntry] = []
    for path, calls in data.items():
        for c in calls:
-            entries.append(LangEntry([FileLocation.from_call(path, c)], get_arg(c), [getattr(c, 'comment')]))
+            entries.append(
                LangEntry(
                    [FileLocation.from_call(path, c)],
                    get_arg(c),
                    [getattr(c, "comment")],
                )
            )
    deduped = dedupe_lang_entries(entries)
-    out = '''/* Language name */
+    out = """/* Language name */
 "!Language" = "English";
-'''
+"""
-    print(f'Done Deduping entries {len(entries)=}  {len(deduped)=}', file=sys.stderr)
+    print(f"Done Deduping entries {len(entries)=}  {len(deduped)=}", file=sys.stderr)
    for entry in deduped:
        assert len(entry.comments) == len(entry.locations)
        comment_set = set()
        for comment, loc in zip(entry.comments, entry.locations):
            if comment:
-                comment_set.add(f'{loc.path.name}: {comment};')
+                comment_set.add(f"{loc.path.name}: {comment};")
-        files = 'In files: ' + entry.files()
+        files = "In files: " + entry.files()
-        comment = ' '.join(comment_set).strip()
+        comment = " ".join(comment_set).strip()
-        header = f'{comment} {files}'.strip()
+        header = f"{comment} {files}".strip()
        string = f'"{entry.string}"'
-        out += f'/* {header} */\n'
+        out += f"/* {header} */\n"
-        out += f'{string} = {string};\n\n'
+        out += f"{string} = {string};\n\n"
    return out
-if __name__ == '__main__':
+def main():  # noqa: CCR001
    """Run the Translation Checker."""
    parser = argparse.ArgumentParser()
-    parser.add_argument('--directory', help='Directory to search from', default='.')
+    parser.add_argument("--directory", help="Directory to search from", default=".")
-    parser.add_argument('--ignore', action='append', help='directories to ignore', default=['venv', '.venv', '.git'])
+    parser.add_argument(
        "--ignore",
        action="append",
        help="Directories to ignore",
        default=["venv", ".venv", ".git"],
    )
    group = parser.add_mutually_exclusive_group()
-    group.add_argument('--json', action='store_true', help='JSON output')
+    group.add_argument("--json", action="store_true", help="JSON output")
-    group.add_argument('--lang', help='en.template "strings" output to specified file, "-" for stdout')
+    group.add_argument(
-    group.add_argument('--compare-lang', help='en.template file to compare against')
+        "--lang", help='en.template "strings" output to specified file, "-" for stdout'
    )
    group.add_argument("--compare-lang", help="en.template file to compare against")
    args = parser.parse_args()
    directory = pathlib.Path(args.directory)
    res = scan_directory(directory, [pathlib.Path(p) for p in args.ignore])
-    if args.compare_lang is not None and len(args.compare_lang) > 0:
+    output = []
    if args.compare_lang:
        seen = set()
        template = parse_template(args.compare_lang)
        for file, calls in res.items():
            for c in calls:
                arg = get_arg(c)
                if arg in template:
                    seen.add(arg)
                else:
-                    print(f'NEW! {file}:{c.lineno}: {arg!r}')
+                    output.append(f"NEW! {file}:{c.lineno}: {arg!r}")
        for old in set(template) ^ seen:
-            print(f'No longer used: {old!r}')
+            output.append(f"No longer used: {old!r}")
    elif args.json:
        to_print_data = [
            {
-                'path': str(path),
+                "path": str(path),
-                'string': get_arg(c),
+                "string": get_arg(c),
-                'reconstructed': ast.unparse(c),
+                "reconstructed": ast.unparse(c),
-                'start_line': c.lineno,
+                "start_line": c.lineno,
-                'start_offset': c.col_offset,
+                "start_offset": c.col_offset,
-                'end_line': c.end_lineno,
+                "end_line": c.end_lineno,
-                'end_offset': c.end_col_offset,
+                "end_offset": c.end_col_offset,
-                'comment': getattr(c, 'comment', None)
+                "comment": getattr(c, "comment", None),
-            } for (path, calls) in res.items() for c in calls
+            }
            for path, calls in res.items()
            for c in calls
        ]
-
+        output.append(json.dumps(to_print_data, indent=2))
        print(json.dumps(to_print_data, indent=2))
    elif args.lang:
-        if args.lang == '-':
+        lang_template = generate_lang_template(res)
-            print(generate_lang_template(res))
+        if args.lang == "-":
-
+            output.append(lang_template)
        else:
-            with open(args.lang, mode='w+', newline='\n') as langfile:
+            with open(args.lang, mode="w+", newline="\n", encoding="UTF-8") as langfile:
-                langfile.writelines(generate_lang_template(res))
+                langfile.writelines(lang_template)
    else:
        for path, calls in res.items():
-            if len(calls) == 0:
+            if not calls:
                continue
-
+            output.append(str(path))
            print(path)
            for c in calls:
-                print(
+                output.append(
-                    f'    {c.lineno:4d}({c.col_offset:3d}):{c.end_lineno:4d}({c.end_col_offset:3d})\t', ast.unparse(c)
+                    f"    {c.lineno:4d}({c.col_offset:3d}):{c.end_lineno:4d}({c.end_col_offset:3d})\t{ast.unparse(c)}"
                )
            output.append("")
-            print()
+    # Print all collected output at the end
    if output:
        print("\n".join(output))
        sys.exit(1)
 if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        sys.exit()