From 93619e456560db5891aedf078bb0528345376c5b Mon Sep 17 00:00:00 2001 From: David Sangrey Date: Thu, 16 Nov 2023 18:16:25 -0500 Subject: [PATCH] [2051] Fix Localized Strings --- plugins/edsm.py | 4 +- scripts/find_localised_strings.py | 237 ++++++++++++++---------------- 2 files changed, 114 insertions(+), 127 deletions(-) diff --git a/plugins/edsm.py b/plugins/edsm.py index 33af692b..4827dbf4 100644 --- a/plugins/edsm.py +++ b/plugins/edsm.py @@ -309,7 +309,7 @@ def plugin_prefs(parent: ttk.Notebook, cmdr: str | None, is_beta: bool) -> tk.Fr this.log = tk.IntVar(value=config.get_int('edsm_out') and 1) this.log_button = nb.Checkbutton( frame, - text=_('Send flight log and Cmdr status to EDSM'), + text=_('Send flight log and CMDR status to EDSM'), # LANG: Send flight log and CMDR Status to EDSM variable=this.log, command=prefsvarchanged ) @@ -320,7 +320,7 @@ def plugin_prefs(parent: ttk.Notebook, cmdr: str | None, is_beta: bool) -> tk.Fr this.label = HyperlinkLabel( frame, - text=_('Elite Dangerous Star Map credentials'), + text=_('Elite Dangerous Star Map credentials'), # LANG: Elite Dangerous Star Map credentials background=nb.Label().cget('background'), url='https://www.edsm.net/settings/api', underline=True diff --git a/scripts/find_localised_strings.py b/scripts/find_localised_strings.py index b9d8a8b5..1d1da826 100644 --- a/scripts/find_localised_strings.py +++ b/scripts/find_localised_strings.py @@ -30,7 +30,7 @@ def get_arg(call: ast.Call) -> str: return arg.value if isinstance(arg, ast.Name): return f'VARIABLE! CHECK CODE! {arg.id}' - return f'Unknown! {type(arg)=} {ast.dump(arg)} ||| {ast.unparse(arg)}' # type: ignore + return f'Unknown! 
{type(arg)=} {ast.dump(arg)} ||| {ast.unparse(arg)}' def find_calls_in_stmt(statement: ast.AST) -> list[ast.Call]: @@ -39,9 +39,7 @@ def find_calls_in_stmt(statement: ast.AST) -> list[ast.Call]: for n in ast.iter_child_nodes(statement): out.extend(find_calls_in_stmt(n)) if isinstance(statement, ast.Call) and get_func_name(statement.func) == '_': - out.append(statement) - return out @@ -58,7 +56,7 @@ COMMENT_SAME_LINE_RE = re.compile(r'^.*?(#.*)$') COMMENT_OWN_LINE_RE = re.compile(r'^\s*?(#.*)$') -def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> str | None: +def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> str | None: # noqa: CCR001 """ Extract comments from source code based on the given call. @@ -70,32 +68,48 @@ def extract_comments(call: ast.Call, lines: list[str], file: pathlib.Path) -> st :param file: The path to the file this call node came from :return: The first comment that matches the rules, or None """ - above_line_number = call.lineno - 2 - current_line_number = call.lineno - 1 + out: str | None = None + above = call.lineno - 2 + current = call.lineno - 1 - def extract_lang_comment(line: str) -> str | None: - """ - Extract a language comment from a given line. + above_line = lines[above].strip() if len(lines) >= above else None + above_comment: str | None = None + current_line = lines[current].strip() + current_comment: str | None = None - :param line: The line to extract the language comment from. - :return: The extracted language comment, or None if no valid comment is found. 
- """ - match = COMMENT_OWN_LINE_RE.match(line) - if match and match.group(1).startswith('# LANG:'): - return match.group(1).replace('# LANG:', '').strip() - return None + bad_comment: str | None = None + if above_line is not None: + match = COMMENT_OWN_LINE_RE.match(above_line) + if match: + above_comment = match.group(1).strip() + if not above_comment.startswith('# LANG:'): + bad_comment = f'Unknown comment for {file}:{call.lineno} {above_line}' + above_comment = None - above_comment = extract_lang_comment(lines[above_line_number]) if len(lines) >= above_line_number else None - current_comment = extract_lang_comment(lines[current_line_number]) + else: + above_comment = above_comment.replace('# LANG:', '').strip() - if current_comment is None: - current_comment = above_comment + if current_line is not None: + match = COMMENT_SAME_LINE_RE.match(current_line) + if match: + current_comment = match.group(1).strip() + if not current_comment.startswith('# LANG:'): + bad_comment = f'Unknown comment for {file}:{call.lineno} {current_line}' + current_comment = None - if current_comment is None: - print(f'No comment for {file}:{call.lineno} {lines[current_line_number]}', file=sys.stderr) - return None + else: + current_comment = current_comment.replace('# LANG:', '').strip() - return current_comment + if current_comment is not None: + out = current_comment + elif above_comment is not None: + out = above_comment + elif bad_comment is not None: + print(bad_comment, file=sys.stderr) + + if out is None: + print(f'No comment for {file}:{call.lineno} {current_line}', file=sys.stderr) + return out def scan_file(path: pathlib.Path) -> list[ast.Call]: @@ -103,19 +117,17 @@ def scan_file(path: pathlib.Path) -> list[ast.Call]: data = path.read_text(encoding='utf-8') lines = data.splitlines() parsed = ast.parse(data) - calls: list[ast.Call] = [] + out: list[ast.Call] = [] for statement in parsed.body: - calls.extend(find_calls_in_stmt(statement)) + 
out.extend(find_calls_in_stmt(statement)) - # Extract and assign comments to each call - for call in calls: - call.comment = extract_comments(call, lines, path) # type: ignore + # see if we can extract any comments + for call in out: + setattr(call, 'comment', extract_comments(call, lines, path)) - # Sort the calls by line number - calls.sort(key=lambda c: c.lineno) - - return calls + out.sort(key=lambda c: c.lineno) + return out def scan_directory(path: pathlib.Path, skip: list[pathlib.Path] | None = None) -> dict[pathlib.Path, list[ast.Call]]: @@ -125,38 +137,38 @@ def scan_directory(path: pathlib.Path, skip: list[pathlib.Path] | None = None) - :param path: path to scan :param skip: paths to skip, if any, defaults to None """ + if skip is None: + skip = [] out = {} for thing in path.iterdir(): - if skip is not None and any(s.name == thing.name for s in skip): + if any(same_path.name == thing.name for same_path in skip): continue - if thing.is_file(): - if not thing.name.endswith('.py'): - continue + + if thing.is_file() and thing.suffix == '.py': out[thing] = scan_file(thing) elif thing.is_dir(): - out |= scan_directory(thing) # type: ignore - else: - raise ValueError(type(thing), thing) + out.update(scan_directory(thing, skip)) + return out -def parse_template(path: pathlib.Path) -> set[str]: +def parse_template(path) -> set[str]: """ Parse a lang.template file. - The regular expression used here was extracted from l10n.py. + The regexp this uses was extracted from l10n.py. 
:param path: The path to the lang file """ lang_re = re.compile(r'\s*"([^"]+)"\s*=\s*"([^"]+)"\s*;\s*$') - result = set() + out = set() + with open(path, encoding='utf-8') as file: + for line in file: + match = lang_re.match(line.strip()) + if match and match.group(1) != '!Language': + out.add(match.group(1)) - for line in pathlib.Path(path).read_text(encoding='utf-8').splitlines(): - match = lang_re.match(line) - if match and match.group(1) != '!Language': - result.add(match.group(1)) - - return result + return out @dataclasses.dataclass @@ -191,41 +203,38 @@ class LangEntry: def files(self) -> str: """Return a string representation of all the files this LangEntry is in, and its location therein.""" file_locations = [ - f'{loc.path.name}:{loc.line_start}' + - (f':{loc.line_end}' if loc.line_end is not None and loc.line_end != loc.line_start else '') + f"{loc.path.name}:{loc.line_start}:{loc.line_end or ''}" for loc in self.locations ] - - return '; '.join(file_locations) + return "; ".join(file_locations) def dedupe_lang_entries(entries: list[LangEntry]) -> list[LangEntry]: """ Deduplicate a list of lang entries. - This will coalesce LangEntries that have the same string but differing files and comments into a single - LangEntry that contains all comments and FileLocations. 
+ This will coalesce LangEntries that have the same string but differing files and comments into a single + LangEntry that contains all comments and FileLocations. :param entries: The list to deduplicate :return: The deduplicated list """ deduped: dict[str, LangEntry] = {} - for e in entries: existing = deduped.get(e.string) if existing: existing.locations.extend(e.locations) existing.comments.extend(e.comments) else: - deduped[e.string] = LangEntry(locations=e.locations[:], string=e.string, comments=e.comments[:]) - + deduped[e.string] = LangEntry( + locations=e.locations[:], string=e.string, comments=e.comments[:] + ) return list(deduped.values()) def generate_lang_template(data: dict[pathlib.Path, list[ast.Call]]) -> str: """Generate a full en.template from the given data.""" entries: list[LangEntry] = [] - for path, calls in data.items(): for c in calls: entries.append(LangEntry([FileLocation.from_call(path, c)], get_arg(c), [getattr(c, 'comment')])) @@ -236,72 +245,26 @@ def generate_lang_template(data: dict[pathlib.Path, list[ast.Call]]) -> str: ''' print(f'Done Deduping entries {len(entries)=} {len(deduped)=}', file=sys.stderr) - for entry in deduped: assert len(entry.comments) == len(entry.locations) - comment_parts = [] + + comment_set = set() + for comment, loc in zip(entry.comments, entry.locations): + if comment: + comment_set.add(f'{loc.path.name}: {comment};') + + files = 'In files: ' + entry.files() + comment = ' '.join(comment_set).strip() + + header = f'{comment} {files}'.strip() string = f'"{entry.string}"' - - for i, comment_text in enumerate(entry.comments): - if comment_text is None: - continue - - loc = entry.locations[i] - comment_parts.append(f'{loc.path.name}: {comment_text};') - - if comment_parts: - header = ' '.join(comment_parts) - out += f'/* {header} */\n' - + out += f'/* {header} */\n' out += f'{string} = {string};\n\n' return out -def compare_lang_with_template(template: set[str], res: dict[pathlib.Path, list[ast.Call]]) -> None: - 
""" - Compare language entries in source code with a given language template. - - :param template: A set of language entries from a language template. - :param res: A dictionary containing source code paths as keys and lists of ast.Call objects as values. - """ - seen = set() - - for file, calls in res.items(): - for c in calls: - arg = get_arg(c) - if arg in template: - seen.add(arg) - else: - print(f'NEW! {file}:{c.lineno}: {arg!r}') - - for old in set(template) ^ seen: - print(f'No longer used: {old}') - - -def print_json_output(res: dict[pathlib.Path, list[ast.Call]]) -> None: - """ - Print JSON output of extracted language entries. - - :param res: A dictionary containing source code paths as keys and lists of ast.Call objects as values. - """ - to_print_data = [ - { - 'path': str(path), - 'string': get_arg(c), - 'reconstructed': ast.unparse(c), # type: ignore - 'start_line': c.lineno, - 'start_offset': c.col_offset, - 'end_line': c.end_lineno, - 'end_offset': c.end_col_offset, - 'comment': getattr(c, 'comment', None) - } for (path, calls) in res.items() for c in calls - ] - - print(json.dumps(to_print_data, indent=2)) - - -if __name__ == "__main__": +if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--directory', help='Directory to search from', default='.') parser.add_argument('--ignore', action='append', help='directories to ignore', default=['venv', '.venv', '.git']) @@ -313,32 +276,56 @@ if __name__ == "__main__": args = parser.parse_args() directory = pathlib.Path(args.directory) - skip = [pathlib.Path(p) for p in args.ignore] - res = scan_directory(directory, skip) + res = scan_directory(directory, [pathlib.Path(p) for p in args.ignore]) - if args.compare_lang: + if args.compare_lang is not None and len(args.compare_lang) > 0: + seen = set() template = parse_template(args.compare_lang) - compare_lang_with_template(template, res) + + for file, calls in res.items(): + for c in calls: + arg = get_arg(c) + if arg in template: 
+ seen.add(arg) + else: + print(f'NEW! {file}:{c.lineno}: {arg!r}') + + for old in set(template) ^ seen: + print(f'No longer used: {old}') elif args.json: - print_json_output(res) + to_print_data = [ + { + 'path': str(path), + 'string': get_arg(c), + 'reconstructed': ast.unparse(c), + 'start_line': c.lineno, + 'start_offset': c.col_offset, + 'end_line': c.end_lineno, + 'end_offset': c.end_col_offset, + 'comment': getattr(c, 'comment', None) + } for (path, calls) in res.items() for c in calls + ] + + print(json.dumps(to_print_data, indent=2)) elif args.lang: if args.lang == '-': print(generate_lang_template(res)) + else: with open(args.lang, mode='w+', newline='\n') as langfile: langfile.writelines(generate_lang_template(res)) else: for path, calls in res.items(): - if not calls: + if len(calls) == 0: continue print(path) for c in calls: print( - f' {c.lineno:4d}({c.col_offset:3d}):{c.end_lineno:4d}(' - f'{c.end_col_offset:3d})\t', ast.unparse(c) # type: ignore + f' {c.lineno:4d}({c.col_offset:3d}):{c.end_lineno:4d}({c.end_col_offset:3d})\t', ast.unparse(c) ) + print()