update-dashboard-issue: enrich with breakdown per metrics (#143)

tldr-pages · Oct 7, 2024 · 1fa8d39 · 1fa8d39
1 parent 64e5a52
commit 1fa8d39
Show file tree

Hide file tree

Showing 3 changed files with 192 additions and 88 deletions.
diff --git a/scripts/_common.py b/scripts/_common.py
@@ -8,8 +8,10 @@
 from enum import Enum
 from pathlib import Path
 import os
+import re
 import json
 import subprocess
+import urllib.parse
 
 
 class Colors(str, Enum):
@@ -202,3 +204,46 @@ def update_github_issue(issue_number, title, body):
         )
 
     return result
+
+
+def replace_characters_for_link(page):
+    return str(
+        page.replace("[", "\\[")
+        .replace("]", "\\]")
+        .replace(")", "\\)")
+        .replace("(", "\\(")
+    )
+
+
+def generate_github_link(item):
+    def replace_reference(match):
+        page = match.group(0)
+
+        directory = Path(page).parent
+        filename = urllib.parse.quote(Path(page).name)
+
+        page = replace_characters_for_link(page)
+
+        return f"[{page}](https://github.com/tldr-pages/tldr/blob/main/{directory}/{filename})"
+
+    return re.sub(r"pages\..*\.md", replace_reference, item)
+
+
+def generate_github_edit_link(page):
+    directory = Path(page).parent
+    filename = urllib.parse.quote(Path(page).name)
+
+    page = replace_characters_for_link(page)
+
+    return (
+        f"[{page}](https://github.com/tldr-pages/tldr/edit/main/{directory}/{filename})"
+    )
+
+
+def generate_github_new_link(page):
+    directory = Path(page).parent
+    filename = urllib.parse.quote(Path(page).name)
+
+    page = replace_characters_for_link(page)
+
+    return f"[{page}](https://github.com/tldr-pages/tldr/new/main/{directory}?filename={filename})"
diff --git a/scripts/update-dashboard-issue.py b/scripts/update-dashboard-issue.py
@@ -4,23 +4,45 @@
 import os
 import re
 import sys
+
 from pathlib import Path
-from _common import get_github_issue, update_github_issue
+from enum import Enum
+from _common import (
+    get_github_issue,
+    update_github_issue,
+    generate_github_link,
+    generate_github_edit_link,
+    generate_github_new_link,
+)
+
+
+class Topics(str, Enum):
+    def __str__(self):
+        return str(
+            self.value
+        )  # make str(Topics.TOPIC) return the Topic instead of an Enum object
+
+    INCONSISTENT_FILENAMES = "inconsistent filename(s)"
+    MALFORMED_OR_OUTDATED_MORE_INFO_LINK_PAGES = (
+        "malformed or outdated more info link page(s)"
+    )
+    MISSING_ALIAS_PAGES = "missing alias page(s)"
+    MISMATCHED_PAGE_TITLES = "mismatched page title(s)"
+    MISSING_TLDR_PAGES = "missing TLDR page(s)"
+    MISPLACED_PAGES = "misplaced page(s)"
+    OUTDATED_PAGES_BASED_ON_COMMAND_COUNT = (
+        "outdated page(s) based on number of commands"
+    )
+    OUTDATED_PAGES_BASED_ON_COMMAND_CONTENTS = (
+        "outdated page(s) based on the commands itself"
+    )
+    MISSING_ENGLISH_PAGES = "missing English page(s)"
+    MISSING_TRANSLATED_PAGES = "missing translated page(s)"
+    LINT_ERRORS = "linter error(s)"
 
 
 def parse_log_file(path: Path) -> dict:
-    data = {"overview": {}, "details": {}}
-
-    def add_to_overview(pattern, key):
-        match = re.search(pattern, line)
-        if match:
-            data["overview"][key] = match.group(1).strip()
-
-    def add_to_details(pattern, key):
-        match = re.search(pattern, line)
-        if match and int(match.group(1)) > 0:
-            data["details"][current_language][key] = int(match.group(1))
-
+    data = {"overview": {}, "metrics": {}, "details": {}}
     overview_patterns = {
         "Total inconsistent filenames": r"Total inconsistent filename\(s\): (.+)",
         "Total malformed or outdated more info link pages": r"Total malformed or outdated more info link page\(s\): (.+)",
@@ -52,42 +74,128 @@ def add_to_details(pattern, key):
     with path.open(encoding="utf-8") as f:
         lines = f.readlines()
 
+    process_overview(lines, overview_patterns, data)
+    process_language_details(lines, detail_patterns, data)
+
+    return data
+
+
+def process_overview(lines, patterns, data):
     for line in lines:
-        for key, pattern in overview_patterns.items():
-            add_to_overview(pattern, key)
+        for key, pattern in patterns.items():
+            match = re.search(pattern, line)
+            if match:
+                data["overview"][key] = match.group(1).strip()
 
+
+def process_language_details(lines, patterns, data):
     current_language = None
     for line in lines:
-        if line.startswith(
-            "----------------------------------------------------------------------------------------------------"
-        ):
-            current_language = None
-        match = re.match(r"^\d+.+in check-pages\.(\w+)/", line)
-        if match:
-            current_language = match.group(1)
-            if current_language not in data["details"]:
-                data["details"][current_language] = {}
-
+        current_language = update_current_language(line, current_language, data)
         if current_language:
-            for key, pattern in detail_patterns.items():
-                add_to_details(pattern, key)
+            add_language_details(line, patterns, current_language, data)
+
+
+def update_current_language(line, current_language, data):
+    if line.startswith("-" * 100):
+        return None
+    match = re.match(r"^\d+.+in check-pages\.(\w+)/", line)
+    if match:
+        new_language = match.group(1)
+        if new_language not in data["details"]:
+            data["details"][new_language] = {}
+        return new_language
+    return current_language
 
+
+def add_language_details(line, patterns, current_language, data):
+    for key, pattern in patterns.items():
+        match = re.search(pattern, line)
+        if match and int(match.group(1)) > 0:
+            data["details"][current_language][key] = int(match.group(1))
+
+
+def parse_seperate_text_files(data):  # fills data["metrics"] from per-topic report files in the CWD
+    for file in [
+        Path("inconsistent-filenames.txt"),  # hyphenated ".txt" name, like every sibling report
+        Path("malformed-or-outdated-more-info-link-pages.txt"),
+        Path("missing-alias-pages.txt"),
+        Path("mismatched-page-titles.txt"),
+        Path("missing-tldr-pages.txt"),
+        Path("misplaced-pages.txt"),
+        Path("outdated-pages-based-on-command-count.txt"),
+        Path("outdated-pages-based-on-command-contents.txt"),
+        Path("missing-english-pages.txt"),
+        Path("missing-translated-pages.txt"),
+        Path("lint-errors.txt"),
+    ]:
+        if not file.is_file():  # a report that was not produced is skipped, not an error
+            continue
+        topic_name = file.name.replace(".txt", "").replace("-", "_")  # "missing-alias-pages.txt" -> "missing_alias_pages"
+        if hasattr(Topics, topic_name.upper()):  # only files with a matching Topics member are reported
+            topic = getattr(Topics, topic_name.upper()).value
+            if topic:
+                with file.open(encoding="utf-8") as f:
+                    lines = f.readlines()
+                    add_metric_details(lines, data, topic_name, topic, file.name)  # file.name also becomes the artifact URL suffix
     return data
 
 
+def add_metric_details(lines, data, topic_name, topic, file_name):
+    data["metrics"][topic] = {
+        "count": len(lines),
+        "files": [],
+        "url": f"https://github.com/tldr-pages/tldr-maintenance/releases/download/latest/{file_name}",
+    }
+    if len(lines) <= 100:
+        match topic_name:
+            case "inconsistent_filenames":
+                data["metrics"][topic]["files"] = [f"{line.strip()}" for line in lines]
+            case "missing_alias_pages":
+                data["metrics"][topic]["files"] = [
+                    f"{generate_github_new_link(line.strip())}" for line in lines
+                ]
+            case "missing_tldr_pages":
+                data["metrics"][topic]["files"] = [
+                    f"{generate_github_link(line.strip())}" for line in lines
+                ]
+            case _:
+                data["metrics"][topic]["files"] = [
+                    f"{generate_github_edit_link(line.strip())}" for line in lines
+                ]
+
+
 def generate_dashboard(data):
-    markdown = "# Translation Dashboard Status\n\n## Overview\n"
-    overview = data["overview"]
+    DETAILS_OPENING = "<details>\n"
+    DETAILS_CLOSING = "\n</details>\n"
+    markdown = "# Translation Dashboard Status\n\n## Overview\n\n"
     markdown += "| Metric | Value |\n"
     markdown += "|--------|-------|\n"
 
-    for key, value in overview.items():
+    for key, value in data["overview"].items():
         markdown += f"| **{key}**  | {value} |\n"
 
-    markdown += "\n## Detailed Breakdown by Language\n"
+    markdown += "\n## Detailed Breakdown by Metric\n\n"
+
+    for key, metric in data["metrics"].items():
+        markdown += DETAILS_OPENING
+
+        markdown += f'<summary>{metric["count"]} {key}</summary>\n\n'
+
+        if not metric["files"]:
+            markdown += f"- More than 100 files, please view the [release artifact]({metric['url']}).\n"
+            markdown += DETAILS_CLOSING
+            continue
+
+        for file in metric["files"]:
+            markdown += f"- {file}\n"
+
+        markdown += DETAILS_CLOSING
+
+    markdown += "\n## Detailed Breakdown by Language\n\n"
 
     for lang, details in data["details"].items():
-        markdown += "<details>\n"
+        markdown += DETAILS_OPENING
         link_to_github_issue = get_github_issue(
             f"Translation Dashboard Status for {lang}"
         )
@@ -99,7 +207,7 @@ def generate_dashboard(data):
         for key, value in details.items():
             markdown += f"- {value} {key}\n"
 
-        markdown += "</details>\n"
+        markdown += DETAILS_CLOSING
 
     return markdown
 
@@ -124,6 +232,8 @@ def main():
             sys.exit(0)
 
         parsed_data = parse_log_file(log_file_path)
+        parsed_data = parse_seperate_text_files(parsed_data)
+
         markdown_content = generate_dashboard(parsed_data)
 
         result = update_github_issue(

diff --git a/scripts/update-language-issues.py b/scripts/update-language-issues.py
@@ -2,9 +2,7 @@
 # SPDX-License-Identifier: MIT
 
 import os
-import re
 import sys
-import urllib.parse
 
 from pathlib import Path
 from enum import Enum
@@ -15,14 +13,17 @@
     create_github_issue,
     get_github_issue,
     update_github_issue,
+    generate_github_link,
+    generate_github_edit_link,
+    generate_github_new_link,
 )
 
 
 class Topics(str, Enum):
     def __str__(self):
         return str(
             self.value
-        )  # make str(Colors.COLOR) return the ANSI code instead of an Enum object
+        )  # make str(Topics.TOPIC) return the Topic instead of an Enum object
 
     INCONSISTENT = "inconsistent filename(s)"
     MALFORMED_OR_OUTDATED_MORE_INFO_LINK = (
@@ -39,58 +40,6 @@ def __str__(self):
     LINT_ERRORS = "linter error(s)"
 
 
-def generate_github_link(item):
-    def replace_reference(match):
-        page = match.group(0)
-
-        directory = Path(page).parent
-        filename = Path(page).name
-
-        filename = urllib.parse.quote(filename)
-        page = str(
-            page.replace("[", "\\[")
-            .replace("]", "\\]")
-            .replace(")", "\\)")
-            .replace("(", "\\(")
-        )
-
-        return f"[{page}](https://github.com/tldr-pages/tldr/blob/main/{directory}/{filename})"
-
-    return re.sub(r"pages\..*\.md", replace_reference, item)
-
-
-def generate_github_edit_link(page):
-    directory = Path(page).parent
-    filename = Path(page).name
-
-    filename = urllib.parse.quote(filename)
-    page = str(
-        page.replace("[", "\\[")
-        .replace("]", "\\]")
-        .replace(")", "\\)")
-        .replace("(", "\\(")
-    )
-
-    return (
-        f"[{page}](https://github.com/tldr-pages/tldr/edit/main/{directory}/{filename})"
-    )
-
-
-def generate_github_new_link(page):
-    directory = Path(page).parent
-    filename = Path(page).name
-
-    filename = urllib.parse.quote(filename)
-    page = str(
-        page.replace("[", "\\[")
-        .replace("]", "\\]")
-        .replace(")", "\\)")
-        .replace("(", "\\(")
-    )
-
-    return f"[{page}](https://github.com/tldr-pages/tldr/new/main/{directory}?filename={filename})"
-
-
 def parse_file(filepath):
     with filepath.open(encoding="utf-8") as file:
         return file.read().strip().split("\n")