Merge pull request #62 from CodeReviewerAi/update-accuracy

Update accuracy
CodeReviewerAi · Jan 13, 2024 · 2540cdd · 2540cdd
2 parents 04bf25e + 3667ece
commit 2540cdd
Show file tree

Hide file tree

Showing 5 changed files with 85 additions and 4 deletions.
diff --git a/.github/workflows/accuracy_up_or_down_test.yml b/.github/workflows/accuracy_up_or_down_test.yml
@@ -0,0 +1,27 @@
+# CI workflow: runs test_accuracy_up_or_down.py on pull requests targeting main.
+name: Test Accuracy Check
+
+on:
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  test_accuracy:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+      with:
+        fetch-depth: 0  # Fetch all history for all branches and tags
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.x'
+
+    - name: Install dependencies
+      run: |
+        pip install -r requirements.txt
+
+    # The script calls sys.exit(1) on failure, which fails this step.
+    - name: Run Accuracy Test
+      run: python test_accuracy_up_or_down.py
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # What is CodeReviewer?
 CodeReviewer is a tool that uses machine learning to help developers review code. It is trained on a dataset of code from the [hlxsites](https://github.com/hlxsites) repositories and is able to predict how likely a given function is to break in the future.
 
-## Current Accuracy: 73.28%
+## Current Accuracy: 73.19%
 
 ## How does it work?
 We save each function's first version (as it was when first merged) and how often it was changed in the future. We then use this data to create embeddings for each function. We then use these embeddings to create a database using Qdrant. 

diff --git a/evaluate_performance.py b/evaluate_performance.py
@@ -32,7 +32,18 @@ def evaluate_model_accuracy(test_data_path, merge_threshold=-0.6):
 
     return accuracy, baseline_accuracy
 
+def update_readme_with_accuracy(accuracy):
+    with open('README.md', 'r') as file:
+        lines = file.readlines()
+
+    for i, line in enumerate(lines):
+        if '## Current Accuracy:' in line:
+            lines[i] = f'## Current Accuracy: {accuracy:.2f}%\n'
+
+    with open('README.md', 'w') as file:
+        file.writelines(lines)
+
 if __name__ == '__main__':
-    accuracy, baseline_accuracy = evaluate_model_accuracy('./dataForTesting/testing.json')
-    print(f"Model Accuracy: {accuracy * 100:.2f}%")
-    print(f"Baseline Accuracy: {baseline_accuracy * 100:.2f}%")
+    accuracy = evaluate_model_accuracy('./dataForTesting/testing.json')
+    print(f"Current Accuracy: {accuracy * 100:.2f}%")
+    update_readme_with_accuracy(accuracy * 100)
diff --git a/main.py b/main.py
@@ -37,6 +37,7 @@ def main(repos_info):
     accuracy, baseline_accuracy = evaluate_performance.evaluate_model_accuracy('./dataForTesting/testing.json')
     print(f"Model Accuracy: {accuracy * 100:.2f}%")
     print(f"Baseline Accuracy: {baseline_accuracy * 100:.2f}%")
+    evaluate_performance.update_readme_with_accuracy(accuracy * 100)
 
 if __name__ == '__main__':    
     start_time = time.time()

diff --git a/test_accuracy_up_or_down.py b/test_accuracy_up_or_down.py
@@ -0,0 +1,42 @@
+import subprocess
+import re
+import sys
+
+# ANSI escape codes used to colour the final pass/fail message
+GREEN = '\033[92m'  # Green text (success message)
+RED = '\033[91m'    # Red text (failure message)
+ENDC = '\033[0m'    # Reset color so later terminal output is unaffected
+
+def get_last_commit_on_main():
+    return subprocess.check_output(['git', 'rev-parse', 'main']).decode().strip()
+
+def get_diff_of_readme(last_commit):
+    return subprocess.check_output(['git', 'diff', last_commit, 'README.md']).decode()
+
+def parse_accuracy_from_diff(diff):
+    pattern = r"## Current Accuracy: (\d+\.\d+)%"
+    accuracies = re.findall(pattern, diff)
+    return [float(acc) for acc in accuracies]
+
+def test_accuracy_increase():
+    try:
+        last_commit = get_last_commit_on_main()
+        diff = get_diff_of_readme(last_commit)
+        if '## Current Accuracy:' in diff:
+            old_accuracy, new_accuracy = parse_accuracy_from_diff(diff)
+            assert new_accuracy > old_accuracy, "Accuracy must be increased in PR"
+            return True
+        else:
+            print("No accuracy update in README.md 🤷‍♂️")
+            return False
+    except Exception as e:
+        print(f"Test failed due to an error: {e} ❌")
+        return False
+
+if __name__ == '__main__':
+    result = test_accuracy_increase()
+    if result:
+        print(GREEN + "Test passed: Accuracy has been increased! 🎉" + ENDC)
+    else:
+        print(RED + "Test failed: Accuracy has not been increased or no accuracy update was found. 😢" + ENDC)
+        sys.exit(1)  # Exit with a non-zero status code to indicate failure