huggingface · alielfilali01 · Dec 12, 2024 · Dec 13, 2024 · Dec 13, 2024 · Dec 13, 2024
diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
@@ -343,16 +343,14 @@ def __init__(
             hf_subset=hf_subset,
             prompt_function=aratrust_pfn,
             hf_repo="asas-ai/AraTrust-categorized",
-            metric=[
-                Metrics.f1_score
-            ],  # Following the paper (AraTrust: An Evaluation of Trustworthiness for LLMs in Arabic)[https://arxiv.org/abs/2403.09017]
+            metric=[Metrics.loglikelihood_acc_norm],
             hf_avail_splits=["train"],
             evaluation_splits=["train"],
             few_shots_split=None,
             few_shots_select=None,
             suite=["community"],
             generation_size=-1,
-            stop_sequence=[],
+            stop_sequence=None,
             trust_dataset=True,
             version=0,
         )
@@ -414,12 +412,15 @@ def alghafa_pfn(line, task_name: str = None):
     question = line["query"]
     answer_index = int(line["label"])
     allowed_keys = [f"sol{i}" for i in range(1, 6)]
-    choices = [line[key] for key in allowed_keys if key in line]
+    extracted_choices = [line[key] for key in allowed_keys if key in line]
+    choices = [str(i) for i in range(len(extracted_choices))]
 
     instruction = "الأسئلة التالية هي أسئلة متعددة الإختيارات مع الجواب الصحيح\n\n"
     query = f"{instruction}السؤال: {question}\n"
-    for index, choice in enumerate(choices):
+
+    for index, choice in enumerate(extracted_choices):
         query += f"{index}) {choice}\n"
+
     query += "الإجابة:"
 
     return Doc(
@@ -772,7 +773,7 @@ def sciq_arabic_pfn(line, task_name: str = None):
 
 
 def madinah_qa_pfn(line, task_name: str = None):
-    instruction = "السؤال التالي هو سؤال متعدد الإختيارات. اختر الإجابة الصحيحة:\n\n"
+    instruction = "بناءً على السياق أدناه، اختر الإجابة الصحيحة للسؤال التالي من قائمة الأجوبة:\n\n"
 
     # Define the mapping from Latin to Arabic letters
     latin_to_arabic = {"A": "أ", "B": "ب", "C": "ج", "D": "د", "E": "هـ"}
@@ -793,14 +794,14 @@ def madinah_qa_pfn(line, task_name: str = None):
     # Find the correct index for the answer key in the Arabic version
     answer_index = valid_keys_latin.index(line["Answer Key"])
 
-    query = f"{instruction}{line['Question']}\n"
+    query = f"{instruction}\nالسياق:\n{line['Context']}\nالسؤال:\n{line['Question']}\n"
     query += "".join([f"{key}. {choice}\n" for key, choice in zip(valid_keys_arabic, choices)])
     query += "الإجابة:"
 
     return Doc(
         task_name=task_name,
         query=query,
-        choices=choices,
+        choices=valid_keys_arabic,
         gold_index=answer_index,  # Correct index in the valid keys
         instruction=instruction,
     )

diff --git a/examples/tasks/OALL_v2_tasks.txt b/examples/tasks/OALL_v2_tasks.txt
@@ -1,6 +1,5 @@
 community|alghafa:meta_ar_dialects|0|0
 community|alghafa:meta_ar_msa|0|0
-community|alghafa:mcq_exams_test_ar|0|0
 community|alghafa:multiple_choice_facts_truefalse_balanced_task|0|0
 community|alghafa:multiple_choice_grounded_statement_soqal_task|0|0
 community|alghafa:multiple_choice_grounded_statement_xglue_mlqa_task|0|0