Skip to content

Commit

Permalink
chore: some travelplanner eval fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhudotexe committed Dec 2, 2024
1 parent b32ec03 commit 5632993
Show file tree
Hide file tree
Showing 9 changed files with 905 additions and 189 deletions.
180 changes: 180 additions & 0 deletions experiments/travelplanner/cohere-hf/baseline/results_for_tp_eval.jsonl

Large diffs are not rendered by default.

180 changes: 180 additions & 0 deletions experiments/travelplanner/cohere-hf/full/results_for_tp_eval.jsonl

Large diffs are not rendered by default.

180 changes: 180 additions & 0 deletions experiments/travelplanner/cohere-hf/root-fc/results_for_tp_eval.jsonl

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

File renamed without changes.
182 changes: 0 additions & 182 deletions experiments/travelplanner/validation/id_to_idx.json

This file was deleted.

10 changes: 4 additions & 6 deletions score_travelplanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@

from redel.utils import read_jsonl

- EXPECTED_RESULTS = {
- "validation": 180,
- "mistral": 180,
- }
+ EXPECTED_RESULTS = 180
+ ID_TO_IDX_MAP = Path(__file__).parent / "experiments/travelplanner/id_to_idx.json"


async def transform_submission(fp: Path):
Expand All @@ -24,7 +22,7 @@ async def transform_submission(fp: Path):
split = fp.parents[1].name
transformed = []
try:
- with open(fp.parents[1] / "id_to_idx.json") as f:
+ with open(ID_TO_IDX_MAP) as f:
id_to_idx = json.load(f)
except FileNotFoundError:
id_to_idx = {}
Expand All @@ -45,7 +43,7 @@ async def transform_submission(fp: Path):
transformed.append({"idx": idx, "query": query, "plan": plan})

# ensure there are the right number of results, and they're sorted by idx
- missing_idxs = set(range(EXPECTED_RESULTS[split])).difference({t["idx"] for t in transformed})
+ missing_idxs = set(range(EXPECTED_RESULTS)).difference({t["idx"] for t in transformed})
if missing_idxs:
print(f"WARN: Submission is missing indices {missing_idxs}")
for idx in missing_idxs:
Expand Down
2 changes: 1 addition & 1 deletion slurm/gen_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
# ModelConfig(model_class="openai", large="gpt-4o-2024-05-13", small="gpt-3.5-turbo-0125", size=0, extras=""),
ModelConfig(
model_class="mistral",
- large="mistralai/Mistral-Large-Instruct-2407",
+ large="mistralai/Mistral-Large-Instruct-2411",
small="mistralai/Mistral-Small-Instruct-2409",
size=8,
extras="--engine-timeout 1800", # 30 min timeout per trial
Expand Down

0 comments on commit 5632993

Please sign in to comment.