Skip to content

Commit

Permalink
chore: more webarena slurm
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhudotexe committed Dec 10, 2024
1 parent a14d615 commit 521651e
Show file tree
Hide file tree
Showing 34 changed files with 45 additions and 37 deletions.
2 changes: 1 addition & 1 deletion slurm/claude/fanoutqa-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_fanoutqa.py --config small-leaf --model-class claude --large-model
python bench_fanoutqa.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-all
python bench_fanoutqa.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/small-baseline
python bench_fanoutqa.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-context
python bench_fanoutqa.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-baseline
python bench_fanoutqa.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/claude/short-baseline
2 changes: 1 addition & 1 deletion slurm/claude/travelplanner-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_travelplanner.py --config small-leaf --model-class claude --large-m
python bench_travelplanner.py --config small-all --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-all
python bench_travelplanner.py --config small-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/small-baseline
python bench_travelplanner.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-context
python bench_travelplanner.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-baseline
python bench_travelplanner.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/claude/short-baseline
2 changes: 1 addition & 1 deletion slurm/claude/webarena-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/claude/webarena-2-root-fc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/claude/webarena-3-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/claude/webarena-4-small-leaf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/claude/webarena-5-small-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/claude/webarena-6-small-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/claude/webarena-7-short-context.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/claude/webarena-8-short-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
5 changes: 3 additions & 2 deletions slurm/claude/webarena-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down Expand Up @@ -45,4 +45,5 @@ sleep 600
python bench_webarena.py --config short-context --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-context
curl -X GET ${RESTART_URL}
sleep 600
python bench_webarena.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-baselinekill $DOCKER_PID
python bench_webarena.py --config short-baseline --model-class claude --large-model claude-3-5-sonnet-20241022 --small-model claude-3-5-haiku-20241022 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/claude/short-baseline
kill $DOCKER_PID
2 changes: 1 addition & 1 deletion slurm/cohere-hf/fanoutqa-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_fanoutqa.py --config small-leaf --model-class cohere-hf --large-mod
python bench_fanoutqa.py --config small-all --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/cohere-hf/small-all --engine-timeout 1800
python bench_fanoutqa.py --config small-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/cohere-hf/small-baseline --engine-timeout 1800
python bench_fanoutqa.py --config short-context --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/cohere-hf/short-context --engine-timeout 1800
python bench_fanoutqa.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/cohere-hf/short-baseline --engine-timeout 1800
python bench_fanoutqa.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/cohere-hf/short-baseline --engine-timeout 1800
2 changes: 1 addition & 1 deletion slurm/cohere-hf/travelplanner-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_travelplanner.py --config small-leaf --model-class cohere-hf --larg
python bench_travelplanner.py --config small-all --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/cohere-hf/small-all --engine-timeout 1800
python bench_travelplanner.py --config small-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/cohere-hf/small-baseline --engine-timeout 1800
python bench_travelplanner.py --config short-context --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/cohere-hf/short-context --engine-timeout 1800
python bench_travelplanner.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/cohere-hf/short-baseline --engine-timeout 1800
python bench_travelplanner.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/cohere-hf/short-baseline --engine-timeout 1800
3 changes: 2 additions & 1 deletion slurm/cohere-hf/webarena-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,5 @@ sleep 600
python bench_webarena.py --config short-context --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-context --engine-timeout 1800
curl -X GET ${RESTART_URL}
sleep 600
python bench_webarena.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-baseline --engine-timeout 1800kill $DOCKER_PID
python bench_webarena.py --config short-baseline --model-class cohere-hf --large-model CohereForAI/c4ai-command-r-plus-08-2024 --small-model CohereForAI/c4ai-command-r-08-2024 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/cohere-hf/short-baseline --engine-timeout 1800
kill $DOCKER_PID
3 changes: 3 additions & 0 deletions slurm/gen_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def main():
"sleep 600"
)
footer = "kill $DOCKER_PID"
if int(mem[:-1]) < 256:
mem = "256G"
else:
bench_startup = ""
bench_extras = ""
Expand Down Expand Up @@ -147,6 +149,7 @@ def main():
f.write(header)
f.write("\n")
f.write("\n".join(all_commands))
f.write("\n")
f.write(footer)


Expand Down
2 changes: 1 addition & 1 deletion slurm/mistral/fanoutqa-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_fanoutqa.py --config small-leaf --model-class mistral --large-model
python bench_fanoutqa.py --config small-all --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/mistral/small-all --engine-timeout 1800
python bench_fanoutqa.py --config small-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/mistral/small-baseline --engine-timeout 1800
python bench_fanoutqa.py --config short-context --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/mistral/short-context --engine-timeout 1800
python bench_fanoutqa.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/mistral/short-baseline --engine-timeout 1800
python bench_fanoutqa.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/mistral/short-baseline --engine-timeout 1800
2 changes: 1 addition & 1 deletion slurm/mistral/travelplanner-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_travelplanner.py --config small-leaf --model-class mistral --large-
python bench_travelplanner.py --config small-all --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/mistral/small-all --engine-timeout 1800
python bench_travelplanner.py --config small-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/mistral/small-baseline --engine-timeout 1800
python bench_travelplanner.py --config short-context --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/mistral/short-context --engine-timeout 1800
python bench_travelplanner.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/mistral/short-baseline --engine-timeout 1800
python bench_travelplanner.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/mistral/short-baseline --engine-timeout 1800
3 changes: 2 additions & 1 deletion slurm/mistral/webarena-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,5 @@ sleep 600
python bench_webarena.py --config short-context --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/short-context --engine-timeout 1800
curl -X GET ${RESTART_URL}
sleep 600
python bench_webarena.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/short-baseline --engine-timeout 1800kill $DOCKER_PID
python bench_webarena.py --config short-baseline --model-class mistral --large-model mistralai/Mistral-Large-Instruct-2407 --small-model mistralai/Mistral-Small-Instruct-2409 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/mistral/short-baseline --engine-timeout 1800
kill $DOCKER_PID
2 changes: 1 addition & 1 deletion slurm/openai/fanoutqa-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_fanoutqa.py --config small-leaf --model-class openai --large-model
python bench_fanoutqa.py --config small-all --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/openai/small-all
python bench_fanoutqa.py --config small-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/openai/small-baseline
python bench_fanoutqa.py --config short-context --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/openai/short-context
python bench_fanoutqa.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/openai/short-baseline
python bench_fanoutqa.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/openai/short-baseline
2 changes: 1 addition & 1 deletion slurm/openai/travelplanner-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_travelplanner.py --config small-leaf --model-class openai --large-m
python bench_travelplanner.py --config small-all --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/openai/small-all
python bench_travelplanner.py --config small-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/openai/small-baseline
python bench_travelplanner.py --config short-context --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/openai/short-context
python bench_travelplanner.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/openai/short-baseline
python bench_travelplanner.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/travelplanner/openai/short-baseline
2 changes: 1 addition & 1 deletion slurm/openai/webarena-1-full.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/openai/webarena-2-root-fc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/openai/webarena-3-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/openai/webarena-4-small-leaf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/openai/webarena-5-small-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/openai/webarena-6-small-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/openai/webarena-7-short-context.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
2 changes: 1 addition & 1 deletion slurm/openai/webarena-8-short-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down
5 changes: 3 additions & 2 deletions slurm/openai/webarena-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --time=7-0
#SBATCH --nodes=1
#SBATCH -c 1
#SBATCH --mem=32G
#SBATCH --mem=256G
#SBATCH --gpus=0
#SBATCH [email protected]
#SBATCH --mail-type=END,FAIL
Expand Down Expand Up @@ -45,4 +45,5 @@ sleep 600
python bench_webarena.py --config short-context --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/short-context
curl -X GET ${RESTART_URL}
sleep 600
python bench_webarena.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/short-baselinekill $DOCKER_PID
python bench_webarena.py --config short-baseline --model-class openai --large-model gpt-4o-2024-05-13 --small-model gpt-3.5-turbo-0125 --save-dir /nlpgpu/data/andrz/redel/experiments/webarena/openai/short-baseline
kill $DOCKER_PID
2 changes: 1 addition & 1 deletion slurm/qwen/fanoutqa-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ python bench_fanoutqa.py --config small-leaf --model-class qwen --large-model Qw
python bench_fanoutqa.py --config small-all --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/qwen/small-all --engine-timeout 1800
python bench_fanoutqa.py --config small-baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/qwen/small-baseline --engine-timeout 1800
python bench_fanoutqa.py --config short-context --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/qwen/short-context --engine-timeout 1800
python bench_fanoutqa.py --config short-baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/qwen/short-baseline --engine-timeout 1800
python bench_fanoutqa.py --config short-baseline --model-class qwen --large-model Qwen/Qwen2.5-72B-Instruct --small-model Qwen/Qwen2.5-7B-Instruct --save-dir /nlpgpu/data/andrz/redel/experiments/fanoutqa/qwen/short-baseline --engine-timeout 1800
Loading

0 comments on commit 521651e

Please sign in to comment.