diff --git a/bench_engines.py b/bench_engines.py
index 2efdc2a7f..d818de6ce 100644
--- a/bench_engines.py
+++ b/bench_engines.py
@@ -38,6 +38,8 @@ def get_engine(model_class: str, model_id: str, context_size: int = None):
             return OpenAIEngine(model="gpt-4o-2024-05-13", temperature=0, max_context_size=context_size)
         if model_id == "gpt-3.5-turbo-0125":
             return OpenAIEngine(model="gpt-3.5-turbo-0125", temperature=0, max_context_size=context_size)
+        if model_id == "gpt-4o-mini":
+            return OpenAIEngine(model="gpt-4o-mini", temperature=0, max_context_size=context_size)
     # ==== MISTRAL ====
     if model_class == "mistral":
         from kani.ext.vllm import VLLMEngine
@@ -166,7 +168,6 @@ def get_engine(model_class: str, model_id: str, context_size: int = None):
                 },
                 sampling_params=SamplingParams(temperature=0.7, max_tokens=2048, min_tokens=1),
             )
-    # todo: cohere
     raise ValueError("unknown engine")
 
 
diff --git a/bench_webarena.py b/bench_webarena.py
index aa9c313b7..8b99fb2ed 100644
--- a/bench_webarena.py
+++ b/bench_webarena.py
@@ -81,14 +81,17 @@ def wa_ensure_auth(config_file: Path) -> Path:
             comb = get_site_comb_from_filepath(cookie_file_name)
             temp_dir = tempfile.mkdtemp()
             # subprocess to renew the cookie
-            subprocess.run([
-                "python",
-                "experiments/webarena/auto_login.py",
-                "--auth_folder",
-                temp_dir,
-                "--site_list",
-                *comb,
-            ])
+            subprocess.run(
+                [
+                    "python",
+                    "experiments/webarena/auto_login.py",
+                    "--auth_folder",
+                    temp_dir,
+                    "--site_list",
+                    *comb,
+                ],
+                check=True,
+            )
             _c["storage_state"] = f"{temp_dir}/{cookie_file_name}"
             assert os.path.exists(_c["storage_state"])
             # write a temp copy of the config file
diff --git a/redel/tools/webarena/patches.py b/redel/tools/webarena/patches.py
index 5661c4555..8022cb453 100644
--- a/redel/tools/webarena/patches.py
+++ b/redel/tools/webarena/patches.py
@@ -139,15 +139,15 @@ def patch_to_support_webarena():
 
     # WebArena runs a subprocess to login to get cookies
     # which spews logs / warnings, so we silence them
-    _subprocess_run = subprocess.run
-
-    def subprocess_run(*args, **kwargs):
-        if any("auto_login.py" in a for a in args[0]):
-            kwargs["stdout"] = subprocess.PIPE
-            kwargs["stderr"] = subprocess.PIPE
-        return _subprocess_run(*args, **kwargs)
-
-    subprocess.run = lambda *args, **kwargs: subprocess_run(*args, **kwargs)
+    # NOTE: this subprocess.run monkeypatch is currently disabled; the original is kept for reference.
+    # _subprocess_run = subprocess.run
+    # def subprocess_run(*args, **kwargs):
+    #     if any("auto_login.py" in a for a in args[0]):
+    #         kwargs["stdout"] = subprocess.PIPE
+    #         kwargs["stderr"] = subprocess.PIPE
+    #     return _subprocess_run(*args, **kwargs)
+    #
+    # subprocess.run = lambda *args, **kwargs: subprocess_run(*args, **kwargs)
 
     # WebArena's get_bounding_client_rect method is very slow
     with ignore_webarena_warnings():
diff --git a/test-webarena.sh b/test-webarena.sh
new file mode 100644
index 000000000..96c7fca27
--- /dev/null
+++ b/test-webarena.sh
@@ -0,0 +1,4 @@
+#!/bin/zsh
+# Runs bench_webarena.py with the baseline config, using gpt-4o-mini as both the large and small model.
+source slurm/webarena-env.sh
+python bench_webarena.py --config baseline --model-class openai --large-model gpt-4o-mini --small-model gpt-4o-mini --save-dir experiments/webarena/dev/baseline