Skip to content

Commit

Permalink
Merge branch 'demo/emnlp' of https://github.com/zhudotexe/redel into demo/emnlp
Browse files Browse the repository at this point in the history
  • Loading branch information
zhudotexe committed Nov 14, 2024
2 parents a1bd705 + 4a40a4c commit 9a3ea5c
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 3 deletions.
2 changes: 0 additions & 2 deletions bench_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def get_engine(model_class: str, model_id: str, context_size: int = None):
max_context_size=context_size,
model_load_kwargs={
"tensor_parallel_size": 8,
"tokenizer_mode": "auto",
# for more stability
# "gpu_memory_utilization": 0.8,
# "enforce_eager": True,
Expand All @@ -113,7 +112,6 @@ def get_engine(model_class: str, model_id: str, context_size: int = None):
max_context_size=context_size,
model_load_kwargs={
"tensor_parallel_size": 8,
"tokenizer_mode": "auto",
# for more stability
"enable_prefix_caching": True,
},
Expand Down
2 changes: 1 addition & 1 deletion sandbox/qwen-fc.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ async def stream(self, messages: list[ChatMessage], functions: list[AIFunction]
completion_tokens=completion_tokens,
)


model = VLLMEngine(
model_id="Qwen/Qwen2.5-72B-Instruct",
model_load_kwargs={
"tensor_parallel_size": 4,
"tokenizer_mode": "auto",
"enable_prefix_caching": True,
},
sampling_params=SamplingParams(temperature=0, max_tokens=2048),
Expand Down

0 comments on commit 9a3ea5c

Please sign in to comment.