Skip to content

Commit

Permalink
Merge branch 'demo/emnlp' of https://github.com/zhudotexe/redel into demo/emnlp
Browse files Browse the repository at this point in the history
  • Loading branch information
zhudotexe committed Nov 14, 2024
2 parents a1bd705 + 4a40a4c commit 9a3ea5c
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 3 deletions.
2 changes: 0 additions & 2 deletions bench_engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def get_engine(model_class: str, model_id: str, context_size: int = None):
max_context_size=context_size,
model_load_kwargs={
"tensor_parallel_size": 8,
"tokenizer_mode": "auto",
# for more stability
# "gpu_memory_utilization": 0.8,
# "enforce_eager": True,
Expand All @@ -113,7 +112,6 @@ def get_engine(model_class: str, model_id: str, context_size: int = None):
max_context_size=context_size,
model_load_kwargs={
"tensor_parallel_size": 8,
"tokenizer_mode": "auto",
# for more stability
"enable_prefix_caching": True,
},
Expand Down
2 changes: 1 addition & 1 deletion sandbox/qwen-fc.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ async def stream(self, messages: list[ChatMessage], functions: list[AIFunction]
completion_tokens=completion_tokens,
)


model = VLLMEngine(
model_id="Qwen/Qwen2.5-72B-Instruct",
model_load_kwargs={
"tensor_parallel_size": 4,
"tokenizer_mode": "auto",
"enable_prefix_caching": True,
},
sampling_params=SamplingParams(temperature=0, max_tokens=2048),
Expand Down

0 comments on commit 9a3ea5c

Please sign in to comment.