bug fix, update readme, delete launch_parameters (#25)

anmolagarwalcp810 authored Jul 28, 2024
1 parent 60277c0 commit 6aa32c6
Showing 3 changed files with 34 additions and 138 deletions.
48 changes: 25 additions & 23 deletions README.md
@@ -26,10 +26,18 @@ We have a [live demo](https://vidur.westus2.cloudapp.azure.com/) that captures t
* __Instructions on adding a new model to existing or new SKUs can be found [here](docs/profiling.md)__.
* All models support a maximum context length of 4k, except `Llama3-8B` and `Llama3-70B`, which support a 16k context length when the additional CLI params below are passed (see the example command after this list):

For random forest:
```text
---sklearn_execution_time_predictor_prediction_max_prefill_chunk_size 16384 \
---sklearn_execution_time_predictor_prediction_max_batch_size 512 \
---sklearn_execution_time_predictor_prediction_max_tokens_per_request 16384 \
+--random_forrest_execution_time_predictor_config_prediction_max_prefill_chunk_size 16384 \
+--random_forrest_execution_time_predictor_config_prediction_max_batch_size 512 \
+--random_forrest_execution_time_predictor_config_prediction_max_tokens_per_request 16384 \
```

For linear regression:
```text
+--linear_regression_execution_time_predictor_config_prediction_max_prefill_chunk_size 16384 \
+--linear_regression_execution_time_predictor_config_prediction_max_batch_size 512 \
+--linear_regression_execution_time_predictor_config_prediction_max_tokens_per_request 16384 \
```

* Pipeline parallelism is supported for all models. The pipeline-parallel (PP) degree must evenly divide the number of layers in the model.
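
For instance, combining the 16k-context flags above with a normal run might look roughly like the following. This is only a sketch: the `meta-llama/Meta-Llama-3-8B` identifier and the other values shown are assumptions, not taken from this commit.

```sh
# Sketch only: model identifier and parameter values are assumed, not from this commit.
python -m vidur.main \
  --replica_config_device a100 \
  --replica_config_model_name meta-llama/Meta-Llama-3-8B \
  --replica_config_tensor_parallel_size 1 \
  --replica_config_num_pipeline_stages 1 \
  --random_forrest_execution_time_predictor_config_prediction_max_prefill_chunk_size 16384 \
  --random_forrest_execution_time_predictor_config_prediction_max_batch_size 512 \
  --random_forrest_execution_time_predictor_config_prediction_max_tokens_per_request 16384
```
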
@@ -97,26 +105,20 @@ or a big example with all the parameters,

```sh
python -m vidur.main \
---replica_device a100 \
---replica_model_name meta-llama/Llama-2-7b-hf \
---cluster_num_replicas 1 \
---replica_num_tensor_parallel_workers 1 \
---replica_num_pipeline_stages 1 \
---request_generator_provider synthetic \
---synthetic_request_generator_length_provider trace \
---synthetic_request_generator_interval_provider static \
---request_generator_max_tokens 4096 \
---trace_request_length_generator_trace_file ./data/processed_traces/arxiv_summarization_stats_llama2_tokenizer_filtered_v2.csv \
---synthetic_request_generator_num_requests 128 \
---request_generator_provider synthetic \
---synthetic_request_generator_length_provider trace \
---synthetic_request_generator_interval_provider static \
---request_generator_max_tokens 4096 \
---trace_request_length_generator_trace_file ./data/processed_traces/arxiv_summarization_stats_llama2_tokenizer_filtered_v2.csv \
---synthetic_request_generator_num_requests 128 \
---replica_scheduler_provider vllm \
---replica_scheduler_batch_size_cap 256 \
---vllm_scheduler_max_tokens_in_batch 4096
+--replica_config_device a100 \
+--replica_config_model_name meta-llama/Llama-2-7b-hf \
+--cluster_config_num_replicas 1 \
+--replica_config_tensor_parallel_size 1 \
+--replica_config_num_pipeline_stages 1 \
+--request_generator_config_type synthetic \
+--length_generator_config_type trace \
+--interval_generator_config_type static \
+--[trace|zipf|uniform|fixed]_request_length_generator_config_max_tokens 4096 \
+--trace_request_length_generator_config_trace_file ./data/processed_traces/arxiv_summarization_stats_llama2_tokenizer_filtered_v2.csv \
+--synthetic_request_generator_config_num_requests 128 \
+--replica_scheduler_config_type vllm \
+--[vllm|lightllm|orca|faster_transformer|sarathi]_scheduler_config_batch_size_cap 256 \
+--[vllm|lightllm]_scheduler_config_max_tokens_in_batch 4096
```

The simulator supports a plethora of parameters for describing the simulation, which can be found [here](docs/launch_parameters.md).
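
With docs/launch_parameters.md removed in this commit, one way to list the full set of flags is the argparse help generated from the flat config dataclass (a sketch, assuming the default `-h`/`--help` option is enabled):

```sh
# Prints every flag generated from the flat config dataclass, with any help text defined in the config metadata.
python -m vidur.main -h
```
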
112 changes: 0 additions & 112 deletions docs/launch_parameters.md

This file was deleted.

12 changes: 9 additions & 3 deletions vidur/config/flat_dataclass.py
@@ -90,7 +90,7 @@ def create_from_cli_args(cls) -> Any:
        nargs = None
        action = None
        field_type = field.type
-        help_text = field.metadata.get("help", None)
+        help_text = cls.metadata_mapping[field.name].get("help", None)

        if is_list(field.type):
            assert is_composed_of_primitives(field.type)
@@ -113,15 +113,17 @@ def create_from_cli_args(cls) -> Any:

        # handle cases with default and default factory args
        if field.default is not MISSING:
-            arg_params["default"] = field.default
+            value = field.default
+            if callable(value):
+                value = value()
+            arg_params["default"] = value
        elif field.default_factory is not MISSING:
            arg_params["default"] = field.default_factory()
        else:
            arg_params["required"] = True

        if nargs:
            arg_params["nargs"] = nargs

        parser.add_argument(f"--{field.name}", **arg_params)

    args = parser.parse_args()
@@ -139,6 +141,7 @@ def create_flat_dataclass(input_dataclass: Any) -> Any:
    processed_classes = set()
    dataclass_args = defaultdict(list)
    dataclass_dependencies = defaultdict(set)
+    metadata_mapping = {}

    def process_dataclass(_input_dataclass, prefix=""):
        if _input_dataclass in processed_classes:
@@ -165,6 +168,7 @@ def process_dataclass(_input_dataclass, prefix=""):
                meta_fields_with_defaults.append(
                    (type_field_name, type(default_value), default_value)
                )
+                metadata_mapping[type_field_name] = field.metadata

                assert hasattr(field_type, "__dataclass_fields__")
                for subclass in get_all_subclasses(field_type):
@@ -202,6 +206,7 @@ def process_dataclass(_input_dataclass, prefix=""):
            dataclass_args[_input_dataclass].append(
                (prefixed_name, field.name, field_type)
            )
+            metadata_mapping[prefixed_name] = field.metadata

    process_dataclass(input_dataclass)

@@ -211,6 +216,7 @@ def process_dataclass(_input_dataclass, prefix=""):
    # Metadata fields
    FlatClass.dataclass_args = dataclass_args
    FlatClass.dataclass_dependencies = dataclass_dependencies
+    FlatClass.metadata_mapping = metadata_mapping

    # Helper methods
    FlatClass.reconstruct_original_dataclass = reconstruct_original_dataclass
