-
Notifications
You must be signed in to change notification settings - Fork 33
Open
Description
I built sarathi-serve with `pip install -e . --extra-index-url https://flashinfer.ai/whl/cu121/torch2.3/`.
Then I ran the following code:
from sarathi import AsyncLLMEngine, SamplingParams
from sarathi.engine.arg_utils import EngineArgs
def get_engine_config_from_args(args, arrival_rate):
    """Build the engine settings dict for one run.

    Prints the workload parameters and the requested scheduling policy,
    then returns the engine settings.

    Args:
        args: Object exposing the attributes ``ratio``, ``arrival_period``,
            ``scheduling_policy``, ``model_type`` and ``tensor_parallel``
            (presumably an ``argparse.Namespace`` -- confirm against the
            caller).
        arrival_rate: Request arrival rate; only echoed in the log line.

    Returns:
        dict: Engine settings keyed by option name. The scheduling policy
        is reported on stdout but is not included in the returned dict.
    """
    print(f"ratio: {args.ratio}, arrival_period: {args.arrival_period}, arrival_rate: {arrival_rate}")
    # The policy is only logged here; the former local copy of it was never
    # read, so it has been dropped.
    print(f"scheduling_policy: {args.scheduling_policy}")

    return {
        "model": args.model_type,
        "tensor_parallel_size": args.tensor_parallel,
        "enable_chunked_prefill": True,  # Enable chunked prefill
        "enable_prefix_caching": True,  # Enable prefix caching
        # Xiao: from sarathi-lean/sarathi/benchmark/config/default.yml
        "chunk_size": 512,
        "low_chunk_size": 128,
        "high_chunk_size": 2048,
        "max_model_len": 131072,  # Xiao: maximum model length
        # Alternative value noted in the original snippet: 4096
        "chunk_schedule_max_tokens": 131072,
        "chunk_schedule_stages": 16,
    }
# Assemble the engine settings for this run, then create the async engine
# from them.
eng_args = get_engine_config_from_args(args, arrival_rate)
engine = AsyncLLMEngine.from_engine_args(eng_args)
# NOTE(review): `xxx` is presumably a placeholder for the real generation
# arguments, which are not shown in this snippet.
engine.generate(xxx)
The error I get is:
File "/root/miniconda3/envs/pod/lib/python3.10/site-packages/sarathi/model_executor/models/internlm.py", line 18, in <module>
from sarathi.model_executor.parallel_utils.pipeline_parallel.mappings import recv, send
ModuleNotFoundError: No module named 'sarathi.model_executor.parallel_utils.pipeline_parallel'
Metadata
Metadata
Assignees
Labels
No labels