Skip to content

Commit b6fe2d9

Browse files
committed
fix comparison
1 parent b129492 commit b6fe2d9

4 files changed

Lines changed: 4 additions & 4 deletions

File tree

benchmarks_and_experiments/important/accuracy_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -435,7 +435,7 @@ def _run_vllm_prefixcache(samples: List[Dict], model: str, max_new_tokens: int =
435 435   log.info("[vllm_prefixcache accuracy] submitting %d prompts in batches of %d ...", n, _VLLM_BATCH)
436 436   prompts = [_format_prompt(s["context"], s["input"], s.get("choices")) for s in samples]
437 437   llm = LLM(model=model, enable_prefix_caching=True,
438     - max_model_len=max_context_tokens + 128,
    438 + max_model_len=max_context_tokens + 512,
439 439   gpu_memory_utilization=gpu_memory_utilization,
440 440   enforce_eager=enforce_eager,
441 441   max_num_seqs=max_num_seqs)

benchmarks_and_experiments/important/comparison_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -353,7 +353,7 @@ def _run_vllm(
353 353
354 354   tokenizer = AutoTokenizer.from_pretrained(self.model_name)
355 355   llm = LLM(model=self.model_name, enable_prefix_caching=True,
356     - max_model_len=max_context_tokens + 128,
    356 + max_model_len=max_context_tokens + 512,
357 357   gpu_memory_utilization=0.95,
358 358   enforce_eager=True,
359 359   max_num_seqs=1)

benchmarks_and_experiments/important/latency_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,7 @@ def _measure_vllm_prefixcache(
371 371
372 372   tokenizer = AutoTokenizer.from_pretrained(model)
373 373   llm = LLM(model=model, enable_prefix_caching=enable_prefix_caching,
374     - max_model_len=max_context_tokens + 128,
    374 + max_model_len=max_context_tokens + 512,
375 375   gpu_memory_utilization=gpu_memory_utilization,
376 376   enforce_eager=enforce_eager,
377 377   max_num_seqs=max_num_seqs)

benchmarks_and_experiments/important/memory_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -397,7 +397,7 @@ def _measure_vllm_prefixcache(
397 397
398 398   tokenizer = AutoTokenizer.from_pretrained(model)
399 399   llm = LLM(model=model, enable_prefix_caching=True,
400     - max_model_len=max_context_tokens + 128,
    400 + max_model_len=max_context_tokens + 512,
401 401   gpu_memory_utilization=gpu_memory_utilization,
402 402   enforce_eager=enforce_eager,
403 403   max_num_seqs=max_num_seqs)

0 commit comments

Comments
 (0)