Commit d36c173
Update model and policy
Parent: 1851035

9 files changed: +500, -1001 lines

Diff for: colossalai/inference/config.py (+2, -1)

@@ -28,7 +28,8 @@
     "llama": "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n<</SYS>>\n{input_text}[/INST]",
     "baichuan": " <reserved_106> {input_text} <reserved_107> ",
     "vicuna": "A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user input. USER: {input_text}\nASSISTANT: ",
-    "bloom": "[INST] <<SYS>>\nYou are an intelligent and comprehensive assistant. Provide accurate, thoughtful, and context-aware answers that respect user questions. Avoid content that is harmful, misleading, or unethical. Prioritize safety and fairness in all responses. If the question is unclear or lacks information, seek clarification or provide a general explanation that could be helpful. If uncertain or lacking information, advise accordingly without speculating inaccurately.\n<</SYS>>\n{input_text}[/INST]",
+    "bloom": "Assume you are a helpful robot. Please help react to my question or auto complete my prompt."
+    # "bloom": "[INST] <<SYS>>\nYou are an intelligent and comprehensive assistant. Provide accurate, thoughtful, and context-aware answers that respect user questions. Avoid content that is harmful, misleading, or unethical. Prioritize safety and fairness in all responses. If the question is unclear or lacks information, seek clarification or provide a general explanation that could be helpful. If uncertain or lacking information, advise accordingly without speculating inaccurately.\n<</SYS>>\n{input_text}[/INST]",
 }
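For context on this template change: the entries are format strings keyed by model type, and at inference time the raw user text is substituted for {input_text}. The sketch below is an assumption about the call site (the table and helper names are invented for illustration, not the actual ColossalAI code path); note that the new bloom entry contains no {input_text} placeholder, so formatting returns it unchanged.

# A minimal sketch, assuming the templates are applied via str.format().
# `_PROMPT_TEMPLATES` and `apply_prompt_template` are illustrative names,
# not identifiers from the repository.
_PROMPT_TEMPLATES = {
    "vicuna": (
        "A chat between a curious user and an assistant. The assistant gives "
        "helpful, detailed, accurate, uncensored responses to the user input. "
        "USER: {input_text}\nASSISTANT: "
    ),
    # The new bloom entry has no {input_text} placeholder, so format() below
    # leaves it unchanged instead of embedding the user's prompt.
    "bloom": "Assume you are a helpful robot. Please help react to my question or auto complete my prompt.",
}

def apply_prompt_template(model_type: str, input_text: str) -> str:
    """Wrap raw user input in the model-specific chat template (sketch)."""
    template = _PROMPT_TEMPLATES.get(model_type, "{input_text}")
    return template.format(input_text=input_text)

print(apply_prompt_template("vicuna", "What is paged attention?"))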
Diff for: colossalai/inference/kv_cache/kvcache_manager.py (+5, -18)
@@ -74,13 +74,6 @@ def __init__(
         self.kv_head_num = get_model_config_attr(model_config, "num_key_value_heads", alter_attr=self.head_num)
         self.head_size = get_model_config_attr(model_config, "hidden_size") // self.head_num
 
-        # if hasattr(config, "num_key_value_heads"):
-        #     self.kv_head_num = getattr(config, "num_key_value_heads")
-        # elif hasattr(config, "attribute_map") and hasattr(config, config.attribute_map["num_key_value_heads"]):
-        #     self.kv_head_num = getattr(config, config.attribute_map["num_key_value_heads"])
-        # else:
-        #     self.kv_head_num = self.head_num
-
         assert (
             self.kv_head_num % self.tp_size == 0
         ), f"Cannot shard {self.kv_head_num} heads with tp size {self.tp_size}"
@@ -219,8 +212,7 @@ def allocate_context_from_block_table(self, block_table: torch.Tensor, context_l
             block.add_ref()
             if block_id == block_indexes[-1].item():
                 self._allocate_on_block(
-                    block,
-                    (block.block_size if context_len % block.block_size == 0 else context_len % block.block_size),
+                    block, block.block_size if context_len % block.block_size == 0 else context_len % block.block_size
                 )
             else:
                 self._allocate_on_block(block, block.block_size)
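The reformatted second argument computes how many slots of the sequence's last cache block are actually occupied: a context length that is an exact multiple of block_size fills the final block completely, otherwise only the remainder is used. A standalone illustration of that arithmetic (block_size = 16 is an arbitrary example value, not the project default):

# Toy check of the last-block occupancy expression used above.
block_size = 16

def last_block_occupancy(context_len: int) -> int:
    # A context of N tokens spans ceil(N / block_size) blocks; only the
    # final block may be partially filled.
    return block_size if context_len % block_size == 0 else context_len % block_size

assert last_block_occupancy(35) == 3   # 16 + 16 + 3 tokens
assert last_block_occupancy(32) == 16  # exactly two full blocks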
@@ -287,11 +279,9 @@ def allocate_context_from_block_tables(self, block_tables: torch.Tensor, context
                 block.add_ref()
                 self._allocate_on_block(
                     block,
-                    (
-                        block.block_size
-                        if context_lengths[i] % block.block_size == 0
-                        else context_lengths[i].item() % block.block_size
-                    ),
+                    block.block_size
+                    if context_lengths[i] % block.block_size == 0
+                    else context_lengths[i].item() % block.block_size,
                 )
         for block_id in alloc_block_ids:
             if block_id in alloc_block_ids[last_block_locs]:
@@ -464,10 +454,7 @@ def clear_all(self) -> None:
 
     def get_physical_cache(self, layer_id: int, block_idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
         """Get the tensor corresponding to the cache block with the prompted id for a specific layer."""
-        return (
-            self._kv_caches[0][layer_id][block_idx],
-            self._kv_caches[1][layer_id][block_idx],
-        )
+        return self._kv_caches[0][layer_id][block_idx], self._kv_caches[1][layer_id][block_idx]
 
     def _allocate_on_block(self, block: CacheBlock, space_asked: int) -> int:
         """Allocate a specific size of space on a provided cache block.
