Skip to content

Commit c8b346d

Browse files
author
Xu Xiong
committed
llama-30B
1 parent c799935 commit c8b346d

3 files changed

Lines changed: 4 additions & 4 deletions

File tree

src/bloombee/server/handler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,8 +795,8 @@ async def _cross_stage_push_wrapper(mb_hidden, mb_keep, push_metadata):
795795
push_tensor_bytes = sum(len(t.buffer) for t in next_tensors)
796796

797797
# 模拟网络传输延时 (simulate network transfer latency)
798-
NETWORK_SPEED_BYTES_PER_SEC = 31.25 * 1024 * 1024
799-
transfer_delay = push_tensor_bytes / NETWORK_SPEED_BYTES_PER_SEC + 0.05
798+
NETWORK_SPEED_BYTES_PER_SEC = 15.6 * 1024 * 1024
799+
transfer_delay = push_tensor_bytes / NETWORK_SPEED_BYTES_PER_SEC + 0.075
800800
await asyncio.sleep(transfer_delay)
801801
task = asyncio.create_task(self._push_outputs(request, output_tensors, step_metadata))
802802
background_tasks.add(task) # Keep reference until it is done to save it from GC

src/bloombee/server/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ def __init__(
324324
self.weight_home = array_1d(self.num_blocks, ValueHolder)
325325
self.path = os.path.join(tempfile.gettempdir(), 'data', 'llama_weights')
326326

327-
hidden_size = 5120
327+
hidden_size = 6656
328328
vocab_size = 32000
329329

330330
# Create configuration

src/bloombee/server/speculative_pruner/adaptive_neural_pruner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def __init__(
5555
self.lm_head = MidLMHead(hidden_size=hidden_size, vocab_size=vocab_size).to("cuda")
5656
lm_head_weights_path = hf_hub_download(
5757
repo_id="xxiong59/lm-head-for-speculative-pruning",
58-
filename="lm_head_llama13B-20.pt",
58+
filename="lm_head_llama30B-20.pt",
5959
cache_dir="./cache"
6060
)
6161
lm_head_checkpoint = torch.load(lm_head_weights_path, map_location="cuda")

0 commit comments

Comments (0)