Commit ab82379

Author: Xu Xiong
Commit message: llama30B
Parent: b04b519

4 files changed: 10 additions & 8 deletions

src/bloombee/server/backend.py (2 additions & 2 deletions)
@@ -304,7 +304,7 @@ def _flag_to_bool(value) -> bool:
         self._is_spec_decoding = _flag_to_bool(inference_info.is_spec_dec)

         training_mode = True
-        if training_mode and self._is_spec_decoding and inference_info.uid == 'llama-13b-hf.20':
+        if training_mode and self._is_spec_decoding and inference_info.uid == 'llama-30b-hf.15':
             self.pruner_manager.middle_states = hidden_states

         # We chunk the inputs so that peak memory for long sequences fits into `autograd_memory`
@@ -464,7 +464,7 @@ def _flag_to_bool(value) -> bool:
         self.pruner_manager.train_model(middle_norm_hidden_states, final_logits, full_mask, inference_info.draft_tokens)

         training_lm_head_mode = True
-        if training_mode and training_lm_head_mode and self._is_spec_decoding and inference_info.uid == 'llama-13b-hf.39':
+        if training_mode and training_lm_head_mode and self._is_spec_decoding and inference_info.uid == 'llama-30b-hf.59':
             logger.info(f"prepare training_lm_head_mode")
             norm_hidden_states = self.module.rms_norm(output_hidden_states)
             middle_norm_hidden_states = self.module.rms_norm(self.pruner_manager.middle_states)
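The two uid checks above tap specific decoder blocks: the mid-network hook moves from block 20 of the 40-layer LLaMA-13B to block 15 of the 60-layer LLaMA-30B, and the final-block hook from block 39 to block 59 (the last block of each model). A minimal sketch of the '<model name>.<block index>' uid convention these string comparisons rely on; the helper below is illustrative, not code from this repo:

# Hypothetical helper (not in the repo): split a block uid such as
# 'llama-30b-hf.15' into its model name and block index.
def parse_block_uid(uid: str) -> tuple[str, int]:
    model_name, _, index = uid.rpartition(".")
    return model_name, int(index)

# The checks above fire on block 15 (middle hidden states for the pruner)
# and block 59 (the final decoder block of the 60-layer LLaMA-30B).
assert parse_block_uid("llama-30b-hf.15") == ("llama-30b-hf", 15)
assert parse_block_uid("llama-30b-hf.59") == ("llama-30b-hf", 59)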

src/bloombee/server/server.py (1 addition & 1 deletion)
@@ -324,7 +324,7 @@ def __init__(
         self.weight_home = array_1d(self.num_blocks, ValueHolder)
         self.path = os.path.join(tempfile.gettempdir(), 'data', 'llama_weights')

-        hidden_size = 5120
+        hidden_size = 6656
         vocab_size = 32000

         # Create configuration
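The change above swaps in the model dimension of LLaMA-30B: 6656 hidden units versus 5120 for LLaMA-13B, while vocab_size stays 32000 because all LLaMA-1 checkpoints share one tokenizer. A small reference table for cross-checking hard-coded dimensions like this one (standard public LLaMA-1 configs, not repo code):

# Hidden sizes of the public LLaMA-1 checkpoints, for sanity-checking
# hard-coded model dimensions.
LLAMA_HIDDEN_SIZE = {
    "llama-7b-hf": 4096,
    "llama-13b-hf": 5120,  # old value
    "llama-30b-hf": 6656,  # new value
    "llama-65b-hf": 8192,
}
VOCAB_SIZE = 32000  # shared across all LLaMA-1 sizes, hence unchanged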

src/bloombee/server/speculative_pruner/lm_head_trainer.py (2 additions & 2 deletions)
@@ -26,13 +26,13 @@ def __init__(

         # ── Frozen original LM head, used to produce target logits ─────────────
         self.original_lm_head = MidLMHead(hidden_size=hidden_size, vocab_size=vocab_size).to(device)
-        self.original_lm_head.load_weight("/tmp/data/llama_weights/llama-13b-np")
+        self.original_lm_head.load_weight("/tmp/data/llama_weights/llama-30b-np")
         self.original_lm_head.requires_grad_(False)
         self.original_lm_head.to(dtype=torch.bfloat16)

         # ── LM head to be trained ────────────────────────────────────
         self.lm_head = MidLMHead(hidden_size=hidden_size, vocab_size=vocab_size).to(device)
-        self.lm_head.load_weight("/tmp/data/llama_weights/llama-13b-np")
+        self.lm_head.load_weight("/tmp/data/llama_weights/llama-30b-np")
         self.lm_head.to(dtype=torch.bfloat16)

         self.optimizer_head = torch.optim.AdamW(self.lm_head.parameters(), lr=3e-5)
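The constructor keeps two copies of the LM head: a frozen one loaded from the original checkpoint to supply target logits, and a second copy that gets fine-tuned, both now pointed at the llama-30b-np weights. A minimal sketch of that frozen-teacher / trainable-student pattern, with torch.nn.Linear standing in for the repo's MidLMHead and the dtype, optimizer, and learning rate mirroring the diff:

import torch
import torch.nn as nn

# Sketch only: nn.Linear stands in for the repo's MidLMHead.
hidden_size, vocab_size = 6656, 32000
device = "cuda" if torch.cuda.is_available() else "cpu"

teacher = nn.Linear(hidden_size, vocab_size, bias=False).to(device, torch.bfloat16)
teacher.requires_grad_(False)  # frozen: only supplies target logits

student = nn.Linear(hidden_size, vocab_size, bias=False).to(device, torch.bfloat16)
optimizer = torch.optim.AdamW(student.parameters(), lr=3e-5)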

upload_file_hf.py (5 additions & 3 deletions)
@@ -1,8 +1,10 @@
-from huggingface_hub import upload_file
+from huggingface_hub import login, upload_file
+
+login(token="")

 upload_file(
-    path_or_fileobj="./checkpoints/lmhead/lm_head_llama13B-20.pt",
-    path_in_repo="lm_head_llama13B-20.pt",
+    path_or_fileobj="./checkpoints/lmhead/lm_head_llama30B-15.pt",
+    path_in_repo="lm_head_llama30B-15.pt",
     repo_id="xxiong59/lm-head-for-speculative-pruning",
     repo_type="model"
)
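The script now authenticates before uploading, with the token left empty in the commit. A variant that keeps the token out of source control by reading it from an environment variable (the HF_TOKEN name is a common convention, not something this commit sets up):

import os
from huggingface_hub import login, upload_file

# Same upload, but the token comes from the environment instead of
# being hard-coded; set HF_TOKEN before running.
login(token=os.environ["HF_TOKEN"])

upload_file(
    path_or_fileobj="./checkpoints/lmhead/lm_head_llama30B-15.pt",
    path_in_repo="lm_head_llama30B-15.pt",
    repo_id="xxiong59/lm-head-for-speculative-pruning",
    repo_type="model",
)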
