1515from .qwen import Qwen3Model
1616
1717
18- @ModelBase .register ("StepVLForConditionalGeneration" )
18+ @ModelBase .register ("StepVLForConditionalGeneration" , "Step3p7ForConditionalGeneration" )
1919class Step3VLVisionModel (MmprojModel ):
2020 def __init__ (self , * args , ** kwargs ):
2121 super ().__init__ (* args , ** kwargs )
@@ -95,7 +95,7 @@ class Step3VLTextModel(Qwen3Model):
9595 model_arch = gguf .MODEL_ARCH .QWEN3
9696
9797
98- @ModelBase .register ("Step3p5ForCausalLM" )
98+ @ModelBase .register ("Step3p5ForCausalLM" , "Step3p7ForConditionalGeneration" )
9999class Step35Model (TextModel ):
100100 model_arch = gguf .MODEL_ARCH .STEP35
101101
@@ -203,11 +203,23 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
203203 if isinstance (rope_theta , list ):
204204 rope_theta = rope_theta [0 ]
205205 base = float (rope_theta )
206- if (dim := self .hparams .get ("head_dim" )) is None :
207- dim = self .hparams ["hidden_size" ] // self .hparams ["num_attention_heads" ]
208- dim = int (dim )
209206
210- freqs = 1.0 / (base ** (torch .arange (0 , dim , 2 , dtype = torch .float32 ) / dim ))
207+ if (storage_dim := self .hparams .get ("head_dim" )) is None :
208+ storage_dim = self .hparams ["hidden_size" ] // self .hparams ["num_attention_heads" ]
209+ storage_dim = int (storage_dim )
210+
211+ # Llama 3 factors apply only to the rotary dims used by full_attention layers
212+ # (partial_rotary_factor * head_dim). Remaining slots are padded with 1.0 so
213+ # sliding_attention layers remain unaffected. set_gguf_parameters already
214+ # guarantees at least one full_attention layer.
215+ layer_types = (self .hparams .get ("layer_types" ) or [])[: self .block_count ]
216+ partial_rotary_factors = (self .hparams .get ("partial_rotary_factors" ) or [])[: self .block_count ]
217+ full_attention_factor = next (
218+ float (f ) for lt , f in zip (layer_types , partial_rotary_factors ) if lt == "full_attention"
219+ )
220+ rotary_dim = int (storage_dim * full_attention_factor )
221+
222+ freqs = 1.0 / (base ** (torch .arange (0 , rotary_dim , 2 , dtype = torch .float32 ) / rotary_dim ))
211223
212224 factor = float (rope_params .get ("factor" , 8.0 ))
213225 low_freq_factor = float (rope_params .get ("low_freq_factor" , 1.0 ))
@@ -228,4 +240,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
228240 smooth = (old_context_len / wavelen - low_freq_factor ) / (high_freq_factor - low_freq_factor )
229241 rope_factors .append (1.0 / ((1.0 - smooth ) / factor + smooth ))
230242
243+ # Pad to head_dim/2 with 1.0 so non-scaled layers remain neutral.
244+ if len (rope_factors ) < storage_dim // 2 :
245+ rope_factors .extend ([1.0 ] * (storage_dim // 2 - len (rope_factors )))
246+
231247 yield (self .format_tensor_name (gguf .MODEL_TENSOR .ROPE_FREQS ), torch .tensor (rope_factors , dtype = torch .float32 ))
0 commit comments