@@ -2164,8 +2164,16 @@ def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
         input_size=(3, 384, 384), crop_pct=1.0),
     'vit_so150m_patch16_reg4_map_256.untrained': _cfg(
         input_size=(3, 256, 256)),
-    'vit_so150m2_patch16_reg1_gap_256.untrained': _cfg(
-        input_size=(3, 256, 256), crop_pct=0.95),
+    'vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1k': _cfg(
+        hf_hub_id='timm/',
+        input_size=(3, 256, 256), crop_pct=1.0),
+    'vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k': _cfg(
+        hf_hub_id='timm/',
+        num_classes=11821,
+        input_size=(3, 256, 256), crop_pct=1.0),
+    'vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1k': _cfg(
+        hf_hub_id='timm/',
+        input_size=(3, 384, 384), crop_pct=1.0),
 
     'vit_intern300m_patch14_448.ogvl_dist': _cfg(
         hf_hub_id='timm/',
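The new `sbb_e200_in12k_ft_in1k` / `sbb_e200_in12k` tags replace the old `untrained` placeholder, so the weights become loadable by tag; note the `in12k` tag carries the ImageNet-12k head (`num_classes=11821`) while the `_ft_in1k` tags keep the default 1000 classes. A minimal usage sketch, assuming this change is merged and the corresponding checkpoints are published under the `timm/` org on the Hugging Face Hub:

```python
import timm
from timm.data import resolve_model_data_config, create_transform

# Load the in12k -> in1k fine-tuned 256px weights; hf_hub_id='timm/' makes the
# checkpoint resolve to timm/vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1k.
model = timm.create_model(
    'vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1k',
    pretrained=True,  # assumes the checkpoint has been pushed to the Hub
)
model = model.eval()

# Build the matching eval transform; the new _cfg gives a 256x256 input and crop_pct=1.0.
data_cfg = resolve_model_data_config(model)
transform = create_transform(**data_cfg, is_training=False)
```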
@@ -3518,6 +3526,18 @@ def vit_so150m2_patch16_reg1_gap_256(pretrained: bool = False, **kwargs) -> VisionTransformer:
     return model
 
 
+@register_model
+def vit_so150m2_patch16_reg1_gap_384(pretrained: bool = False, **kwargs) -> VisionTransformer:
+    """ SO150M v2 (shape optimized, but diff than paper def, optimized for GPU) """
+    model_args = dict(
+        patch_size=16, embed_dim=832, depth=21, num_heads=13, mlp_ratio=34/13, init_values=1e-5,
+        qkv_bias=False, class_token=False, reg_tokens=1, global_pool='avg',
+    )
+    model = _create_vision_transformer(
+        'vit_so150m2_patch16_reg1_gap_384', pretrained=pretrained, **dict(model_args, **kwargs))
+    return model
+
+
 @register_model
 def vit_intern300m_patch14_448(pretrained: bool = False, **kwargs) -> VisionTransformer:
     model_args = dict(
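For reference, the odd-looking args keep the GPU-friendly geometry of the existing 256px variant: `embed_dim=832` with `num_heads=13` gives a head dim of exactly 64, and `mlp_ratio=34/13` gives an MLP hidden dim of exactly `832 * 34 / 13 = 2176`. A quick local sanity check, assuming a timm install that includes this branch (no pretrained weights needed):

```python
import torch
import timm

# The new variant registers under the standard timm factory via @register_model.
model = timm.create_model('vit_so150m2_patch16_reg1_gap_384', pretrained=False)
model = model.eval()

# head dim: 832 / 13 = 64; MLP hidden: 832 * 34 / 13 = 2176
assert model.embed_dim == 832
assert model.blocks[0].mlp.fc1.out_features == 2176

with torch.no_grad():
    out = model(torch.randn(1, 3, 384, 384))
print(out.shape)  # torch.Size([1, 1000]) with the default classifier head
```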