@@ -2174,12 +2174,20 @@ def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
2174
2174
'vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1k' : _cfg (
2175
2175
hf_hub_id = 'timm/' ,
2176
2176
input_size = (3 , 384 , 384 ), crop_pct = 1.0 ),
2177
+ 'vit_so150m2_patch16_reg1_gap_448.sbb_e200_in12k_ft_in1k' : _cfg (
2178
+ hf_hub_id = 'timm/' ,
2179
+ input_size = (3 , 448 , 448 ), crop_pct = 1.0 , crop_mode = 'squash' ),
2177
2180
2178
2181
'vit_intern300m_patch14_448.ogvl_dist' : _cfg (
2179
2182
hf_hub_id = 'timm/' ,
2180
2183
mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
2181
2184
input_size = (3 , 448 , 448 ), crop_pct = 1.0 , num_classes = 0 ,
2182
2185
),
2186
+ 'vit_intern300m_patch14_448.ogvl_2pt5' : _cfg (
2187
+ hf_hub_id = 'timm/' ,
2188
+ mean = IMAGENET_DEFAULT_MEAN , std = IMAGENET_DEFAULT_STD ,
2189
+ input_size = (3 , 448 , 448 ), crop_pct = 1.0 , num_classes = 0 ,
2190
+ ),
2183
2191
2184
2192
'aimv2_large_patch14_224.apple_pt' : _cfg (
2185
2193
hf_hub_id = 'timm/' ,
@@ -3538,6 +3546,18 @@ def vit_so150m2_patch16_reg1_gap_384(pretrained: bool = False, **kwargs) -> Visi
3538
3546
return model
3539
3547
3540
3548
3549
@register_model
def vit_so150m2_patch16_reg1_gap_448(pretrained: bool = False, **kwargs) -> VisionTransformer:
    """Create the SO150M v2 ViT registered as ``vit_so150m2_patch16_reg1_gap_448``.

    SO150M v2 is shape optimized, but differs from the paper definition and is
    optimized for GPU.

    Args:
        pretrained: Forwarded unchanged to ``_create_vision_transformer``.
        **kwargs: Extra model arguments. Any key that collides with one of the
            variant defaults below replaces that default.

    Returns:
        The model produced by ``_create_vision_transformer``.
    """
    variant_defaults = {
        'patch_size': 16,
        'embed_dim': 832,
        'depth': 21,
        'num_heads': 13,
        'mlp_ratio': 34 / 13,
        'init_values': 1e-5,
        'qkv_bias': False,
        'class_token': False,
        'reg_tokens': 1,
        'global_pool': 'avg',
    }
    # Caller-supplied kwargs are applied last so they win over the defaults.
    merged_args = {**variant_defaults, **kwargs}
    return _create_vision_transformer(
        'vit_so150m2_patch16_reg1_gap_448',
        pretrained=pretrained,
        **merged_args,
    )
3559
+
3560
+
3541
3561
@register_model
3542
3562
def vit_intern300m_patch14_448 (pretrained : bool = False , ** kwargs ) -> VisionTransformer :
3543
3563
model_args = dict (
0 commit comments