@@ -1213,6 +1213,9 @@ def checkpoint_filter_fn(
12131213 if any ([k .endswith (f ) for f in ['.periods' , '.bias_mask' , 'mask_token' ]]):
12141214 # discard unused/non-persistent/pretrain only params
12151215 continue
1216+ if k .startswith ('projectors.' ):
1217+ # discard optional distillation projection heads from EUPE checkpoints
1218+ continue
12161219 if k .startswith ('local_cls_norm' ):
12171220 # discard, only used for 7b dinov3 pretrain w/ local crops
12181221 continue
@@ -1374,6 +1377,18 @@ def _dinov3_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
13741377 'license' : 'dinov3-license' , ** kwargs
13751378 }
13761379
1380+
def _eupe_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
    """Build the default pretrained configuration dict for EUPE models.

    Args:
        url: Value stored under the 'url' key.
        **kwargs: Extra entries merged in last; an entry whose key matches a
            default replaces that default, any other entry is appended.

    Returns:
        The configuration dict made of the defaults below plus the overrides.
    """
    cfg: Dict[str, Any] = dict(
        url=url,
        num_classes=0,
        input_size=(3, 256, 256),
        pool_size=None,
        crop_pct=1.0,
        interpolation='bicubic',
        # NOTE(review): the tiny EUPE entrypoint in this file is built with
        # dynamic_img_size=True while this config advertises a fixed input
        # size -- confirm the combination is intended.
        fixed_input_size=True,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
        first_conv='patch_embed.proj',
        classifier='head',
        license='fair-noncommercial-research-license',
    )
    # Caller-supplied entries are applied last so they win over the defaults.
    cfg.update(kwargs)
    return cfg
1391+
13771392default_cfgs = generate_default_cfgs ({
13781393
13791394 # EVA 01 CLIP fine-tuned on imagenet-1k
@@ -1748,6 +1763,18 @@ def _dinov3_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
17481763 'vit_base_patch16_dinov3_qkvb.lvd1689m' : _dinov3_cfg (
17491764 hf_hub_id = 'timm/' ,
17501765 ),
1766+ 'vit_tiny_patch16_dinov3_qkvb.eupe_lvd1689m' : _eupe_cfg (
1767+ hf_hub_id = 'facebook/EUPE-ViT-T' ,
1768+ hf_hub_filename = 'EUPE-ViT-T.pt' ,
1769+ ),
1770+ 'vit_small_patch16_dinov3_qkvb.eupe_lvd1689m' : _eupe_cfg (
1771+ hf_hub_id = 'facebook/EUPE-ViT-S' ,
1772+ hf_hub_filename = 'EUPE-ViT-S.pt' ,
1773+ ),
1774+ 'vit_base_patch16_dinov3_qkvb.eupe_lvd1689m' : _eupe_cfg (
1775+ hf_hub_id = 'facebook/EUPE-ViT-B' ,
1776+ hf_hub_filename = 'EUPE-ViT-B.pt' ,
1777+ ),
17511778 'vit_large_patch16_dinov3.lvd1689m' : _dinov3_cfg (
17521779 hf_hub_id = 'timm/' ,
17531780 ),
@@ -2716,6 +2743,32 @@ def vit_large_patch16_rope_mixed_ape_224(pretrained: bool = False, **kwargs) ->
27162743 return model
27172744
27182745
@register_model
def vit_tiny_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
    """DINOv3-style T/16 w/ QKV bias enabled.

    Args:
        pretrained: Forwarded unchanged to ``_create_eva``.
        **kwargs: Extra model arguments; an entry whose name matches one of
            the architecture defaults below replaces that default.

    Returns:
        Whatever ``_create_eva`` returns for this variant.
    """
    arch_defaults = {
        'patch_size': 16,
        'dynamic_img_size': True,
        'embed_dim': 192,
        'depth': 12,
        'num_heads': 3,
        'qkv_bias': True,
        # global_pool='token',  # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
        'init_values': 1.0e-05,  # layer-scale
        'rope_type': 'dinov3',
        'rope_temperature': 100,
        # rope_rescale_coords=2,  # haven't added to interface
        'rope_rotate_half': True,
        'use_rot_pos_emb': True,
        'use_abs_pos_emb': False,
        'num_reg_tokens': 4,
        'use_fc_norm': False,
        'norm_layer': partial(LayerNorm, eps=1e-5),
    }
    # Caller kwargs are unpacked second so they take precedence over defaults.
    merged_args = {**arch_defaults, **kwargs}
    return _create_eva('vit_tiny_patch16_dinov3_qkvb', pretrained=pretrained, **merged_args)
2770+
2771+
27192772@register_model
27202773def vit_small_patch16_dinov3 (pretrained : bool = False , ** kwargs ) -> Eva :
27212774 """DINOv3 S/16 https://arxiv.org/abs/2508.10104
@@ -2728,6 +2781,7 @@ def vit_small_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
27282781 depth = 12 ,
27292782 num_heads = 6 ,
27302783 qkv_bias = False ,
2784+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
27312785 init_values = 1.0e-05 , # layer-scale
27322786 rope_type = 'dinov3' ,
27332787 rope_temperature = 100 ,
@@ -2755,6 +2809,7 @@ def vit_small_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
27552809 depth = 12 ,
27562810 num_heads = 6 ,
27572811 qkv_bias = True ,
2812+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
27582813 init_values = 1.0e-05 , # layer-scale
27592814 rope_type = 'dinov3' ,
27602815 rope_temperature = 100 ,
@@ -2782,6 +2837,7 @@ def vit_small_plus_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
27822837 depth = 12 ,
27832838 num_heads = 6 ,
27842839 qkv_bias = False ,
2840+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
27852841 init_values = 1.0e-05 , # layer-scale
27862842 rope_type = 'dinov3' ,
27872843 rope_temperature = 100 ,
@@ -2811,6 +2867,7 @@ def vit_small_plus_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Ev
28112867 depth = 12 ,
28122868 num_heads = 6 ,
28132869 qkv_bias = True ,
2870+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
28142871 init_values = 1.0e-05 , # layer-scale
28152872 rope_type = 'dinov3' ,
28162873 rope_temperature = 100 ,
@@ -2840,6 +2897,7 @@ def vit_base_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
28402897 depth = 12 ,
28412898 num_heads = 12 ,
28422899 qkv_bias = False ,
2900+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
28432901 init_values = 1.0e-05 , # layer-scale
28442902 rope_type = 'dinov3' ,
28452903 rope_temperature = 100 ,
@@ -2867,6 +2925,7 @@ def vit_base_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
28672925 depth = 12 ,
28682926 num_heads = 12 ,
28692927 qkv_bias = True ,
2928+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
28702929 init_values = 1.0e-05 , # layer-scale
28712930 rope_type = 'dinov3' ,
28722931 rope_temperature = 100 ,
@@ -2894,6 +2953,7 @@ def vit_large_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
28942953 depth = 24 ,
28952954 num_heads = 16 ,
28962955 qkv_bias = False ,
2956+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
28972957 init_values = 1.0e-5 , # layer-scale
28982958 rope_type = 'dinov3' ,
28992959 rope_temperature = 100 ,
@@ -2921,6 +2981,7 @@ def vit_large_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
29212981 depth = 24 ,
29222982 num_heads = 16 ,
29232983 qkv_bias = True ,
2984+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
29242985 init_values = 1.0e-5 , # layer-scale
29252986 rope_type = 'dinov3' ,
29262987 rope_temperature = 100 ,
@@ -2948,6 +3009,7 @@ def vit_huge_plus_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
29483009 depth = 32 ,
29493010 num_heads = 20 ,
29503011 qkv_bias = False ,
3012+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
29513013 init_values = 1.0e-5 , # layer-scale
29523014 rope_type = 'dinov3' ,
29533015 rope_temperature = 100 ,
@@ -2978,6 +3040,7 @@ def vit_huge_plus_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva
29783040 depth = 32 ,
29793041 num_heads = 20 ,
29803042 qkv_bias = True ,
3043+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
29813044 init_values = 1.0e-5 , # layer-scale
29823045 rope_type = 'dinov3' ,
29833046 rope_temperature = 100 ,
@@ -3007,6 +3070,7 @@ def vit_7b_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
30073070 depth = 40 ,
30083071 num_heads = 32 ,
30093072 qkv_bias = False ,
3073+ # global_pool='token', # upstream uses CLS token; default here is 'avg', pass via kwargs or --gp
30103074 mlp_ratio = 2 ,
30113075 init_values = 1.0e-5 , # layer-scale
30123076 rope_type = 'dinov3' ,
0 commit comments