From 69b8d7b586135083ee1ba0a6ec79de417fd6c6cc Mon Sep 17 00:00:00 2001
From: Sachin Prasad
Date: Sat, 7 Mar 2026 13:48:25 -0800
Subject: [PATCH 1/2] add Meta CLIP2 presets

Replace the empty `backbone_presets` dict with four MetaCLIP 2 presets:
the huge and giant worldwide models, each at 224x224 and 378x378 image
resolution, together with their Kaggle handles.
---
 .../models/metaclip_2/metaclip_2_presets.py | 57 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/keras_hub/src/models/metaclip_2/metaclip_2_presets.py b/keras_hub/src/models/metaclip_2/metaclip_2_presets.py
index 36ddcc275b..e7a1db3084 100644
--- a/keras_hub/src/models/metaclip_2/metaclip_2_presets.py
+++ b/keras_hub/src/models/metaclip_2/metaclip_2_presets.py
@@ -1 +1,56 @@
-backbone_presets = {}
+"""MetaCLIP 2 model preset configurations."""
+
+backbone_presets = {
+    "metaclip_2_vit_huge_patch14_224": {
+        "metadata": {
+            "description": (
+                "986 million parameter, 32-layer for vision and 24-layer for "
+                "text, patch size of 14, image resolution 224x224. MetaCLIP 2 "
+                "worldwide huge model (ViT-H-14-quickgelu-worldwide) trained on "  # noqa
+                "29B seen pairs with QuickGELU activation."
+            ),
+            "params": 1858784002,
+            "path": "metaclip_2",
+        },
+        "kaggle_handle": "kaggle://keras/metaclip2/keras/metaclip_2_vit_huge_patch14_224/1",
+    },
+    "metaclip_2_vit_huge_patch14_378": {
+        "metadata": {
+            "description": (
+                "986 million parameter, 32-layer for vision and 24-layer for "
+                "text, patch size of 14, image resolution 378x378. MetaCLIP 2 "
+                "worldwide huge model (ViT-H-14-378-worldwide) trained on "
+                "29B seen pairs."
+            ),
+            "params": 1859389185,
+            "path": "metaclip_2",
+        },
+        "kaggle_handle": "kaggle://keras/metaclip2/keras/metaclip_2_vit_huge_patch14_378/1",
+    },
+    "metaclip_2_vit_giant_patch14_224": {
+        "metadata": {
+            "description": (
+                "1.4 billion parameter, 40-layer for vision and 24-layer for "
+                "text, patch size of 14, image resolution 224x224. MetaCLIP 2 "
+                "worldwide giant model (ViT-bigG-14-worldwide) trained on "
+                "29B seen pairs."
+            ),
+            "params": 3630409985,
+            "path": "metaclip_2",
+        },
+        "kaggle_handle": "kaggle://keras/metaclip2/keras/metaclip_2_vit_giant_patch14_224/1",
+    },
+    "metaclip_2_vit_giant_patch14_378": {
+        "metadata": {
+            "description": (
+                "1.4 billion parameter, 40-layer for vision and 24-layer for "
+                "text, patch size of 14, image resolution 378x378. MetaCLIP 2 "
+                "worldwide giant model (ViT-bigG-14-378-worldwide) trained on "
+                "29B seen pairs."
+            ),
+            "params": 3631197057,
+            "path": "metaclip_2",
+        },
+        "kaggle_handle": "kaggle://keras/metaclip2/keras/metaclip_2_vit_giant_patch14_378/1",
+    },
+}

From 53692d16509e5eebc545d8b70b21cf0f3221d23a Mon Sep 17 00:00:00 2001
From: Sachin Prasad
Date: Thu, 12 Mar 2026 14:26:09 -0700
Subject: [PATCH 2/2] fix description

Update the parameter counts quoted in all four preset descriptions
(986 million -> 2 billion for the huge models, 1.4 billion -> 4 billion
for the giant models) and correct `params` for
metaclip_2_vit_huge_patch14_224 from 1858784002 to 1858783745.
---
 keras_hub/src/models/metaclip_2/metaclip_2_presets.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/keras_hub/src/models/metaclip_2/metaclip_2_presets.py b/keras_hub/src/models/metaclip_2/metaclip_2_presets.py
index e7a1db3084..ac7d1f84d0 100644
--- a/keras_hub/src/models/metaclip_2/metaclip_2_presets.py
+++ b/keras_hub/src/models/metaclip_2/metaclip_2_presets.py
@@ -4,12 +4,12 @@
     "metaclip_2_vit_huge_patch14_224": {
         "metadata": {
             "description": (
-                "986 million parameter, 32-layer for vision and 24-layer for "
+                "2 billion parameter, 32-layer for vision and 24-layer for "
                 "text, patch size of 14, image resolution 224x224. MetaCLIP 2 "
                 "worldwide huge model (ViT-H-14-quickgelu-worldwide) trained on "  # noqa
                 "29B seen pairs with QuickGELU activation."
             ),
-            "params": 1858784002,
+            "params": 1858783745,
             "path": "metaclip_2",
         },
         "kaggle_handle": "kaggle://keras/metaclip2/keras/metaclip_2_vit_huge_patch14_224/1",
@@ -17,7 +17,7 @@
     "metaclip_2_vit_huge_patch14_378": {
         "metadata": {
             "description": (
-                "986 million parameter, 32-layer for vision and 24-layer for "
+                "2 billion parameter, 32-layer for vision and 24-layer for "
                 "text, patch size of 14, image resolution 378x378. MetaCLIP 2 "
                 "worldwide huge model (ViT-H-14-378-worldwide) trained on "
                 "29B seen pairs."
@@ -30,7 +30,7 @@
     "metaclip_2_vit_giant_patch14_224": {
         "metadata": {
             "description": (
-                "1.4 billion parameter, 40-layer for vision and 24-layer for "
+                "4 billion parameter, 40-layer for vision and 24-layer for "
                 "text, patch size of 14, image resolution 224x224. MetaCLIP 2 "
                 "worldwide giant model (ViT-bigG-14-worldwide) trained on "
                 "29B seen pairs."
@@ -43,7 +43,7 @@
     "metaclip_2_vit_giant_patch14_378": {
         "metadata": {
             "description": (
-                "1.4 billion parameter, 40-layer for vision and 24-layer for "
+                "4 billion parameter, 40-layer for vision and 24-layer for "
                 "text, patch size of 14, image resolution 378x378. MetaCLIP 2 "
                 "worldwide giant model (ViT-bigG-14-378-worldwide) trained on "
                 "29B seen pairs."