Commit 729815b

Rename preset IDs for consistency (#612)
And start to add rules we would like to follow to our style guide.
1 parent (f7d816f) · commit 729815b

26 files changed: +140 −111 lines

Diff for: STYLE_GUIDE.md

+23
@@ -18,6 +18,29 @@ When a specific abbreviation is very common and is pronounceable (acronym),
 consider it as a standalone word, e.g. Bert, Deberta, etc. In this case, "Bert"
 is considered as a common noun and not an abbreviation anymore.
 
+## Naming of Models and Presets
+
+Naming of models and presets is a difficult and important element of our
+library usability. In general we try to follow the branding of "upstream"
+model naming, subject to the consistency constraints laid out here.
+
+- The model and preset names should be recognizable to users familiar with the
+  original release. E.g. the model that goes with the "DeBERTaV3" paper should
+  be called `DebertaV3`. A release of a [toxic-bert](https://huggingface.co/unitary/toxic-bert)
+  checkpoint for `keras_nlp.models.Bert` should include the string
+  `"toxic_bert"`.
+- All preset names should include the language of the pretraining data. If three
+  or more languages are supported, the preset name should include `"multi"` (not
+  the single letter "m").
+- If a preset lowercases input for cased languages, the preset name should be
+  marked with `"uncased"`.
+- Don't abbreviate size names. E.g. "xsmall" or "XL" in an original checkpoint
+  release should map to `"extra_small"` or `"extra_large"` in preset names.
+- No configuration in names. E.g. use "bert_base" instead of
+  "bert_L-12_H-768_A-12".
+
+When in doubt, readability should win out!
+
 ## File names
 
 When possible, keep publicly documented classes in their own files, and make
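To make the rules above concrete, here is an editor's sketch of how a few upstream checkpoint names would map to preset IDs under this guide (illustrative pairs, not a table from the commit):

```python
# Illustrative upstream-name -> preset-ID pairs under the rules above.
# The pairings are examples chosen for illustration, not an official table.
UPSTREAM_TO_PRESET = {
    "deberta-v3-xsmall": "deberta_v3_extra_small_en",  # no abbreviated sizes
    "bert-base-multilingual-cased": "bert_base_multi",  # "multi", not "m"
    "bert-base-uncased": "bert_base_en_uncased",  # lowercasing is marked
    "bert-base-cased": "bert_base_en",  # cased is the unmarked default
    "BERT-Base, L-12/H-768/A-12": "bert_base_en",  # no configuration in names
}
```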

Diff for: keras_nlp/models/bert/bert_presets.py

+9 −9

@@ -108,7 +108,7 @@
         "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_uncased/v1/vocab.txt",
         "vocabulary_hash": "64800d5d8528ce344256daf115d4965e",
     },
-    "bert_base_en_cased": {
+    "bert_base_en": {
         "config": {
             "vocabulary_size": 28996,
             "num_layers": 12,
@@ -126,9 +126,9 @@
             "Base size of BERT where case is maintained. "
             "Trained on English Wikipedia + BooksCorpus."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_cased/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en/v1/model.h5",
         "weights_hash": "f94a6cb012e18f4fb8ec92abb91864e9",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en_cased/v1/vocab.txt",
+        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_en/v1/vocab.txt",
         "vocabulary_hash": "bb6ca9b42e790e5cd986bbb16444d0e0",
     },
     "bert_base_zh": {
@@ -151,7 +151,7 @@
         "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_zh/v1/vocab.txt",
         "vocabulary_hash": "3b5b76c4aef48ecf8cb3abaafe960f09",
     },
-    "bert_base_multi_cased": {
+    "bert_base_multi": {
         "config": {
             "vocabulary_size": 119547,
             "num_layers": 12,
@@ -169,9 +169,9 @@
             "Base size of BERT. Trained on Wikipedias of 104 "
             "languages."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi_cased/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi/v1/model.h5",
         "weights_hash": "b0631cec0a1f2513c6cfd75ba29c33aa",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi_cased/v1/vocab.txt",
+        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_base_multi/v1/vocab.txt",
         "vocabulary_hash": "d9d865138d17f1958502ed060ecfeeb6",
     },
     "bert_large_en_uncased": {
@@ -197,7 +197,7 @@
         "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_uncased/v1/vocab.txt",
         "vocabulary_hash": "64800d5d8528ce344256daf115d4965e",
     },
-    "bert_large_en_cased": {
+    "bert_large_en": {
         "config": {
             "vocabulary_size": 28996,
             "num_layers": 24,
@@ -215,9 +215,9 @@
             "Large size of BERT where case is maintained. "
             "Trained on English Wikipedia + BooksCorpus."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_cased/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en/v1/model.h5",
         "weights_hash": "8b8ab82290bbf4f8db87d4f100648890",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en_cased/v1/vocab.txt",
+        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bert_large_en/v1/vocab.txt",
        "vocabulary_hash": "bb6ca9b42e790e5cd986bbb16444d0e0",
     },
 }
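With the renames in place, loading the cased English checkpoint drops the `_cased` suffix. A minimal sketch, assuming the `keras_nlp.models.BertBackbone.from_preset` entry point that this commit's other backbone docstrings use:

```python
import keras_nlp

# Cased is now the unmarked default; only lowercasing is called out.
model = keras_nlp.models.BertBackbone.from_preset("bert_base_en")

# The uncased preset keeps its explicit "_uncased" marker.
model = keras_nlp.models.BertBackbone.from_preset("bert_base_en_uncased")

# The old ID is gone; unknown preset names raise a ValueError.
try:
    keras_nlp.models.BertBackbone.from_preset("bert_base_en_cased")
except ValueError as err:
    print(err)
```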

Diff for: keras_nlp/models/deberta_v3/deberta_v3_backbone.py

+4 −2

@@ -227,12 +227,14 @@ def from_preset(
     }
 
     # Load architecture and weights from preset
-    model = keras_nlp.models.DebertaV3Backbone.from_preset("deberta_base")
+    model = keras_nlp.models.DebertaV3Backbone.from_preset(
+        "deberta_v3_base_en",
+    )
     output = model(input_data)
 
     # Load randomly initialized model from preset architecture
     model = keras_nlp.models.DebertaV3Backbone.from_preset(
-        "deberta_base", load_weights=False
+        "deberta_v3_base_en", load_weights=False
     )
     output = model(input_data)
     ```
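The docstring above truncates the `input_data` it feeds to the backbone. A runnable sketch of the full example, with shapes invented for illustration:

```python
import tensorflow as tf
import keras_nlp

# DebertaV3Backbone takes token ids plus a padding mask (no segment ids).
input_data = {
    "token_ids": tf.ones(shape=(1, 12), dtype="int64"),
    "padding_mask": tf.constant([1] * 12, shape=(1, 12)),
}
model = keras_nlp.models.DebertaV3Backbone.from_preset("deberta_v3_base_en")
output = model(input_data)  # dense features of shape (1, 12, hidden_dim)
```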

Diff for: keras_nlp/models/deberta_v3/deberta_v3_classifier.py

+4 −4

@@ -203,7 +203,7 @@ def from_preset(
 
     # Create a DebertaV3Classifier and fit your data.
     classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
-        "deberta_base",
+        "deberta_v3_base_en",
         num_classes=4,
     )
     classifier.compile(
@@ -220,13 +220,13 @@ def from_preset(
 
     # Use a shorter sequence length.
     preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
-        "deberta_base",
+        "deberta_v3_base_en",
         sequence_length=128,
     )
 
     # Create a DebertaV3Classifier and fit your data.
     classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
-        "deberta_base",
+        "deberta_v3_base_en",
         num_classes=4,
         preprocessor=preprocessor,
     )
@@ -249,7 +249,7 @@ def from_preset(
 
     # Create a DebertaV3Classifier and fit your data.
     classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
-        "deberta_base",
+        "deberta_v3_base_en",
         num_classes=4,
         preprocessor=None,
     )
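The first example above is cut off at `classifier.compile(`. A complete end-to-end sketch under the new preset ID, with toy data invented for illustration:

```python
import tensorflow as tf
import keras_nlp

# Toy data: raw strings in, integer labels out.
features = tf.constant(["The quick brown fox jumped.", "I forgot my homework."])
labels = tf.constant([0, 3])

classifier = keras_nlp.models.DebertaV3Classifier.from_preset(
    "deberta_v3_base_en",
    num_classes=4,
)
classifier.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
classifier.fit(x=features, y=labels, batch_size=2)
```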

Diff for: keras_nlp/models/deberta_v3/deberta_v3_preprocessor.py

+2 −2

@@ -226,13 +226,13 @@ def from_preset(
     ```python
     # Load preprocessor from preset
     preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
-        "deberta_base",
+        "deberta_v3_base_en",
     )
     preprocessor("The quick brown fox jumped.")
 
     # Override sequence_length
     preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset(
-        "deberta_base",
+        "deberta_v3_base_en",
         sequence_length=64
     )
     preprocessor("The quick brown fox jumped.")

Diff for: keras_nlp/models/deberta_v3/deberta_v3_presets.py

+12 −12

@@ -14,7 +14,7 @@
 """DeBERTa model preset configurations."""
 
 backbone_presets = {
-    "deberta_v3_extra_small": {
+    "deberta_v3_extra_small_en": {
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 12,
@@ -30,12 +30,12 @@
             "Extra small size of DeBERTaV3. "
             "Trained on English Wikipedia, BookCorpus and OpenWebText."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/model.h5",
         "weights_hash": "d8e10327107e5c5e20b45548a5028619",
-        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small/v1/vocab.spm",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/vocab.spm",
         "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
     },
-    "deberta_v3_small": {
+    "deberta_v3_small_en": {
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 6,
@@ -51,12 +51,12 @@
             "Small size of DeBERTaV3. "
             "Trained on English Wikipedia, BookCorpus and OpenWebText."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/model.h5",
         "weights_hash": "84118eb7c5a735f2061ecccaf71bb888",
-        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small/v1/vocab.spm",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/vocab.spm",
         "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
     },
-    "deberta_v3_base": {
+    "deberta_v3_base_en": {
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 12,
@@ -72,12 +72,12 @@
             "Base size of DeBERTaV3. "
             "Trained on English Wikipedia, BookCorpus and OpenWebText."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/model.h5",
         "weights_hash": "cebce044aeed36aec9b94e3b8a255430",
-        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base/v1/vocab.spm",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/vocab.spm",
         "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
     },
-    "deberta_v3_large": {
+    "deberta_v3_large_en": {
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 24,
@@ -93,9 +93,9 @@
             "Large size of DeBERTaV3. "
             "Trained on English Wikipedia, BookCorpus and OpenWebText."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large_en/v1/model.h5",
         "weights_hash": "bce7690f358a9e39304f8c0ebc71a745",
-        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large/v1/vocab.spm",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large_en/v1/vocab.spm",
         "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
     },
     "deberta_v3_base_multi": {

Diff for: keras_nlp/models/deberta_v3/deberta_v3_presets_test.py

+6 −6

@@ -38,15 +38,15 @@ class DebertaV3PresetSmokeTest(tf.test.TestCase, parameterized.TestCase):
 
     def test_tokenizer_output(self):
         tokenizer = DebertaV3Tokenizer.from_preset(
-            "deberta_v3_extra_small",
+            "deberta_v3_extra_small_en",
         )
         outputs = tokenizer("The quick brown fox.")
         expected_outputs = [279, 1538, 3258, 16123, 260]
         self.assertAllEqual(outputs, expected_outputs)
 
     def test_preprocessor_output(self):
         preprocessor = DebertaV3Preprocessor.from_preset(
-            "deberta_v3_extra_small",
+            "deberta_v3_extra_small_en",
             sequence_length=4,
         )
         outputs = preprocessor("The quick brown fox.")["token_ids"]
@@ -62,7 +62,7 @@ def test_backbone_output(self, load_weights):
             "padding_mask": tf.constant([[1, 1, 1, 1]]),
         }
         model = DebertaV3Backbone.from_preset(
-            "deberta_v3_extra_small", load_weights=load_weights
+            "deberta_v3_extra_small_en", load_weights=load_weights
         )
         outputs = model(input_data)
         if load_weights:
@@ -76,7 +76,7 @@ def test_backbone_output(self, load_weights):
     def test_classifier_output(self, load_weights):
         input_data = tf.constant(["The quick brown fox."])
         model = DebertaV3Classifier.from_preset(
-            "deberta_v3_extra_small", load_weights=load_weights
+            "deberta_v3_extra_small_en", load_weights=load_weights
         )
         # Never assert output values, as the head weights are random.
         model.predict(input_data)
@@ -90,7 +90,7 @@ def test_classifier_output_without_preprocessing(self, load_weights):
             "padding_mask": tf.constant([[1, 1, 1, 1]]),
         }
         model = DebertaV3Classifier.from_preset(
-            "deberta_v3_extra_small",
+            "deberta_v3_extra_small_en",
             load_weights=load_weights,
             preprocessor=None,
         )
@@ -117,7 +117,7 @@ def test_preset_docstring(self, cls):
     def test_unknown_preset_error(self, cls):
         # Not a preset name
         with self.assertRaises(ValueError):
-            cls.from_preset("deberta_v3_extra_small_clowntown")
+            cls.from_preset("deberta_v3_extra_small_en_clowntown")
 
 
 @pytest.mark.extra_large

Diff for: keras_nlp/models/deberta_v3/deberta_v3_tokenizer.py

+1 −1

@@ -106,7 +106,7 @@ def from_preset(
     ```python
     # Load a preset tokenizer.
     tokenizer = keras_nlp.models.DebertaV3Tokenizer.from_preset(
-        "deberta_base",
+        "deberta_v3_base_en",
     )
 
     # Tokenize some input.
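The docstring stops at `# Tokenize some input.`; a completed sketch, using the token ids pinned by the smoke test earlier in this commit (the English DeBERTaV3 presets share the same `spm_proto_hash`, so the extra-small vocabulary is representative):

```python
import keras_nlp

tokenizer = keras_nlp.models.DebertaV3Tokenizer.from_preset(
    "deberta_v3_extra_small_en",
)
# Tokenize some input.
outputs = tokenizer("The quick brown fox.")
print(outputs)  # [279, 1538, 3258, 16123, 260], per the preset smoke test
```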

Diff for: keras_nlp/models/distil_bert/distil_bert_classifier.py

+3 −3

@@ -219,12 +219,12 @@ def from_preset(
 
     # Use a shorter sequence length.
     preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset(
-        "bert_base_en_uncased",
+        "distil_bert_base_en_uncased",
         sequence_length=128,
     )
     # Create a DistilBertClassifier and fit your data.
     classifier = keras_nlp.models.DistilBertClassifier.from_preset(
-        "bert_base_en_uncased",
+        "distil_bert_base_en_uncased",
         num_classes=4,
         preprocessor=preprocessor,
     )
@@ -250,7 +250,7 @@ def from_preset(
 
     # Create a DistilBERT classifier and fit your data.
     classifier = keras_nlp.models.DistilBertClassifier.from_preset(
-        "bert_base_en_uncased",
+        "distil_bert_base_en_uncased",
         num_classes=4,
         preprocessor=None,
     )
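Before this fix, the DistilBERT docstrings pointed at BERT's preset IDs. Each model family only accepts its own presets, so the old examples would have failed. A sketch of the corrected behavior (the ValueError expectation follows the preset tests in this commit):

```python
import keras_nlp

# DistilBertClassifier loads DistilBERT preset IDs...
classifier = keras_nlp.models.DistilBertClassifier.from_preset(
    "distil_bert_base_en_uncased",
    num_classes=4,
)

# ...and rejects preset IDs that belong to another model family.
try:
    keras_nlp.models.DistilBertClassifier.from_preset(
        "bert_base_en_uncased",
        num_classes=4,
    )
except ValueError as err:
    print(err)
```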

Diff for: keras_nlp/models/distil_bert/distil_bert_presets.py

+6 −6

@@ -37,7 +37,7 @@
         "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en_uncased/v1/vocab.txt",
         "vocabulary_hash": "64800d5d8528ce344256daf115d4965e",
     },
-    "distil_bert_base_en_cased": {
+    "distil_bert_base_en": {
         "config": {
             "vocabulary_size": 28996,
             "num_layers": 6,
@@ -55,12 +55,12 @@
             "Trained on English Wikipedia + BooksCorpus using BERT as the "
             "teacher model."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en_cased/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en/v1/model.h5",
         "weights_hash": "fa36aa6865978efbf85a5c8264e5eb57",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en_cased/v1/vocab.txt",
+        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_en/v1/vocab.txt",
         "vocabulary_hash": "bb6ca9b42e790e5cd986bbb16444d0e0",
     },
-    "distil_bert_base_multi_cased": {
+    "distil_bert_base_multi": {
         "config": {
             "vocabulary_size": 119547,
             "num_layers": 6,
@@ -77,9 +77,9 @@
             "Base size of DistilBERT. Trained on Wikipedias of 104 languages "
             "using BERT as the teacher model."
         ),
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi_cased/v1/model.h5",
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi/v1/model.h5",
         "weights_hash": "c0f11095e2a6455bd3b1a6d14800a7fa",
-        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi_cased/v1/vocab.txt",
+        "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/distil_bert_base_multi/v1/vocab.txt",
         "vocabulary_hash": "d9d865138d17f1958502ed060ecfeeb6",
     },
 }

Diff for: keras_nlp/models/gpt2/gpt2_backbone.py

+1 −1

@@ -215,7 +215,7 @@ def from_preset(
     }
 
     # Load architecture and weights from preset
-    model = GPT2Backbone.from_preset("gpt2_base")
+    model = GPT2Backbone.from_preset("gpt2_base_en")
     output = model(input_data)
 
     # Load randomly initialized model from preset architecture
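As with the other backbones, the truncated GPT-2 example can be completed along these lines (a sketch; shapes invented for illustration):

```python
import tensorflow as tf
import keras_nlp

# GPT2Backbone also takes token ids plus a padding mask.
input_data = {
    "token_ids": tf.ones(shape=(1, 12), dtype="int64"),
    "padding_mask": tf.constant([1] * 12, shape=(1, 12)),
}
model = keras_nlp.models.GPT2Backbone.from_preset("gpt2_base_en")
output = model(input_data)  # hidden states of shape (1, 12, 768)
```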
