Skip to content

Commit 9425064

Browse files
fixes
1 parent 8ef00ff commit 9425064

File tree

3 files changed

+19
-19
lines changed

3 files changed

+19
-19
lines changed

guides/int8_quantization_in_keras.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,12 @@
174174
# Save INT8 Gemma3 model
175175
gemma3.save_to_preset("gemma3_int8")
176176

177+
# Reload and compare outputs
178+
gemma3_int8 = Gemma3CausalLM.from_preset("gemma3_int8")
179+
180+
output = gemma3_int8.generate("Keras is a", max_length=30)
181+
print("Quantized reloaded output:", output)
182+
177183

178184
# Compute storage savings
179185
def bytes_to_mib(n):
@@ -188,12 +194,6 @@ def bytes_to_mib(n):
188194
print(f"Gemma3: INT8 file size: {bytes_to_mib(gemma_int8_size):.2f} MiB")
189195
print(f"Gemma3: Size reduction: {gemma_reduction:.1f}%")
190196

191-
# Reload and compare outputs
192-
gemma3_int8 = Gemma3CausalLM.from_preset("gemma3_int8")
193-
194-
output = gemma3_int8.generate("Keras is a", max_length=30)
195-
print("Quantized reloaded output:", output)
196-
197197
"""
198198
## Practical tips
199199

guides/ipynb/int8_quantization_in_keras.ipynb

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,12 @@
226226
"# Save INT8 Gemma3 model\n",
227227
"gemma3.save_to_preset(\"gemma3_int8\")\n",
228228
"\n",
229+
"# Reload and compare outputs\n",
230+
"gemma3_int8 = Gemma3CausalLM.from_preset(\"gemma3_int8\")\n",
231+
"\n",
232+
"output = gemma3_int8.generate(\"Keras is a\", max_length=30)\n",
233+
"print(\"Quantized reloaded output:\", output)\n",
234+
"\n",
229235
"\n",
230236
"# Compute storage savings\n",
231237
"def bytes_to_mib(n):\n",
@@ -238,13 +244,7 @@
238244
"gemma_reduction = 100.0 * (1.0 - (gemma_int8_size / max(gemma_fp32_size, 1)))\n",
239245
"print(f\"Gemma3: FP32 file size: {bytes_to_mib(gemma_fp32_size):.2f} MiB\")\n",
240246
"print(f\"Gemma3: INT8 file size: {bytes_to_mib(gemma_int8_size):.2f} MiB\")\n",
241-
"print(f\"Gemma3: Size reduction: {gemma_reduction:.1f}%\")\n",
242-
"\n",
243-
"# Reload and compare outputs\n",
244-
"gemma3_int8 = Gemma3CausalLM.from_preset(\"gemma3_int8\")\n",
245-
"\n",
246-
"output = gemma3_int8.generate(\"Keras is a\", max_length=30)\n",
247-
"print(\"Quantized reloaded output:\", output)"
247+
"print(f\"Gemma3: Size reduction: {gemma_reduction:.1f}%\")"
248248
]
249249
},
250250
{

guides/md/int8_quantization_in_keras.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ print("Quantized output:", output)
194194
# Save INT8 Gemma3 model
195195
gemma3.save_to_preset("gemma3_int8")
196196

197+
# Reload and compare outputs
198+
gemma3_int8 = Gemma3CausalLM.from_preset("gemma3_int8")
199+
200+
output = gemma3_int8.generate("Keras is a", max_length=30)
201+
print("Quantized reloaded output:", output)
202+
197203

198204
# Compute storage savings
199205
def bytes_to_mib(n):
@@ -207,12 +213,6 @@ gemma_reduction = 100.0 * (1.0 - (gemma_int8_size / max(gemma_fp32_size, 1)))
207213
print(f"Gemma3: FP32 file size: {bytes_to_mib(gemma_fp32_size):.2f} MiB")
208214
print(f"Gemma3: INT8 file size: {bytes_to_mib(gemma_int8_size):.2f} MiB")
209215
print(f"Gemma3: Size reduction: {gemma_reduction:.1f}%")
210-
211-
# Reload and compare outputs
212-
gemma3_int8 = Gemma3CausalLM.from_preset("gemma3_int8")
213-
214-
output = gemma3_int8.generate("Keras is a", max_length=30)
215-
print("Quantized reloaded output:", output)
216216
```
217217

218218
<div class="k-default-codeblock">

0 commit comments

Comments
 (0)