Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion keras_hub/src/models/clip/clip_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class CLIPBackbone(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/clip/clip_text_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class CLIPTextEncoder(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.
"""

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/clip/clip_vision_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class CLIPVisionEncoder(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.
"""

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class DepthAnythingBackbone(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/dinov2/dinov2_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class DINOV2Backbone(FeaturePyramidBackbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/dinov3/dinov3_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class DINOV3Backbone(FeaturePyramidBackbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
11 changes: 10 additions & 1 deletion keras_hub/src/models/flux/flux_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,16 @@ class FluxBackbone(Backbone):
use_bias: bool. Whether to apply bias to the query, key, and value
projections.
guidance_embed: bool. If True, applies guidance embedding in the model.

image_shape: tuple[int]. Shape of the image input tensor. Defaults to
`(None, 768, 3072)`.
text_shape: tuple[int]. Shape of the text input tensor. Defaults to
`(None, 768, 3072)`.
image_ids_shape: tuple[int]. Shape of the image IDs input tensor.
Defaults to `(None, 768, 3072)`.
text_ids_shape: tuple[int]. Shape of the text IDs input tensor.
Defaults to `(None, 768, 3072)`.
y_shape: tuple[int]. Shape of the additional vector input tensor.
Defaults to `(None, 128)`.
Call arguments:
image: KerasTensor. Image input tensor of shape (N, L, D) where N is the
batch size, L is the sequence length, and D is the feature
Expand Down
4 changes: 2 additions & 2 deletions keras_hub/src/models/gemma/gemma_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class GemmaBackbone(Backbone):
intermediate_dim: int. The output dimension of the first Dense layer in
a two-layer feedforward network for each transformer.
head_dim: int. The size of each attention head.
layer_norm_epsilon: float. The epsilon value user for every layer norm
layer_norm_epsilon: float. The epsilon value used for every layer norm
in the transformer model.
dropout: float. Dropout probability for the Transformer encoder.
query_head_dim_normalize: boolean. If `True` normalize the query before
Expand All @@ -55,7 +55,7 @@ class GemmaBackbone(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/gemma3/gemma3_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class Gemma3Backbone(Backbone):
vision_encoder: A `Gemma3VisionEncoder` instance. `call()`
takes in images and returns corresponding sequence of embeddings. If
`None`, the model is a text-only model.
layer_norm_epsilon: float. The epsilon value user for every layer norm
layer_norm_epsilon: float. The epsilon value used for every layer norm
in all transformer blocks. Defaults to `1e-6`.
dropout: float. Dropout probability for the Transformer decoder blocks.
Defaults to `0`.
Expand Down
4 changes: 2 additions & 2 deletions keras_hub/src/models/gemma3/gemma3_vision_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ class Gemma3VisionEncoder(keras.Model):
pool_size: int. Factors by which to downscale `(dim1, dim2)` in the
average pooling layer. The same value is used for `"strides"`.
Defaults to 14.
layer_norm_epsilon: float. The epsilon value user for every layer norm
layer_norm_epsilon: float. The epsilon value used for every layer norm
in all transformer blocks. Defaults to `1e-6`.
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/gpt2/gpt2_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class GPT2Backbone(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/gpt_oss/gpt_oss_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class GptOssBackbone(Backbone):
rope_max_wavelength: int. The maximum angular wavelength of
the sine/cosine curves, for rotary embeddings. Defaults to `10000`.
rope_scaling_factor: float. The scaling factor for
calculation of roatary embedding. Defaults to `1.0`.
calculation of rotary embedding. Defaults to `1.0`.
layer_norm_epsilon: float. Epsilon for the layer
normalization layers in the transformer decoder. Defaults to `1e-6`.
sliding_window: int. The sliding window for the attention
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/hgnetv2/hgnetv2_image_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def __init__(
# Check valid pooling.
else:
raise ValueError(
"Unknown `pooling` type. Polling should be either `'avg'` or "
"Unknown `pooling` type. Pooling should be either `'avg'` or "
f"`'max'`. Received: pooling={pooling}."
)

Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/image_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def __init__(
)
else:
raise ValueError(
"Unknown `pooling` type. Polling should be either `'avg'` or "
"Unknown `pooling` type. Pooling should be either `'avg'` or "
f"`'max'`. Received: pooling={pooling}."
)
self.output_dropout = keras.layers.Dropout(
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/llama3/llama3_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class Llama3Backbone(LlamaBackbone):
rope_max_wavelength (int, optional): The maximum angular wavelength of
the sine/cosine curves, for rotary embeddings. Defaults to `10000`.
rope_position_scaling_factor (float, optional): The scaling factor for
calculation of roatary embedding. Defaults to `1.0`
calculation of rotary embedding. Defaults to `1.0`
rope_frequency_adjustment_factor (float, optional): The scaling factor
used to scale the inverse frequencies.
rope_low_freq_factor (float, optional): The low frequency factor.
Expand Down
4 changes: 3 additions & 1 deletion keras_hub/src/models/mistral/mistral_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,16 @@ class MistralBackbone(Backbone):
rope_max_wavelength (int, optional): The maximum angular wavelength of
the sine/cosine curves, for rotary embeddings. Defaults to `10000`.
rope_scaling_factor (float, optional): The scaling factor for
calculation of roatary embedding. Defaults to `1.0`.
calculation of rotary embedding. Defaults to `1.0`.
layer_norm_epsilon (float, optional): Epsilon for the layer
normalization layers in the transformer decoder. Defaults to `1e-6`.
sliding_window (int, optional): The sliding window for the mistral
attention layers. This controls the maximum cache size for the
attention layers in each transformer decoder. Only `sliding_window`
number of tokens are saved in the cache and used to generate the
next token. Defaults to `512`.
dropout (float, optional): Dropout probability for the Transformer
decoder blocks. Defaults to `0`.
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for model computations and weights. Note that some computations,
such as softmax and layer normalization, will always be done at
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def _compute_self_attention_mask(
else self_attention_cache_update_index
)

# The lower traingular attention mask
# The lower triangular attention mask
causal_mask = compute_causal_mask(
batch_size, input_length, output_length, cache_update_index
)
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/mixtral/mixtral_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class MixtralBackbone(Backbone):
rope_max_wavelength (int, optional): The maximum angular wavelength of
the sine/cosine curves, for rotary embeddings. Defaults to `10000`.
rope_scaling_factor (float, optional): The scaling factor for
calculation of roatary embedding. Defaults to `1.0`.
calculation of rotary embedding. Defaults to `1.0`.
layer_norm_epsilon (float, optional): Epsilon for the layer
normalization layers in the transformer decoder. Defaults to `1e-6`.
sliding_window (int, optional): The sliding window for the mixtral
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/mixtral/mixtral_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def _compute_self_attention_mask(
else self_attention_cache_update_index
)

# The lower traingular attention mask
# The lower triangular attention mask
causal_mask = compute_causal_mask(
batch_size, input_length, output_length, cache_update_index
)
Expand Down
8 changes: 4 additions & 4 deletions keras_hub/src/models/pali_gemma/pali_gemma_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class PaliGemmaBackbone(Backbone):
in a two-layer feedforward network for vision transformer. Defaults
to `4304`.
vit_pooling: `None` or string. The encoded vision embeddings are pooled
using the specified polling setting. The accepted values are
using the specified pooling setting. The accepted values are
`"map"`, `"gap"`, `"0"` or `None`. Defaults to `None`.
vit_classifier_activation: activation function. The activation that
is used for final output classification in the vision transformer.
Expand All @@ -76,14 +76,14 @@ class PaliGemmaBackbone(Backbone):
window attention. Defaults to `False`.
sliding_window_size: int. Size of the sliding local window. Defaults to
`4096`.
layer_norm_epsilon: float. The epsilon value user for every layer norm
layer_norm_epsilon: float. The epsilon value used for every layer norm
in all transformer blocks. Defaults to `1e-6`.
dropout: float. Dropout probability for the Transformer decoder blocks.
Defaults to `0`.
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand All @@ -100,7 +100,7 @@ class PaliGemmaBackbone(Backbone):
# Randomly initialized PaliGemma decoder with custom config.
model = keras_hub.models.PaliGemmaBackbone(
vocabulary_size=50257,
images_size=224,
image_size=224,
num_layers=12,
num_query_heads=12,
num_key_value_heads=1,
Expand Down
8 changes: 4 additions & 4 deletions keras_hub/src/models/pali_gemma/pali_gemma_vit.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,14 @@ class PaliGemmaVit(keras.Model):
as an image classifier, this value would correspond to the number of
output classes.
pooling: string. The encoded vision embeddings are pooled using the
specified polling setting. The accepted values are `"map"`, `"gap"`,
specified pooling setting. The accepted values are `"map"`, `"gap"`,
`"zero"` or `None`. Defaults to `None`.
classifier_activation: activation fucntion. The activation that is used
for final output classification
classifier_activation: activation function. The activation that is used
for final output classification.
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/siglip/siglip_backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class SigLIPBackbone(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/siglip/siglip_text_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class SigLIPTextEncoder(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.
"""

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/siglip/siglip_vision_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class SigLIPVisionEncoder(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.
"""

def __init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ class StableDiffusion3Backbone(Backbone):
dtype: string or `keras.mixed_precision.DTypePolicy`. The dtype to use
for the models computations and weights. Note that some
computations, such as softmax and layer normalization will always
be done a float32 precision regardless of dtype.
be done in float32 precision regardless of dtype.

Example:
```python
Expand Down
2 changes: 1 addition & 1 deletion keras_hub/src/models/vgg/vgg_image_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def __init__(
)
else:
raise ValueError(
"Unknown `pooling` type. Polling should be either `'avg'` or "
"Unknown `pooling` type. Pooling should be either `'avg'` or "
f"`'max'`. Received: pooling={pooling}."
)

Expand Down
4 changes: 3 additions & 1 deletion keras_hub/src/samplers/random_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ def __init__(

def get_next_token(self, probabilities):
# Sample the next token from the probability distribution.
# tf does not support half precision multinomial sampling, so make
# sure we have full precision here.
next_token_id = random.categorical(
ops.log(probabilities),
ops.cast(ops.log(probabilities), "float32"),
1,
seed=self.seed_generator,
dtype="int32",
Expand Down
4 changes: 3 additions & 1 deletion keras_hub/src/samplers/top_p_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ def get_next_token(self, probabilities):
ops.zeros(ops.shape(sorted_preds), dtype=sorted_preds.dtype),
)
sorted_next_token = random.categorical(
ops.log(probabilities),
# tf does not support half precision multinomial sampling, so make
# sure we have full precision here.
ops.cast(ops.log(probabilities), "float32"),
1,
seed=self.seed_generator,
dtype="int32",
Expand Down
Loading