LongxingTan
diff --git a/‎.github/workflows/test.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/test.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docker/Dockerfile‎
Lines changed: 3 additions & 1 deletion b/‎docker/Dockerfile‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎docs/source/quick-start.rst‎
Lines changed: 4 additions & 1 deletion b/‎docs/source/quick-start.rst‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎tests/test_examples/test_tfts_inputs.py‎
Lines changed: 4 additions & 1 deletion b/‎tests/test_examples/test_tfts_inputs.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎tests/test_models/test_wavenet.py‎
Lines changed: 1 addition & 1 deletion b/‎tests/test_models/test_wavenet.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tfts/layers/attention_layer.py‎
Lines changed: 25 additions & 8 deletions b/‎tfts/layers/attention_layer.py‎
Lines changed: 25 additions & 8 deletions
diff --git a/‎tfts/layers/cnn_layer.py‎
Lines changed: 3 additions & 1 deletion b/‎tfts/layers/cnn_layer.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎tfts/layers/dense_layer.py‎
Lines changed: 5 additions & 2 deletions b/‎tfts/layers/dense_layer.py‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎tfts/layers/embed_layer.py‎
Lines changed: 1 addition & 0 deletions b/‎tfts/layers/embed_layer.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tfts/layers/nbeats_layer.py‎
Lines changed: 52 additions & 24 deletions b/‎tfts/layers/nbeats_layer.py‎
Lines changed: 52 additions & 24 deletions
@@ -58,7 +58,7 @@ jobs:
           poetry run python -m pip install pip -U
           poetry install --no-interaction --no-root
           poetry run python -m pip install tensorflow==${{ matrix.tf-version }}
-          poetry run python -m pip install matplotlib
+          poetry run python -m pip install matplotlib numpy==1.26.0
 
       - name: Run unittest
         shell: bash
 
@@ -1,9 +1,11 @@
 From tensorflow/tensorflow:2.16.1-gpu
 
 RUN apt-get update
-RUN apt-get install -y libgl1-mesa-dev wget vim python3.9
+RUN apt-get install -y libgl1-mesa-dev wget vim
 
 RUN pip install --no-cache-dir tfts
 
+EXPOSE 8888
+
 # Set the default command to python3.
 CMD ["python3"]
@@ -124,8 +124,11 @@ Run with pretrained weights
     model = AutoModel.from_pretrained("tfts-model")
 
 
+3.3 Save and load the model
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-3.3 Serve the model
+
+3.4 Serve the model
 ~~~~~~~~~~~~~~~~~~~~~~~
 Once the model is trained and evaluated, deploy it for inference. Ensure the model is saved in a format compatible with your serving environment (e.g., TensorFlow SavedModel, ONNX, etc.). Set up an API or service to handle incoming requests, preprocess input data, and return predictions in real-time.
 
 
@@ -23,7 +23,7 @@ def test_encoder_array(self):
         y_valid = np.random.rand(1, predict_sequence_length, 1)
 
         for m in self.test_models:
-            logger.info(f"Test model {m}")
+            print(f"==== Test model {m} ====")
             config = AutoConfig.for_model(m)
             model = AutoModel.from_config(config, predict_sequence_length=predict_sequence_length)
             trainer = KerasTrainer(model)
@@ -65,6 +65,7 @@ def test_encoder_decoder_array2(self):
         n_decoder_feature = 3
 
         x_train = (
+            # x, encoder, decoder
             np.random.rand(1, train_length, 1),
             np.random.rand(1, train_length, n_encoder_feature),
             np.random.rand(1, predict_sequence_length, n_decoder_feature),
@@ -78,6 +79,7 @@ def test_encoder_decoder_array2(self):
         y_valid = np.random.rand(1, predict_sequence_length, 1)
 
         for m in self.test_models:
+            print(f"==== Test model {m} ====")
             config = AutoConfig.for_model(m)
             model = AutoModel.from_config(config, predict_sequence_length=predict_sequence_length)
             trainer = KerasTrainer(model)
@@ -116,6 +118,7 @@ def test_encoder_decoder_tfdata(self):
         valid_loader = valid_loader.batch(batch_size=1)
 
         for m in self.test_models:
+            print(f"==== Test model {m} ====")
             config = AutoConfig.for_model(m)
             model = AutoModel.from_config(config, predict_sequence_length=predict_sequence_length)
             trainer = KerasTrainer(model)
 
@@ -26,7 +26,7 @@ def test_encoder(self):
     def test_decoder1(self):
         filters = 32
         dilation_rates = [2]
-        dense_hidden_size = 32
+        dense_hidden_size = 64
         predict_sequence_length = 3
         layer = DecoderV1(filters, dilation_rates, dense_hidden_size, predict_sequence_length)
 
 
@@ -52,6 +52,7 @@ def call(
         training: Optional[bool] = None,
         return_attention_scores: bool = False,
         use_causal_mask: bool = False,
+        **kwargs,
     ):
         """use query and key generating an attention multiplier for value, multi_heads to repeat it
 
@@ -110,12 +111,31 @@ def get_config(self):
         return dict(list(base_config.items()) + list(config.items()))
 
     def compute_output_shape(self, input_shape):
-        if isinstance(input_shape, (list, tuple)) and len(input_shape) == 3:
-            q_shape = input_shape[0]
-        else:
-            raise ValueError("Expected input_shape to be a list or tuple of three elements (q, k, v)")
+        """Compute the layer's output shape from one shape or from (q, k, v) shapes.
+
+        Parameters
+        ----------
+        input_shape : tuple or list
+            Either a single ``(batch_size, seq_len, features)`` tuple, or a
+            list/tuple holding three such tuples ``(q_shape, k_shape, v_shape)``.
+
+        Returns
+        -------
+        tuple
+            ``(batch_size, seq_len, self.hidden_size)``, taking ``batch_size`` and
+            ``seq_len`` from the single shape or from ``q_shape``.
+
+        Raises
+        ------
+        ValueError
+            If ``input_shape`` matches neither layout.
+        """
+        # A tuple of three nested shapes is (q, k, v), not one shape: only take the
+        # single-shape path when none of the three entries is itself a list/tuple.
+        if (
+            isinstance(input_shape, tuple)
+            and len(input_shape) == 3
+            and not any(isinstance(dim, (list, tuple)) for dim in input_shape)
+        ):
+            batch_size, seq_len, _ = input_shape
+            return (batch_size, seq_len, self.hidden_size)
+
+        elif isinstance(input_shape, (list, tuple)) and len(input_shape) == 3:
+            q_shape, k_shape, v_shape = input_shape
 
-        return (q_shape[0], q_shape[1], self.hidden_size)
+            # Validate that all shapes are tuples with 3 dimensions
+            if not all(isinstance(shape, tuple) and len(shape) == 3 for shape in [q_shape, k_shape, v_shape]):
+                raise ValueError(
+                    "Each input shape must be a tuple of length 3 (batch_size, seq_len, features). "
+                    f"Got shapes: q={q_shape}, k={k_shape}, v={v_shape}"
+                )
+
+            # Output shape is based on query sequence length
+            batch_size, seq_q_len, _ = q_shape
+            return (batch_size, seq_q_len, self.hidden_size)
+
+        else:
+            raise ValueError(
+                "Expected input_shape to be either:\n"
+                "1. A single tuple (batch_size, seq_len, features) for self-attention, or\n"
+                "2. A list/tuple of 3 shapes [(q_shape), (k_shape), (v_shape)] for cross-attention.\n"
+                f"Got: {input_shape}"
+            )
 
 
 class SelfAttention(tf.keras.layers.Layer):
@@ -161,9 +181,6 @@ def get_config(self):
         return base_config
 
     def compute_output_shape(self, input_shape):
-        """
-        Compute the output shape of the self-attention layer.
-        """
+        """Return ``(input_shape[0], input_shape[1], self.hidden_size)``."""
         return (input_shape[0], input_shape[1], self.hidden_size)
 
 
 
@@ -69,6 +69,7 @@ def build(self, input_shape: Tuple[int]) -> None:
         input_shape : Tuple[int]
             Shape of the input tensor
         """
+        super(ConvTemp, self).build(input_shape)
         self.conv = tf.keras.layers.Conv1D(
             kernel_size=self.kernel_size,
             kernel_initializer=initializers.get(self.kernel_initializer),
@@ -77,7 +78,8 @@ def build(self, input_shape: Tuple[int]) -> None:
             dilation_rate=self.dilation_rate,
             activation=activations.get(self.activation),
         )
-        super(ConvTemp, self).build(input_shape)
+        self.conv.build(input_shape)
+        self.built = True
 
     def call(self, inputs):
         """Forward pass of the layer.
 
@@ -76,10 +76,13 @@ def get_config(self):
         base_config = super(DenseTemp, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
+    def compute_output_shape(self, input_shape):
+        """Return ``input_shape`` with its last dimension replaced by ``hidden_size``, as a ``tf.TensorShape``."""
+        # NOTE(review): ``input_shape[:-1] + (…,)`` needs ``input_shape`` to support ``+`` with a
+        # tuple; a plain list would raise TypeError here — confirm what callers pass.
+        return tf.TensorShape(input_shape[:-1] + (self.hidden_size,))
+
 
 class FeedForwardNetwork(tf.keras.layers.Layer):
-    def __init__(self, hidden_size: int, intermediate_size: int, hidden_dropout_prob: float = 0.0):
-        super(FeedForwardNetwork, self).__init__()
+    def __init__(self, hidden_size: int, intermediate_size: int, hidden_dropout_prob: float = 0.0, **kwargs):
+        super(FeedForwardNetwork, self).__init__(**kwargs)
         self.hidden_size = hidden_size
         self.intermediate_size = intermediate_size
         self.hidden_dropout_prob = hidden_dropout_prob
 
@@ -4,6 +4,7 @@
 
 import tensorflow as tf
 from tensorflow.keras.layers import GRU, Embedding
+from tensorflow.keras.utils import register_keras_serializable
 
 from .position_layer import PositionalEmbedding, PositionalEncoding, RelativePositionEmbedding
 
 
@@ -100,7 +100,6 @@ def seasonality_model(
         "bp,pt->bt", theta[:, config_per_harmonic : 2 * config_per_harmonic], forecast_sin_template
     )
     forecast = forecast_harmonics_sin + forecast_harmonics_cos
-
     return backcast, forecast
 
 
@@ -123,9 +122,14 @@ class GenericBlock(tf.keras.layers.Layer):
     """
 
     def __init__(
-        self, train_sequence_length: int, predict_sequence_length: int, hidden_size: int, n_block_layers: int = 4
+        self,
+        train_sequence_length: int,
+        predict_sequence_length: int,
+        hidden_size: int,
+        n_block_layers: int = 4,
+        **kwargs
     ):
-        super(GenericBlock, self).__init__()
+        super(GenericBlock, self).__init__(**kwargs)
         self.train_sequence_length = train_sequence_length
         self.predict_sequence_length = predict_sequence_length
         self.hidden_size = hidden_size
@@ -139,9 +143,9 @@ def build(self, input_shape: Tuple[Optional[int], ...]):
         input_shape : Tuple[Optional[int], ...]
             Shape of the input tensor
         """
+        super(GenericBlock, self).build(input_shape)
         self.layers = [Dense(self.hidden_size, activation="relu") for _ in range(self.n_block_layers)]
         self.theta = Dense(self.train_sequence_length + self.predict_sequence_length, use_bias=False, activation=None)
-        super(GenericBlock, self).build(input_shape)
 
     def call(self, inputs: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
         """Compute the output of the Generic Block.
@@ -164,6 +168,24 @@ def call(self, inputs: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
         x = self.theta(x)
         return generic_model(x, tf.range(self.train_sequence_length), tf.range(self.predict_sequence_length))
 
+    def compute_output_shape(self, input_shape):
+        """Return the ``(backcast_shape, forecast_shape)`` pair for this block.
+
+        Both shapes keep ``input_shape[0]`` as their first dimension; the second
+        dimension is ``train_sequence_length`` for the backcast and
+        ``predict_sequence_length`` for the forecast.
+        """
+        batch_size = input_shape[0]
+        backcast_shape = (batch_size, self.train_sequence_length)
+        forecast_shape = (batch_size, self.predict_sequence_length)
+        return (backcast_shape, forecast_shape)
+
+    def get_config(self):
+        """Return the base layer config extended with this block's constructor arguments."""
+        config = super().get_config()
+        config.update(
+            {
+                "train_sequence_length": self.train_sequence_length,
+                "predict_sequence_length": self.predict_sequence_length,
+                "hidden_size": self.hidden_size,
+                "n_block_layers": self.n_block_layers,
+            }
+        )
+        return config
+
 
 class TrendBlock(tf.keras.layers.Layer):
     """Trend block that learns trend patterns using polynomial basis functions.
@@ -192,8 +214,9 @@ def __init__(
         hidden_size: int,
         n_block_layers: int = 4,
         polynomial_term: int = 2,
+        **kwargs
     ):
-        super().__init__()
+        super().__init__(**kwargs)
 
         self.train_sequence_length = train_sequence_length
         self.predict_sequence_length = predict_sequence_length
@@ -226,12 +249,10 @@ def build(self, input_shape: Tuple[Optional[int], ...]):
         input_shape : Tuple[Optional[int], ...]
             Shape of the input tensor
         """
-
+        super().build(input_shape)
         self.layers = [Dense(self.hidden_size, activation="relu") for _ in range(self.n_block_layers)]
         self.theta = Dense(2 * self.polynomial_size, use_bias=False, activation=None)
 
-        super().build(input_shape)
-
     def call(self, inputs: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
         """Compute the output of the Trend Block.
 
@@ -254,14 +275,16 @@ def call(self, inputs: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
         return trend_model(x, self.backcast_time, self.forecast_time, self.polynomial_size)
 
     def compute_output_shape(self, input_shape):
-        return [(input_shape[0], self.train_sequence_length), (input_shape[0], self.predict_sequence_length)]
+        """Return two shapes: ``(input_shape[0], train_sequence_length)`` and ``(input_shape[0], predict_sequence_length)``."""
+        return ((input_shape[0], self.train_sequence_length), (input_shape[0], self.predict_sequence_length))
 
 
 class SeasonalityBlock(tf.keras.layers.Layer):
     """Seasonality block"""
 
-    def __init__(self, train_sequence_length, predict_sequence_length, hidden_size, n_block_layers=4, num_harmonics=1):
-        super().__init__()
+    def __init__(
+        self, train_sequence_length, predict_sequence_length, hidden_size, n_block_layers=4, num_harmonics=1, **kwargs
+    ):
+        super().__init__(**kwargs)
         self.train_sequence_length = train_sequence_length
         self.predict_sequence_length = predict_sequence_length
         self.hidden_size = hidden_size
@@ -300,6 +323,7 @@ def __init__(self, train_sequence_length, predict_sequence_length, hidden_size,
         self.forecast_sin_template = tf.transpose(tf.sin(self.forecast_grid))
 
     def build(self, input_shape: Tuple[Optional[int], ...]):
+        super().build(input_shape)
         self.layers = [Dense(self.hidden_size, activation="relu") for _ in range(self.n_block_layers)]
         self.theta = Dense(self.theta_size, use_bias=False, activation=None)
 
@@ -336,17 +360,21 @@ def call(self, inputs):
             self.forecast_sin_template,
         )
 
-
-class ZerosLayer(tf.keras.layers.Layer):
-    """Layer for creating zeros tensor with proper shape"""
-
-    def __init__(self, predict_length, **kwargs):
-        super(ZerosLayer, self).__init__(**kwargs)
-        self.predict_length = predict_length
-
-    def call(self, x):
-        batch_size = tf.shape(x)[0]
-        return tf.zeros([batch_size, self.predict_length], dtype=tf.float32)
-
     def compute_output_shape(self, input_shape):
-        return (input_shape[0], self.predict_length)
+        """Return the ``(backcast_shape, forecast_shape)`` pair for this block.
+
+        Both shapes keep ``input_shape[0]`` as their first dimension; the second
+        dimension is ``train_sequence_length`` for the backcast and
+        ``predict_sequence_length`` for the forecast.
+        """
+        batch_size = input_shape[0]
+        backcast_shape = (batch_size, self.train_sequence_length)
+        forecast_shape = (batch_size, self.predict_sequence_length)
+        return (backcast_shape, forecast_shape)
+
+    def get_config(self):
+        """Return the base layer config extended with this block's constructor arguments."""
+        config = super().get_config()
+        config.update(
+            {
+                "train_sequence_length": self.train_sequence_length,
+                "predict_sequence_length": self.predict_sequence_length,
+                "hidden_size": self.hidden_size,
+                "n_block_layers": self.n_block_layers,
+                "num_harmonics": self.num_harmonics,
+            }
+        )
+        return config