Skip to content
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion keras/src/backend/openvino/excluded_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ keras/src/saving
keras/src/trainers/compile_utils_test.py
keras/src/trainers/epoch_iterator_test.py
keras/src/trainers/trainer_test.py
keras/src/utils
keras/src/utils
keras/src/layers/rnn/gru_test.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This change to exclude gru_test.py seems out of scope for a pull request focused on adding LSTM support. Additionally, the entire keras/src/layers/rnn directory is already excluded on line 26, which makes this new line redundant if the directory exclusion works as expected. Could you please clarify the reasoning for this change or remove it if it's not necessary?

Copy link
Contributor

@goyaladitya05 goyaladitya05 Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed with Gemini here. Adding gru_test.py when the parent directory is already excluded makes no sense. Please clean up the excluded_tests.txt diff.

207 changes: 196 additions & 11 deletions keras/src/backend/openvino/rnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from openvino import Model
from openvino import Type

from keras.src import activations
from keras.src import ops
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You cannot import ops like this here, this is what's creating the circular imports.

But they're not even used.

from keras.src import tree
from keras.src.backend.openvino.core import OpenVINOKerasTensor
from keras.src.backend.openvino.core import get_ov_output
Expand Down Expand Up @@ -105,7 +107,7 @@ def _slice_at_0(x):
x_ov = get_ov_output(x)
slice_0 = ov_opset.gather(
x_ov,
ov_opset.constant([0], dtype=Type.i32).output(0),
ov_opset.constant(0, dtype=Type.i32).output(0),
ov_opset.constant(0, dtype=Type.i32).output(0),
).output(0)
return OpenVINOKerasTensor(slice_0)
Expand All @@ -130,10 +132,10 @@ def _slice_at_0(x):
inp_ov = get_ov_output(inp)
pshape = inp_ov.get_partial_shape()
if pshape.rank.is_static:
new_shape = list(pshape)[1:]
new_shape = [1] + list(pshape)[1:]
else:
new_shape = (
[-1] * (pshape.rank.get_length() - 1)
[-1] * (pshape.rank.get_length())
if pshape.rank.is_static
else None
)
Expand All @@ -144,7 +146,7 @@ def _slice_at_0(x):
if mask is not None:
mask_ov = get_ov_output(mask)
pshape = mask_ov.get_partial_shape()
new_shape = list(pshape)[1:] if pshape.rank.is_static else None
new_shape = [1] + list(pshape)[1:] if pshape.rank.is_static else None
param = ov_opset.parameter(new_shape, mask_ov.get_element_type())
sliced_mask_params.append(param)
params.append(param)
Expand Down Expand Up @@ -172,9 +174,13 @@ def _slice_at_0(x):
)
constants_params.append(param)
params.append(param)
sliced_inputs_t = [
OpenVINOKerasTensor(p.output(0)) for p in sliced_inputs_params
]
sliced_inputs_t = []
for p in sliced_inputs_params:
p_out = p.output(0)
p_out = ov_opset.squeeze(
p_out, ov_opset.constant([0], dtype=Type.i32).output(0)
).output(0)
sliced_inputs_t.append(OpenVINOKerasTensor(p_out))
merged_states_t = [
OpenVINOKerasTensor(p.output(0)) for p in merged_states_params
]
Expand All @@ -192,6 +198,9 @@ def _slice_at_0(x):
final_last_output_list = []
if mask is not None:
mask_t = sliced_mask_params[0].output(0)
mask_t = ov_opset.squeeze(
mask_t, ov_opset.constant([0], dtype=Type.i32).output(0)
).output(0)
for i, (new_st, old_st) in enumerate(
zip(flat_step_new_states, merged_states_t)
):
Expand Down Expand Up @@ -225,7 +234,15 @@ def _slice_at_0(x):
final_states_list = [get_ov_output(x) for x in flat_step_new_states]
final_output_list = [get_ov_output(x) for x in flat_step_output]
final_last_output_list = [get_ov_output(x) for x in flat_step_output]
zero_const = ov_opset.constant([0], dtype=Type.i32).output(0)
final_output_list = [
ov_opset.unsqueeze(x, zero_const).output(0) for x in final_output_list
]
cond_const = ov_opset.constant(True, Type.boolean).output(0)
zero_const = ov_opset.constant([0], dtype=Type.i32).output(0)
final_output_list = [
ov_opset.unsqueeze(x, zero_const).output(0) for x in final_output_list
]
Comment on lines +241 to +244
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This block of code, which redefines zero_const and re-assigns final_output_list by unsqueezing its elements, is a duplicate of the block on lines 236-239. This is redundant and should be removed to avoid confusion and potential bugs.

results = (
[cond_const]
+ final_states_list
Expand Down Expand Up @@ -284,8 +301,163 @@ def _slice_at_0(x):
return last_output, outputs, new_states


def lstm(*args, **kwargs):
raise NotImplementedError("`lstm` is not supported with openvino backend")
def lstm(
inputs,
initial_state_h,
initial_state_c,
mask,
kernel,
recurrent_kernel,
bias,
activation,
recurrent_activation,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The unroll parameter is included in the function signature but is not used within the implementation. According to the Keras API design guidelines (lines 18 and 53), unused parameters should be avoided to keep the API surface clean and minimize user confusion. Please consider either implementing the unrolling logic or removing this parameter.

return_sequences=False,
go_backwards=False,
unroll=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The unroll parameter is defined but not used in this function. According to the Keras API design guidelines, parameters that are not used should be removed to avoid user confusion and to keep the API surface clean.

time_major=False,
Comment on lines +315 to +316
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

These local imports are redundant. keras.src.backend is already imported at the top of the file, and split and concatenate are also imported from keras.src.backend.openvino.numpy at the file's top level. These lines can be removed to improve code clarity.

):
if mask is not None:
raise NotImplementedError("lstm sequence with mask is not supported")
mask = get_ov_output(mask)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This line is unreachable due to the raise NotImplementedError on the preceding line. It should be removed.

inputs = get_ov_output(inputs)
initial_state_h = get_ov_output(initial_state_h)
initial_state_c = get_ov_output(initial_state_c)
kernel = get_ov_output(kernel)
recurrent_kernel = get_ov_output(recurrent_kernel)
shape = ov_opset.shape_of(inputs, Type.i32).output(0)
if not time_major:
batch_size = ov_opset.gather(
shape,
ov_opset.constant([0], Type.i32),
ov_opset.constant(0, Type.i32),
).output(0)
seq_length = ov_opset.gather(
shape,
ov_opset.constant([1], Type.i32),
ov_opset.constant(0, Type.i32),
).output(0)
else:
batch_size = ov_opset.gather(
shape,
ov_opset.constant([1], Type.i32),
ov_opset.constant(0, Type.i32),
).output(0)
seq_length = ov_opset.gather(
shape,
ov_opset.constant([0], Type.i32),
ov_opset.constant(0, Type.i32),
).output(0)
seq_length_tensor = ov_opset.broadcast(seq_length, batch_size).output(0)
if time_major:
axes = ov_opset.constant([1, 0, 2], Type.i32).output(0)
inputs = ov_opset.transpose(inputs, axes).output(0)
kernel_T = ov_opset.transpose(
kernel, ov_opset.constant([1, 0], Type.i32)
).output(0)
k_i, k_f, k_c, k_o = ov_opset.split(
kernel_T, ov_opset.constant(0, Type.i32), 4
).outputs()
W = ov_opset.concat([k_f, k_i, k_c, k_o], axis=0).output(0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The concatenation order for weights W is incorrect. Keras LSTM kernels are ordered for [input, forget, cell, output] gates. The OpenVINO LSTMSequence operation expects the weights for gates in the order [input, output, forget, cell]. The current code concatenates them in the order [forget, input, cell, output]. This should be [input, output, forget, cell].

Suggested change
W = ov_opset.concat([k_f, k_i, k_c, k_o], axis=0).output(0)
W = ov_opset.concat([k_i, k_o, k_f, k_c], axis=0).output(0)

W = ov_opset.unsqueeze(W, ov_opset.constant([0], Type.i32)).output(0)
recurrent_kernel_T = ov_opset.transpose(
recurrent_kernel, ov_opset.constant([1, 0], Type.i32)
).output(0)
r_i, r_f, r_c, r_o = ov_opset.split(
recurrent_kernel_T, ov_opset.constant(0, Type.i32), 4
).outputs()
R = ov_opset.concat([r_f, r_i, r_c, r_o], axis=0).output(0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Similar to the kernel weights W, the concatenation order for recurrent weights R is incorrect. It should be [input, output, forget, cell] to match the OpenVINO LSTMSequence op specification.

Suggested change
R = ov_opset.concat([r_f, r_i, r_c, r_o], axis=0).output(0)
R = ov_opset.concat([r_i, r_o, r_f, r_c], axis=0).output(0)

R = ov_opset.unsqueeze(R, ov_opset.constant([0], Type.i32)).output(0)
if bias is not None:
bias = get_ov_output(bias)
b_i, b_f, b_c, b_o = ov_opset.split(
bias, ov_opset.constant(0, Type.i32), 4
).outputs()
B = ov_opset.concat([b_f, b_i, b_c, b_o], axis=0).output(0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Similar to the weights, the concatenation order for biases B is incorrect. It should be [input, output, forget, cell] to match the OpenVINO LSTMSequence op specification.

Suggested change
B = ov_opset.concat([b_f, b_i, b_c, b_o], axis=0).output(0)
B = ov_opset.concat([b_i, b_o, b_f, b_c], axis=0).output(0)

else:
W_shape = ov_opset.shape_of(W, Type.i32).output(0)
B_size = ov_opset.gather(
W_shape,
ov_opset.constant([1], Type.i32),
ov_opset.constant(0, Type.i32),
).output(0)
B = ov_opset.broadcast(
ov_opset.constant(0, inputs.get_element_type()), B_size
).output(0)
B = ov_opset.unsqueeze(B, ov_opset.constant([0], Type.i32)).output(0)
initial_state_h = ov_opset.unsqueeze(
initial_state_h, ov_opset.constant([1], Type.i32)
).output(0)
initial_state_c = ov_opset.unsqueeze(
initial_state_c, ov_opset.constant([1], Type.i32)
).output(0)
direction = "forward"
if go_backwards:
direction = "reverse"

def get_activation_name(act):
from keras.src import activations
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

activations are already imported at the top, you don't need this import.


if act == activations.tanh:
return "tanh"
if act == activations.sigmoid:
return "sigmoid"
if act == activations.relu:
return "relu"
if act == activations.linear:
return "linear"
if hasattr(act, "__name__"):
return act.__name__
return "tanh"
Comment on lines +396 to +408
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This function can be made more concise and maintainable by using a dictionary to map activation functions to their string names. This is a common pattern for this type of logic and would improve readability.

For example:

ACTIVATION_MAP = {
    activations.tanh: "tanh",
    activations.sigmoid: "sigmoid",
    activations.relu: "relu",
    activations.linear: "linear",
}

def get_activation_name(act):
    name = ACTIVATION_MAP.get(act)
    if name:
        return name
    if hasattr(act, "__name__"):
        return act.__name__
    return "tanh"


act_names = [
get_activation_name(recurrent_activation),
get_activation_name(activation),
get_activation_name(activation),
]
hidden_size = (
R.get_partial_shape()[2].get_length()
if R.get_partial_shape().rank.is_static
and R.get_partial_shape()[2].is_static
else -1
)
lstm_node = ov_opset.lstm_sequence(
X=inputs,
initial_hidden_state=initial_state_h,
initial_cell_state=initial_state_c,
sequence_lengths=seq_length_tensor,
W=W,
R=R,
B=B,
hidden_size=hidden_size,
direction=direction,
activations=act_names,
)
Y = lstm_node.output(0)
Ho = lstm_node.output(1)
Co = lstm_node.output(2)
Y = ov_opset.squeeze(Y, ov_opset.constant([1], Type.i32)).output(0)
Ho = ov_opset.squeeze(Ho, ov_opset.constant([1], Type.i32)).output(0)
Co = ov_opset.squeeze(Co, ov_opset.constant([1], Type.i32)).output(0)
if not time_major:
outputs = Y
else:
axes = ov_opset.constant([1, 0, 2], Type.i32).output(0)
outputs = ov_opset.transpose(Y, axes).output(0)
if not return_sequences:
if time_major:
outputs = ov_opset.unsqueeze(
Ho, ov_opset.constant([0], Type.i32)
).output(0)
else:
outputs = ov_opset.unsqueeze(
Ho, ov_opset.constant([1], Type.i32)
).output(0)
Comment on lines +444 to +452
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

When return_sequences is False, the caller (keras.layers.RNN) uses the last_output return value and ignores the outputs value. This block of code modifies outputs when return_sequences=False, which makes it dead code. This block should be removed to improve clarity and avoid confusion.

last_output = Ho
return (
OpenVINOKerasTensor(last_output),
OpenVINOKerasTensor(outputs),
[OpenVINOKerasTensor(Ho), OpenVINOKerasTensor(Co)],
)


def gru(*args, **kwargs):
Expand All @@ -304,5 +476,18 @@ def numpy_scan(f, init, xs, reverse=False, mask=None):
)


def cudnn_ok(*args, **kwargs):
return False
def cudnn_ok(
activation,
recurrent_activation,
unroll,
use_bias,
reset_after=None,
):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify, in this context, cudnn_ok means "use the native OpenVino implementation" (we should rename it).

But is the criterion really the same as for CuDNN? That seems surprising.

if reset_after is None:
return (
activation in (activations.tanh, ops.tanh)
and recurrent_activation in (activations.sigmoid, ops.sigmoid)
and not unroll
)
else:
return False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This cudnn_ok function appears to be dead code in the context of the OpenVINO backend. It's only referenced in the LSTM and GRU layer __init__ methods within a condition that checks backend.backend() == "tensorflow". When the backend is OpenVINO, this function will not be called.

Additionally, the name cudnn_ok is misleading here. If this function is intended to be used in the future for OpenVINO, it should be renamed to something more appropriate, like is_fast_path_supported.

For now, it seems this function can be reverted to return False, to avoid confusion and to avoid maintaining unused code.

13 changes: 12 additions & 1 deletion keras/src/initializers/random_initializers.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused by the inclusion of these changes in this PR. keras/src/initializers/ is a global utility and should remain backend-agnostic. Unless there is a specific reason these initializers need to be changed for all of Keras, I'd suggest reverting the changes to this file.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Why was this change needed?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert this whole file, it's failing the code format check.

Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.backend import random
from keras.src.initializers.initializer import Initializer
from keras.src.saving import serialization_lib


class RandomInitializer(Initializer):
def __init__(self, seed=None):
from keras.src.backend import random

self._init_seed = seed
if seed is None:
seed = random.make_default_seed()
Expand Down Expand Up @@ -69,6 +70,8 @@ def __init__(self, mean=0.0, stddev=0.05, seed=None):
super().__init__(seed=seed)

def __call__(self, shape, dtype=None):
from keras.src.backend import random

return random.normal(
shape=shape,
mean=self.mean,
Expand Down Expand Up @@ -128,6 +131,8 @@ def __init__(self, mean=0.0, stddev=0.05, seed=None):
super().__init__(seed=seed)

def __call__(self, shape, dtype=None):
from keras.src.backend import random

return random.truncated_normal(
shape=shape,
mean=self.mean,
Expand Down Expand Up @@ -184,6 +189,8 @@ def __init__(self, minval=-0.05, maxval=0.05, seed=None):
super().__init__(seed=seed)

def __call__(self, shape, dtype=None):
from keras.src.backend import random

return random.uniform(
shape=shape,
minval=self.minval,
Expand Down Expand Up @@ -283,6 +290,8 @@ def __init__(
super().__init__(seed=seed)

def __call__(self, shape, dtype=None):
from keras.src.backend import random

scale = self.scale
fan_in, fan_out = compute_fans(shape)
if self.mode == "fan_in":
Expand Down Expand Up @@ -683,6 +692,8 @@ def __init__(self, gain=1.0, seed=None):
super().__init__(seed=seed)

def __call__(self, shape, dtype=None):
from keras.src.backend import random

if len(shape) < 2:
raise ValueError(
"The tensor to initialize must be "
Expand Down
20 changes: 15 additions & 5 deletions keras/src/ops/__init__.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Manually overriding dir and getattr to force-import the entire library seems unnecessary. We only make changes to the files involving the backend, for a backend specific implementation. Is there a reason for this?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Revert this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert this whole file, it's failing the code format check.

Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,25 @@
# from keras.src.ops.numpy import Add, add
# from keras.src.ops.numpy import Multiply, multiply

from keras.src.backend import cast
from keras.src.backend import cond
from keras.src.backend import is_tensor
from keras.src.backend import name_scope
from keras.src.backend import random
from keras.src.ops import image
from keras.src.ops import operation_utils
from keras.src.ops.core import * # noqa: F403
from keras.src.ops.linalg import * # noqa: F403
from keras.src.ops.math import * # noqa: F403
from keras.src.ops.nn import * # noqa: F403
from keras.src.ops.numpy import * # noqa: F403


def __dir__():
return sorted(
list(globals().keys())
+ ["cast", "cond", "is_tensor", "name_scope", "random"]
)


def __getattr__(name):
if name in {"cast", "cond", "is_tensor", "name_scope", "random"}:
import keras.src.backend as backend

return getattr(backend, name)
raise AttributeError(f"module '{__name__}' has no attribute '{name}'")