Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion keras/src/backend/openvino/excluded_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,5 @@ keras/src/saving
keras/src/trainers/compile_utils_test.py
keras/src/trainers/epoch_iterator_test.py
keras/src/trainers/trainer_test.py
keras/src/utils
keras/src/utils
keras/src/layers/rnn/gru_test.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This change to exclude gru_test.py seems out of scope for a pull request focused on adding LSTM support. Additionally, the entire keras/src/layers/rnn directory is already excluded on line 26, which makes this new line redundant if the directory exclusion works as expected. Could you please clarify the reasoning for this change or remove it if it's not necessary?

Copy link
Contributor

@goyaladitya05 goyaladitya05 Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed with Gemini here. Adding gru_test.py when the parent directory is already excluded makes no sense. Please clean up the excluded_tests.txt diff.

143 changes: 130 additions & 13 deletions keras/src/backend/openvino/rnn.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import openvino.opset15 as ov_opset
from openvino import Model
from openvino import Type
from keras.src import backend
from keras.src import activations
from keras.src import ops
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You cannot import ops like this here, this is what's creating the circular imports.

But they're not even used.


from keras.src.backend.openvino.numpy import split, concatenate
from keras.src import tree
from keras.src.backend.openvino.core import OpenVINOKerasTensor
from keras.src.backend.openvino.core import get_ov_output
Expand Down Expand Up @@ -105,7 +109,7 @@ def _slice_at_0(x):
x_ov = get_ov_output(x)
slice_0 = ov_opset.gather(
x_ov,
ov_opset.constant([0], dtype=Type.i32).output(0),
ov_opset.constant(0, dtype=Type.i32).output(0),
ov_opset.constant(0, dtype=Type.i32).output(0),
).output(0)
return OpenVINOKerasTensor(slice_0)
Expand All @@ -130,10 +134,10 @@ def _slice_at_0(x):
inp_ov = get_ov_output(inp)
pshape = inp_ov.get_partial_shape()
if pshape.rank.is_static:
new_shape = list(pshape)[1:]
new_shape = [1] + list(pshape)[1:]
else:
new_shape = (
[-1] * (pshape.rank.get_length() - 1)
[-1] * (pshape.rank.get_length())
if pshape.rank.is_static
else None
)
Expand All @@ -144,7 +148,7 @@ def _slice_at_0(x):
if mask is not None:
mask_ov = get_ov_output(mask)
pshape = mask_ov.get_partial_shape()
new_shape = list(pshape)[1:] if pshape.rank.is_static else None
new_shape = [1] + list(pshape)[1:] if pshape.rank.is_static else None
param = ov_opset.parameter(new_shape, mask_ov.get_element_type())
sliced_mask_params.append(param)
params.append(param)
Expand Down Expand Up @@ -172,9 +176,11 @@ def _slice_at_0(x):
)
constants_params.append(param)
params.append(param)
sliced_inputs_t = [
OpenVINOKerasTensor(p.output(0)) for p in sliced_inputs_params
]
sliced_inputs_t = []
for p in sliced_inputs_params:
p_out = p.output(0)
p_out = ov_opset.squeeze(p_out, ov_opset.constant([0], dtype=Type.i32).output(0)).output(0)
sliced_inputs_t.append(OpenVINOKerasTensor(p_out))
merged_states_t = [
OpenVINOKerasTensor(p.output(0)) for p in merged_states_params
]
Expand All @@ -192,6 +198,7 @@ def _slice_at_0(x):
final_last_output_list = []
if mask is not None:
mask_t = sliced_mask_params[0].output(0)
mask_t = ov_opset.squeeze(mask_t, ov_opset.constant([0], dtype=Type.i32).output(0)).output(0)
for i, (new_st, old_st) in enumerate(
zip(flat_step_new_states, merged_states_t)
):
Expand Down Expand Up @@ -226,6 +233,8 @@ def _slice_at_0(x):
final_output_list = [get_ov_output(x) for x in flat_step_output]
final_last_output_list = [get_ov_output(x) for x in flat_step_output]
cond_const = ov_opset.constant(True, Type.boolean).output(0)
zero_const = ov_opset.constant([0], dtype=Type.i32).output(0)
final_output_list = [ov_opset.unsqueeze(x, zero_const).output(0) for x in final_output_list]
results = (
[cond_const]
+ final_states_list
Expand Down Expand Up @@ -283,10 +292,105 @@ def _slice_at_0(x):
outputs = tree.map_structure(swap_batch_timestep, outputs)
return last_output, outputs, new_states


def lstm(*args, **kwargs):
raise NotImplementedError("`lstm` is not supported with openvino backend")

def lstm(
inputs,
initial_state_h,
initial_state_c,
mask,
kernel,
recurrent_kernel,
bias,
activation,
recurrent_activation,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The unroll parameter is included in the function signature but is not used within the implementation. According to the Keras API design guidelines (lines 18 and 53), unused parameters should be avoided to keep the API surface clean and minimize user confusion. Please consider either implementing the unrolling logic or removing this parameter.

return_sequences=False,
go_backwards=False,
unroll=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The unroll parameter is defined but not used in this function. According to the Keras API design guidelines, parameters that are not used should be removed to avoid user confusion and to keep the API surface clean.

time_major=False,
Comment on lines +315 to +316
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

These local imports are redundant. keras.src.backend is already imported at the top of the file, and split and concatenate are also imported from keras.src.backend.openvino.numpy at the file's top level. These lines can be removed to improve code clarity.

):
from keras.src import backend
from keras.src.backend.openvino.numpy import split, concatenate
if mask is not None:
raise NotImplementedError("lstm sequence with mask is not supported")
mask = get_ov_output(mask)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This line is unreachable due to the raise NotImplementedError on the preceding line. It should be removed.

inputs = get_ov_output(inputs)
initial_state_h = get_ov_output(initial_state_h)
initial_state_c = get_ov_output(initial_state_c)
kernel = get_ov_output(kernel)
recurrent_kernel = get_ov_output(recurrent_kernel)
shape = ov_opset.shape_of(inputs, Type.i32).output(0)
if not time_major:
batch_size = ov_opset.gather(shape, ov_opset.constant([0], Type.i32), ov_opset.constant(0, Type.i32)).output(0)
seq_length = ov_opset.gather(shape, ov_opset.constant([1], Type.i32), ov_opset.constant(0, Type.i32)).output(0)
else:
batch_size = ov_opset.gather(shape, ov_opset.constant([1], Type.i32), ov_opset.constant(0, Type.i32)).output(0)
seq_length = ov_opset.gather(shape, ov_opset.constant([0], Type.i32), ov_opset.constant(0, Type.i32)).output(0)
seq_length_tensor = ov_opset.broadcast(seq_length, batch_size).output(0)
if time_major:
axes = ov_opset.constant([1, 0, 2], Type.i32).output(0)
inputs = ov_opset.transpose(inputs, axes).output(0)
kernel_T = ov_opset.transpose(kernel, ov_opset.constant([1, 0], Type.i32)).output(0)
k_i, k_f, k_c, k_o = ov_opset.split(kernel_T, ov_opset.constant(0, Type.i32), 4).outputs()
W = ov_opset.concat([k_f, k_i, k_c, k_o], axis=0).output(0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The concatenation order for weights W is incorrect. Keras LSTM kernels are ordered for [input, forget, cell, output] gates. The OpenVINO LSTMSequence operation expects the weights for gates in the order [input, output, forget, cell]. The current code concatenates them in the order [forget, input, cell, output]. This should be [input, output, forget, cell].

Suggested change
W = ov_opset.concat([k_f, k_i, k_c, k_o], axis=0).output(0)
W = ov_opset.concat([k_i, k_o, k_f, k_c], axis=0).output(0)

W = ov_opset.unsqueeze(W, ov_opset.constant([0], Type.i32)).output(0)
recurrent_kernel_T = ov_opset.transpose(recurrent_kernel, ov_opset.constant([1, 0], Type.i32)).output(0)
r_i, r_f, r_c, r_o = ov_opset.split(recurrent_kernel_T, ov_opset.constant(0, Type.i32), 4).outputs()
R = ov_opset.concat([r_f, r_i, r_c, r_o], axis=0).output(0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Similar to the kernel weights W, the concatenation order for recurrent weights R is incorrect. It should be [input, output, forget, cell] to match the OpenVINO LSTMSequence op specification.

Suggested change
R = ov_opset.concat([r_f, r_i, r_c, r_o], axis=0).output(0)
R = ov_opset.concat([r_i, r_o, r_f, r_c], axis=0).output(0)

R = ov_opset.unsqueeze(R, ov_opset.constant([0], Type.i32)).output(0)
if bias is not None:
bias = get_ov_output(bias)
b_i, b_f, b_c, b_o = ov_opset.split(bias, ov_opset.constant(0, Type.i32), 4).outputs()
B = ov_opset.concat([b_f, b_i, b_c, b_o], axis=0).output(0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Similar to the weights, the concatenation order for biases B is incorrect. It should be [input, output, forget, cell] to match the OpenVINO LSTMSequence op specification.

Suggested change
B = ov_opset.concat([b_f, b_i, b_c, b_o], axis=0).output(0)
B = ov_opset.concat([b_i, b_o, b_f, b_c], axis=0).output(0)

else:
W_shape = ov_opset.shape_of(W, Type.i32).output(0)
B_size = ov_opset.gather(W_shape, ov_opset.constant([1], Type.i32), ov_opset.constant(0, Type.i32)).output(0)
B = ov_opset.broadcast(ov_opset.constant(0, inputs.get_element_type()), B_size).output(0)
B = ov_opset.unsqueeze(B, ov_opset.constant([0], Type.i32)).output(0)
initial_state_h = ov_opset.unsqueeze(initial_state_h, ov_opset.constant([1], Type.i32)).output(0)
initial_state_c = ov_opset.unsqueeze(initial_state_c, ov_opset.constant([1], Type.i32)).output(0)
direction = "forward"
if go_backwards:
direction = "reverse"
def get_activation_name(act):
from keras.src import activations
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

activations are already imported at the top, you don't need this import.

if act == activations.tanh: return "tanh"
if act == activations.sigmoid: return "sigmoid"
if act == activations.relu: return "relu"
if act == activations.linear: return "linear"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The get_activation_name helper function defaults to returning "tanh" for any unrecognized activation function. This can lead to silent errors where an unsupported activation is used but the model compiles and runs with a different activation (tanh) than intended. According to the Keras API design guidelines (lines 139-143), it's better to catch user errors early and provide informative error messages.

Please consider raising a ValueError for unsupported activations. You could also expand the list of recognized activations to include those from keras.src.ops (e.g., ops.tanh) for better robustness.

    def get_activation_name(act):
        from keras.src import activations
        from keras.src import ops
        if act in (activations.tanh, ops.tanh):
            return "tanh"
        if act in (activations.sigmoid, ops.sigmoid):
            return "sigmoid"
        if act in (activations.relu, ops.relu):
            return "relu"
        if act in (activations.linear, ops.linear):
            return "linear"
        raise ValueError(f"Unsupported activation function for OpenVINO LSTM: {act}")

if hasattr(act, "__name__"):
return act.__name__
return "tanh"
Comment on lines +396 to +408
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This function can be made more concise and maintainable by using a dictionary to map activation functions to their string names. This is a common pattern for this type of logic and would improve readability.

For example:

ACTIVATION_MAP = {
    activations.tanh: "tanh",
    activations.sigmoid: "sigmoid",
    activations.relu: "relu",
    activations.linear: "linear",
}

def get_activation_name(act):
    name = ACTIVATION_MAP.get(act)
    if name:
        return name
    if hasattr(act, "__name__"):
        return act.__name__
    return "tanh"

act_names = [get_activation_name(recurrent_activation), get_activation_name(activation), get_activation_name(activation)]
hidden_size = R.get_partial_shape()[2].get_length() if R.get_partial_shape().rank.is_static and R.get_partial_shape()[2].is_static else -1
lstm_node = ov_opset.lstm_sequence(
X=inputs,
initial_hidden_state=initial_state_h,
initial_cell_state=initial_state_c,
sequence_lengths=seq_length_tensor,
W=W,
R=R,
B=B,
hidden_size=hidden_size,
direction=direction,
activations=act_names
)
Y = lstm_node.output(0)
Ho = lstm_node.output(1)
Co = lstm_node.output(2)
Y = ov_opset.squeeze(Y, ov_opset.constant([1], Type.i32)).output(0)
Ho = ov_opset.squeeze(Ho, ov_opset.constant([1], Type.i32)).output(0)
Co = ov_opset.squeeze(Co, ov_opset.constant([1], Type.i32)).output(0)
if not time_major:
outputs = Y
else:
axes = ov_opset.constant([1, 0, 2], Type.i32).output(0)
outputs = ov_opset.transpose(Y, axes).output(0)
if not return_sequences:
if time_major:
outputs = ov_opset.unsqueeze(Ho, ov_opset.constant([0], Type.i32)).output(0)
else:
outputs = ov_opset.unsqueeze(Ho, ov_opset.constant([1], Type.i32)).output(0)
last_output = Ho
return OpenVINOKerasTensor(last_output), OpenVINOKerasTensor(outputs), [OpenVINOKerasTensor(Ho), OpenVINOKerasTensor(Co)]

def gru(*args, **kwargs):
raise NotImplementedError("`gru` is not supported with openvino backend")
Expand All @@ -304,5 +408,18 @@ def numpy_scan(f, init, xs, reverse=False, mask=None):
)


def cudnn_ok(*args, **kwargs):
return False
def cudnn_ok(
activation,
recurrent_activation,
unroll,
use_bias,
reset_after=None,
):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to clarify, in this context, cudnn_ok means "use the native OpenVino implementation" (we should rename it).

But are the criteria really the same as for CuDNN? That seems surprising.

if reset_after is None:
return (
activation in (activations.tanh, ops.tanh)
and recurrent_activation in (activations.sigmoid, ops.sigmoid)
and not unroll
)
else:
return False
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This cudnn_ok function appears to be dead code in the context of the OpenVINO backend. It's only referenced in the LSTM and GRU layer __init__ methods within a condition that checks backend.backend() == "tensorflow". When the backend is OpenVINO, this function will not be called.

Additionally, the name cudnn_ok is misleading here. If this function is intended to be used in the future for OpenVINO, it should be renamed to something more appropriate, like is_fast_path_supported.

For now, it seems this function can be reverted to return False to avoid confusion and maintaining unused code.

Loading