Skip to content

Commit 71ca575

Browse files
hheydary authored and copybara-github committed
Establish the new litert stateless executors pattern and implement dynamic executor.
LiteRT-LM-PiperOrigin-RevId: 881470803
1 parent d4c2a5b commit 71ca575

16 files changed

+2138
-35
lines changed

runtime/executor/BUILD

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -635,10 +635,11 @@ cc_test(
635635
":llm_executor_io_types",
636636
"@com_google_googletest//:gtest_main",
637637
"@com_google_absl//absl/types:span",
638+
"@litert//litert/c:litert_tensor_buffer_types",
638639
"@litert//litert/cc:litert_element_type",
639640
"@litert//litert/cc:litert_environment",
640641
"@litert//litert/cc:litert_layout",
641-
"@litert//litert/cc:litert_model",
642+
"@litert//litert/cc:litert_ranked_tensor_type",
642643
"@litert//litert/cc:litert_tensor_buffer",
643644
"//runtime/components/constrained_decoding:constrained_decoder",
644645
"//runtime/components/constrained_decoding:fake_constraint",

runtime/executor/kv_cache_interface.h

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,20 @@
1+
// Copyright 2026 The ODML Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
115
#ifndef THIRD_PARTY_ODML_LITERT_LM_RUNTIME_EXECUTOR_KV_CACHE_INTERFACE_H_
216
#define THIRD_PARTY_ODML_LITERT_LM_RUNTIME_EXECUTOR_KV_CACHE_INTERFACE_H_
317

4-
#include <cstddef>
518
#include <string>
619

720
#include "absl/status/status.h" // from @com_google_absl
@@ -15,19 +28,33 @@ class KVCacheInterface {
1528
public:
1629
virtual ~KVCacheInterface() = default;
1730

18-
// Resizes the KV cache to the specified number of entries.
19-
// Note: If the requested `num_entries` is smaller than the current number
20-
// of entries, the cache will be trimmed to the requested size.
21-
virtual absl::Status Resize(size_t num_entries) = 0;
22-
2331
// Returns the total number of entries in the KV cache per block.
2432
virtual int GetNumEntries() const = 0;
2533

34+
// Returns the batch size of the KV cache.
35+
virtual int GetBatchSize() const = 0;
36+
2637
// Serializes the KV cache to a byte string.
2738
virtual absl::StatusOr<std::string> Serialize() const = 0;
2839

2940
// Loads the KV cache from a serialized byte string.
3041
virtual absl::Status Load(absl::string_view serialized_kv_cache) = 0;
42+
43+
// Selects a single batch from the other KV cache and copies it to this KV
44+
// cache.
45+
// Example:
46+
// This has shape [1, ...] and other has shape [3, ...]. Then we can select
47+
// batch x from other and copy it to this
48+
// (i.e., other[x, :, ...] -> this[0, :, ...]).
49+
virtual absl::Status SelectAndCopyFrom(KVCacheInterface& other,
50+
int batch_index) = 0;
51+
52+
// Broadcasts the source KV with batch size 1 to this KV cache with batch size
53+
// > 1.
54+
// Example:
55+
// This has shape [3, ...] and other has shape [1, ...]. Then we can copy
56+
// other[0, :, ...] -> this[0, :, ...], this[1, :, ...], this[2, :, ...].
57+
virtual absl::Status BroadcastAndCopyFrom(KVCacheInterface& other) = 0;
3158
};
3259

3360
} // namespace litert::lm

runtime/executor/litert/BUILD

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
# Copyright 2026 The ODML Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [Google-internal load of `cc_library`]
16+
17+
package(
18+
default_hdrs_check = "strict",
19+
default_visibility = [
20+
"//visibility:public",
21+
],
22+
)
23+
24+
licenses(["notice"])
25+
26+
# Debug logging helpers for float value spans and LiteRT tensor buffers
# (debug_utils.h / debug_utils.cc).
#
# The select() below depends on the single
# `litert_api_with_dynamic_runtime` target when
# `@litert//litert:litert_link_capi_so` matches, and on the individual
# `litert_tensor_buffer` target otherwise.
cc_library(
27+
name = "debug_utils",
28+
srcs = ["debug_utils.cc"],
29+
hdrs = ["debug_utils.h"],
30+
deps = [
31+
"@com_google_absl//absl/log",
32+
"@com_google_absl//absl/log:absl_log",
33+
"@com_google_absl//absl/strings",
34+
"@com_google_absl//absl/strings:string_view",
35+
"@com_google_absl//absl/types:span",
36+
"//runtime/util:convert_tensor_buffer",
37+
] + select({
38+
"@litert//litert:litert_link_capi_so": [
39+
"@litert//litert/cc:litert_api_with_dynamic_runtime",
40+
],
41+
"//conditions:default": [
42+
"@litert//litert/cc:litert_tensor_buffer",
43+
],
44+
}),
45+
)
46+
47+
# Unit test for :debug_utils (debug_utils_test.cc).
# NOTE(review): the dependency on absl's `scoped_mock_log` suggests the test
# asserts on emitted log lines; the test source is not visible here, so
# confirm before changing any log message text in debug_utils.cc.
cc_test(
48+
name = "debug_utils_test",
49+
srcs = ["debug_utils_test.cc"],
50+
deps = [
51+
":debug_utils",
52+
"@com_google_googletest//:gtest_main",
53+
"@com_google_absl//absl/base:log_severity",
54+
"@com_google_absl//absl/log:scoped_mock_log",
55+
"@com_google_absl//absl/strings:string_view",
56+
"@com_google_absl//absl/types:span",
57+
"@litert//litert/cc:litert_tensor_buffer",
58+
"@litert//litert/test:matchers",
59+
"//runtime/util:convert_tensor_buffer",
60+
],
61+
)
62+
63+
# KV cache library (kv_cache.h / kv_cache.cc).
# NOTE(review): depends on //runtime/executor:kv_cache_interface, so this is
# presumably the LiteRT-backed implementation of KVCacheInterface; the sources
# are not visible here.
#
# The select() below depends on the single
# `litert_api_with_dynamic_runtime` target when
# `@litert//litert:litert_link_capi_so` matches, and on the individual LiteRT
# C++ targets otherwise.
cc_library(
64+
name = "kv_cache",
65+
srcs = ["kv_cache.cc"],
66+
hdrs = ["kv_cache.h"],
67+
deps = [
68+
"@com_google_absl//absl/container:flat_hash_map",
69+
"@com_google_absl//absl/memory",
70+
"@com_google_absl//absl/status",
71+
"@com_google_absl//absl/status:statusor",
72+
"@com_google_absl//absl/strings",
73+
"@com_google_absl//absl/strings:string_view",
74+
"//runtime/executor:common_utils",
75+
"//runtime/executor:kv_cache_interface",
76+
"//runtime/executor:litert_compiled_model_executor_utils",
77+
"//runtime/util:litert_status_util",
78+
] + select({
79+
"@litert//litert:litert_link_capi_so": [
80+
"@litert//litert/cc:litert_api_with_dynamic_runtime",
81+
],
82+
"//conditions:default": [
83+
"@litert//litert/cc:litert_compiled_model",
84+
"@litert//litert/cc:litert_element_type",
85+
"@litert//litert/cc:litert_environment",
86+
"@litert//litert/cc:litert_layout",
87+
"@litert//litert/cc:litert_macros",
88+
"@litert//litert/cc:litert_model",
89+
"@litert//litert/cc:litert_model_types",
90+
"@litert//litert/cc:litert_options",
91+
"@litert//litert/cc:litert_ranked_tensor_type",
92+
"@litert//litert/cc:litert_tensor_buffer",
93+
"@litert//litert/cc:litert_tensor_buffer_types",
94+
],
95+
}),
96+
)
97+
98+
# Unit test for :kv_cache (kv_cache_test.cc). Files from //runtime/testdata
# are made available at test run time through `data`.
cc_test(
99+
name = "kv_cache_test",
100+
srcs = ["kv_cache_test.cc"],
101+
data = [
102+
"//runtime/testdata",
103+
],
104+
deps = [
105+
":kv_cache",
106+
"@com_google_googletest//:gtest_main",
107+
"@com_google_absl//absl/status",
108+
"@com_google_absl//absl/strings:string_view",
109+
"@litert//litert/cc:litert_common",
110+
"@litert//litert/cc:litert_compiled_model",
111+
"@litert//litert/cc:litert_environment",
112+
"@litert//litert/cc:litert_model",
113+
"@litert//litert/cc:litert_options",
114+
"@litert//litert/test:matchers",
115+
"//runtime/components:model_resources",
116+
"//runtime/components:model_resources_litert_lm",
117+
"//runtime/util:convert_tensor_buffer",
118+
"//runtime/util:litert_lm_loader",
119+
"//runtime/util:scoped_file",
120+
"//runtime/util:test_utils",
121+
],
122+
)
123+
124+
# LLM executor library (llm_executor.h / llm_executor.cc), built on top of
# :kv_cache.
# NOTE(review): per the commit description this is the new stateless/dynamic
# executor; the sources are not visible here, so confirm against
# llm_executor.h.
#
# The select() below depends on the single
# `litert_api_with_dynamic_runtime` target when
# `@litert//litert:litert_link_capi_so` matches, and on the individual LiteRT
# C++ targets (including the CPU/GPU/runtime option targets) otherwise.
cc_library(
125+
name = "llm_executor",
126+
srcs = ["llm_executor.cc"],
127+
hdrs = ["llm_executor.h"],
128+
deps = [
129+
":kv_cache",
130+
"@com_google_absl//absl/container:flat_hash_map",
131+
"@com_google_absl//absl/memory",
132+
"@com_google_absl//absl/status",
133+
"@com_google_absl//absl/status:statusor",
134+
"@com_google_absl//absl/strings",
135+
"@com_google_absl//absl/strings:string_view",
136+
"@com_google_absl//absl/types:span",
137+
"//runtime/components:model_resources",
138+
"//runtime/components/embedding_lookup:embedding_lookup_manager",
139+
"//runtime/executor:executor_settings_base",
140+
"//runtime/executor:kv_cache_interface",
141+
"//runtime/executor:litert_compiled_model_executor_utils",
142+
"//runtime/executor:llm_executor_interface",
143+
"//runtime/executor:llm_executor_io_types",
144+
"//runtime/executor:llm_executor_settings",
145+
"//runtime/util:convert_tensor_buffer",
146+
"//runtime/util:litert_status_util",
147+
"//runtime/util:scoped_file",
148+
"@litert//tflite/delegates/xnnpack:xnnpack_delegate",
149+
] + select({
150+
"@litert//litert:litert_link_capi_so": [
151+
"@litert//litert/cc:litert_api_with_dynamic_runtime",
152+
],
153+
"//conditions:default": [
154+
"@litert//litert/cc:litert_common",
155+
"@litert//litert/cc:litert_compiled_model",
156+
"@litert//litert/cc:litert_element_type",
157+
"@litert//litert/cc:litert_environment",
158+
"@litert//litert/cc:litert_expected",
159+
"@litert//litert/cc:litert_layout",
160+
"@litert//litert/cc:litert_macros",
161+
"@litert//litert/cc:litert_model",
162+
"@litert//litert/cc:litert_model_types",
163+
"@litert//litert/cc:litert_options",
164+
"@litert//litert/cc:litert_ranked_tensor_type",
165+
"@litert//litert/cc:litert_tensor_buffer",
166+
"@litert//litert/cc:litert_tensor_buffer_types",
167+
"@litert//litert/cc/options:litert_cpu_options",
168+
"@litert//litert/cc/options:litert_gpu_options",
169+
"@litert//litert/cc/options:litert_runtime_options",
170+
],
171+
}),
172+
)
173+
174+
# CPU test for :llm_executor (llm_executor_cpu_test.cc).
# NOTE(review): `data` is an empty list here, unlike :kv_cache_test which
# lists //runtime/testdata. Presumably the model files were stripped from this
# export; confirm the test can locate its inputs when run from this BUILD file.
cc_test(
175+
name = "llm_executor_cpu_test",
176+
srcs = ["llm_executor_cpu_test.cc"],
177+
data = [
178+
],
179+
deps = [
180+
":llm_executor",
181+
"@com_google_googletest//:gtest_main",
182+
"@com_google_absl//absl/status:statusor",
183+
"@com_google_absl//absl/strings:string_view",
184+
"@com_google_absl//absl/types:span",
185+
"@litert//litert/cc:litert_environment",
186+
"@litert//litert/test:matchers",
187+
"//runtime/components:model_resources",
188+
"//runtime/components:model_resources_litert_lm",
189+
"//runtime/executor:executor_settings_base",
190+
"//runtime/executor:llm_executor_io_types",
191+
"//runtime/executor:llm_executor_settings",
192+
"//runtime/util:convert_tensor_buffer",
193+
"//runtime/util:litert_lm_loader",
194+
"//runtime/util:scoped_file",
195+
"//runtime/util:test_utils",
196+
],
197+
)
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// Copyright 2026 The ODML Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "runtime/executor/litert/debug_utils.h"
16+
17+
#include <cstddef>
18+
19+
#include "absl/log/absl_log.h" // from @com_google_absl
20+
#include "absl/strings/str_join.h" // from @com_google_absl
21+
#include "absl/strings/string_view.h" // from @com_google_absl
22+
#include "absl/types/span.h" // from @com_google_absl
23+
#include "litert/cc/litert_tensor_buffer.h" // from @litert
24+
#include "runtime/util/convert_tensor_buffer.h"
25+
26+
namespace litert::lm {
27+
28+
// Logs `values` to the INFO log, prefixed with `debug` and the span size.
//
// Spans with at most `3 * num_values_to_log + 10` elements are logged in
// full. Longer spans are summarized as three windows joined by " ... ": the
// first `num_values_to_log` values, `num_values_to_log` values starting at
// half of the tail offset, and the last `num_values_to_log` values.
void LogValues(absl::Span<const float> values, size_t num_values_to_log,
               absl::string_view debug) {
  // Slack on top of the three windows below which summarizing is not worth it.
  constexpr size_t kNumExtraValuesToLog = 10;
  const size_t total = values.size();

  // Same predicate as `num_values_to_log * 3 + kNumExtraValuesToLog >= total`,
  // rearranged so that a very large `num_values_to_log` cannot wrap around.
  // With the wrapped value this branch would be skipped and the offsets
  // computed below would underflow and fall outside the span.
  const bool log_everything =
      total <= kNumExtraValuesToLog ||
      num_values_to_log >= (total - kNumExtraValuesToLog + 2) / 3;
  if (log_everything) {
    ABSL_LOG(INFO) << debug << "(size=" << total
                   << "): " << absl::StrJoin(values, ", ");
    return;
  }

  // Here total > 3 * num_values_to_log + kNumExtraValuesToLog, so all three
  // windows are in range and do not overlap.
  const size_t end_offset = total - num_values_to_log;
  const size_t mid_offset = end_offset / 2;
  ABSL_LOG(INFO) << debug << "(size=" << total << "): "
                 << absl::StrJoin(values.subspan(0, num_values_to_log), ", ")
                 << " ... "
                 << absl::StrJoin(values.subspan(mid_offset, num_values_to_log),
                                  ", ")
                 << " ... " << absl::StrJoin(values.subspan(end_offset), ", ");
}
46+
47+
// Logs the float contents of `tensor` through LogValues().
//
// The buffer is first viewed in place via ReferTensorBufferAsSpan<float>();
// if that fails, its contents are copied out with
// CopyFromTensorBuffer<float>(). If neither succeeds, a single ERROR line
// prefixed with `debug` is emitted instead.
void LogTensor(TensorBuffer& tensor, size_t num_values_to_log,
               absl::string_view debug) {
  if (auto in_place = ReferTensorBufferAsSpan<float>(tensor); in_place) {
    // Zero-copy path: the buffer could be referenced directly.
    LogValues(*in_place, num_values_to_log, debug);
  } else if (auto copied = CopyFromTensorBuffer<float>(tensor); copied) {
    // Fallback path: the values were copied into a temporary vector.
    LogValues(*copied, num_values_to_log, debug);
  } else {
    ABSL_LOG(ERROR) << debug << ": Failed to log logits.";
  }
}
65+
66+
} // namespace litert::lm
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright 2026 The ODML Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#ifndef THIRD_PARTY_ODML_LITERT_LM_RUNTIME_EXECUTOR_LITERT_DEBUG_UTILS_H_
16+
#define THIRD_PARTY_ODML_LITERT_LM_RUNTIME_EXECUTOR_LITERT_DEBUG_UTILS_H_
17+
18+
#include <cstddef>
19+
20+
#include "absl/strings/string_view.h" // from @com_google_absl
21+
#include "absl/types/span.h" // from @com_google_absl
22+
#include "litert/cc/litert_tensor_buffer.h" // from @litert
23+
24+
namespace litert::lm {
25+
26+
// Logs `values` to the INFO log (absl), prefixed with `debug` and the span
// size.
//
// Spans with at most `3 * num_values_to_log + 10` elements are logged in
// full. Longer spans are summarized as the first `num_values_to_log` values,
// `num_values_to_log` values taken from the middle, and the last
// `num_values_to_log` values, separated by " ... ".
27+
void LogValues(absl::Span<const float> values, size_t num_values_to_log,
28+
absl::string_view debug);
29+
30+
// Logs the float contents of `tensor` to the INFO log (absl) using
// LogValues(), with the same `num_values_to_log` and `debug` prefix.
//
// The tensor is read as `float`: it is referenced in place when possible and
// copied out otherwise. If neither works, an ERROR line prefixed with `debug`
// is logged instead of the values.
30+
void LogTensor(TensorBuffer& tensor, size_t num_values_to_log,
31+
absl::string_view debug);
33+
34+
} // namespace litert::lm
35+
36+
#endif // THIRD_PARTY_ODML_LITERT_LM_RUNTIME_EXECUTOR_LITERT_DEBUG_UTILS_H_

0 commit comments

Comments
 (0)