-
Notifications
You must be signed in to change notification settings - Fork 94
KV-events abstraction #356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f5ae878
10d4d53
1289fc4
404c89c
c55e86c
63100f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,7 +78,7 @@ def create_llm(): | |
| disable_hybrid_kv_cache_manager=True, | ||
| kv_events_config=kv_events_config, | ||
| block_size=16, | ||
| prefix_caching_hash_algo="sha256_cbor", | ||
| prefix_caching_hash_algo="sha256_cbor_64bit", | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Had this error when running the test: |
||
| enable_lora=True, | ||
| max_model_len=4096, | ||
| ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| /* | ||
| Copyright 2025 The llm-d Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package decoder | ||
|
|
||
| // Decoder defines the interface for encoding and decoding raw bytes. | ||
| // Despite the name, it covers both directions: Decode and Encode. | ||
| type Decoder interface { | ||
| // Decode unmarshals data into the provided value. | ||
| // NOTE(review): v is presumably a pointer to the destination value - confirm against implementations. | ||
| Decode(data []byte, v interface{}) error | ||
|
|
||
| // Encode marshals the provided value into bytes. | ||
| // It returns the encoded bytes, or an error. | ||
| Encode(v interface{}) ([]byte, error) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| /* | ||
| Copyright 2025 The llm-d Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package decoder | ||
|
|
||
| import ( | ||
| "fmt" | ||
|
|
||
| "github.com/vmihailenco/msgpack/v5" | ||
| ) | ||
|
|
||
| // MsgpackDecoder implements Decoder for MessagePack format. | ||
| // It is an empty struct and carries no state of its own. | ||
| type MsgpackDecoder struct{} | ||
|
|
||
| // NewMsgpackDecoder returns a pointer to a freshly allocated, zero-valued | ||
| // MsgpackDecoder. | ||
| func NewMsgpackDecoder() *MsgpackDecoder { | ||
| return new(MsgpackDecoder) | ||
| } | ||
|
|
||
| // Decode parses the msgpack-encoded data into v by delegating to | ||
| // msgpack.Unmarshal. A failure is wrapped with %w so callers can still | ||
| // inspect the underlying error. | ||
| func (m *MsgpackDecoder) Decode(data []byte, v interface{}) error { | ||
| err := msgpack.Unmarshal(data, v) | ||
| if err == nil { | ||
| return nil | ||
| } | ||
| return fmt.Errorf("failed to decode msgpack: %w", err) | ||
| } | ||
|
|
||
| // Encode serializes v to msgpack bytes by delegating to msgpack.Marshal. | ||
| // On failure it returns nil bytes and the underlying error wrapped with %w. | ||
| func (m *MsgpackDecoder) Encode(v interface{}) ([]byte, error) { | ||
| encoded, err := msgpack.Marshal(v) | ||
| if err == nil { | ||
| return encoded, nil | ||
| } | ||
| return nil, fmt.Errorf("failed to encode msgpack: %w", err) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| /* | ||
| Copyright 2025 The llm-d Authors. | ||
|
|
||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
|
|
||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
|
|
||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package engineadapter | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
|
|
||
| "github.com/llm-d/llm-d-kv-cache/pkg/kvevents/decoder" | ||
| "github.com/llm-d/llm-d-kv-cache/pkg/kvevents/events" | ||
| "github.com/llm-d/llm-d-kv-cache/pkg/kvevents/transport" | ||
| ) | ||
|
|
||
| // EngineType represents the type of LLM engine. | ||
| // It is a string-based type; NewAdapter switches on its value. | ||
| type EngineType string | ||
|
|
||
| const ( | ||
| // EngineTypeVLLM represents the vLLM engine. | ||
| // Its string value is "vllm". | ||
| EngineTypeVLLM EngineType = "vllm" | ||
| ) | ||
|
|
||
| // NewAdapter returns the EngineAdapter that corresponds to engineType. | ||
| // An unrecognized engine type yields a nil adapter and an error naming it. | ||
| func NewAdapter(engineType EngineType) (EngineAdapter, error) { | ||
| // vLLM is the only engine handled today; other engines would get their own branch here. | ||
| if engineType == EngineTypeVLLM { | ||
| return NewVLLMAdapter() | ||
| } | ||
| return nil, fmt.Errorf("unknown engine type: %s", engineType) | ||
| } | ||
|
|
||
| // EngineAdapter defines the interface for engine-specific adapters. | ||
| // Each inference engine has its own adapter implementation that handles | ||
| // engine-specific operations. | ||
| // Instances are obtained through NewAdapter, which selects the implementation | ||
| // by EngineType. | ||
| type EngineAdapter interface { | ||
| // Transport returns the transport layer for receiving messages. | ||
| Transport() transport.Transport | ||
|
|
||
| // Decoder returns the decoder for parsing message payloads. | ||
| Decoder() decoder.Decoder | ||
|
|
||
| // getHashAsUint64 converts engine-specific hash formats to uint64. | ||
| // The method is unexported, so only types declared in this package can | ||
| // satisfy EngineAdapter directly. | ||
| getHashAsUint64(raw any) (uint64, error) | ||
|
|
||
| // ReceiveAndDecode receives a message from the transport, parses it, | ||
| // decodes the payload, and returns a batch of generic events. | ||
| // NOTE(review): presumably blocks until a message arrives or ctx is done - confirm in implementations. | ||
| ReceiveAndDecode(ctx context.Context) (*events.EventBatch, error) | ||
|
|
||
| // Connect establishes a connection to a remote endpoint. | ||
| Connect(ctx context.Context, endpoint string) error | ||
|
|
||
| // Bind listens on a local endpoint for incoming connections. | ||
| Bind(ctx context.Context, endpoint string) error | ||
|
|
||
| // SubscribeToTopic sets the topic filter for receiving messages. | ||
| SubscribeToTopic(topicFilter string) error | ||
|
|
||
| // Close closes the adapter and releases all resources. | ||
| Close() error | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Previously, test events were created using specific event structures and then converted to a tagged union format via ToTaggedUnion(). This tagged union matched the exact format vLLM sends to llm-d. The tagged union structure was necessary because of double marshaling: first to extract the event type tag, and second for the actual event data. I avoided the double marshaling, so I removed ToTaggedUnion() completely.