diff --git a/docs/genai/api/c.md b/docs/genai/api/c.md
index 9f431fcf7a8ba..6c7e1d703efc1 100644
--- a/docs/genai/api/c.md
+++ b/docs/genai/api/c.md
@@ -18,11 +18,11 @@ _Note: this API is in preview and is subject to change._
 
 ## Overview
 
-This document describes the C API for ONNX Runtime GenAI.  
-Below are the main functions and types, with code snippets and descriptions for each.
+This document describes the C API for ONNX Runtime GenAI. The API is not thread safe. Below are the main functions and types, with code snippets and descriptions for each.
 
 ---
 
+
 ## Model API
 
 ### OgaCreateModel
@@ -75,22 +75,24 @@ OgaResult* result = OgaCreateModelFromConfig(config, &model);
 
 ### OgaModelGetType
 
-Gets the type of the model.
+Gets the type of the model. The returned string must be destroyed with `OgaDestroyString`.
 
 ```c
 const char* type = NULL;
-OgaModelGetType(model, &type);
+OgaResult* result = OgaModelGetType(model, &type);
+OgaDestroyString(type);
 ```
 
 ---
 
 ### OgaModelGetDeviceType
 
-Gets the device type used by the model.
+Gets the device type used by the model. The returned string must be destroyed with `OgaDestroyString`.
 
 ```c
 const char* device_type = NULL;
-OgaModelGetDeviceType(model, &device_type);
+OgaResult* result = OgaModelGetDeviceType(model, &device_type);
+OgaDestroyString(device_type);
 ```
 
 ---
@@ -99,7 +101,7 @@ OgaModelGetDeviceType(model, &device_type);
 
 ### OgaCreateConfig
 
-Creates a configuration object from a config path.
+Creates a configuration object from a config directory. The path is expected to be encoded in UTF-8.
 
 ```c
 OgaConfig* config = NULL;
@@ -110,20 +112,20 @@ OgaResult* result = OgaCreateConfig("path/to/model_dir", &config);
 
 ### OgaConfigClearProviders
 
-Clears all providers from the configuration.
+Clears the list of execution providers in the given config.
 
 ```c
-OgaConfigClearProviders(config);
+OgaResult* result = OgaConfigClearProviders(config);
 ```
 
 ---
 
 ### OgaConfigAppendProvider
 
-Appends a provider to the configuration.
+Appends an execution provider to the configuration. If the provider already exists, does nothing.
 
 ```c
-OgaConfigAppendProvider(config, "CUDAExecutionProvider");
+OgaResult* result = OgaConfigAppendProvider(config, "CUDAExecutionProvider");
 ```
 
 ---
@@ -133,17 +135,97 @@ OgaConfigAppendProvider(config, "CUDAExecutionProvider");
 Sets a provider option in the configuration.
 
 ```c
-OgaConfigSetProviderOption(config, "CUDAExecutionProvider", "device_id", "0");
+OgaResult* result = OgaConfigSetProviderOption(config, "CUDAExecutionProvider", "device_id", "0");
+```
+
+---
+
+### OgaConfigAddModelData
+
+Adds model data to load the model from memory. The model data must remain valid at least until the model is created. If using session options such as `session.use_ort_model_bytes_directly`, the model data must remain valid until the `OgaModel` is destroyed.
+
+```c
+OgaResult* result = OgaConfigAddModelData(config, "model.onnx", model_data, model_data_length);
+```
+
+---
+
+### OgaConfigRemoveModelData
+
+Removes model data previously added to the config.
+
+```c
+OgaResult* result = OgaConfigRemoveModelData(config, "model.onnx");
+```
+
+---
+
+### OgaConfigSetDecoderProviderOptionsHardwareDeviceType
+
+Filters execution provider devices by the hardware device type property.
+
+```c
+OgaResult* result = OgaConfigSetDecoderProviderOptionsHardwareDeviceType(config, "provider_name", "GPU");
+```
+
+---
+
+### OgaConfigSetDecoderProviderOptionsHardwareDeviceId
+
+Filters execution provider devices by the hardware device ID property.
+
+```c
+OgaResult* result = OgaConfigSetDecoderProviderOptionsHardwareDeviceId(config, "provider_name", 0);
+```
+
+---
+
+### OgaConfigSetDecoderProviderOptionsHardwareVendorId
+
+Filters execution provider devices by the hardware vendor ID property.
+
+```c
+OgaResult* result = OgaConfigSetDecoderProviderOptionsHardwareVendorId(config, "provider_name", 0x1234);
+```
+
+---
+
+### OgaConfigClearDecoderProviderOptionsHardwareDeviceType
+
+Clears the hardware device type property.
+
+```c
+OgaResult* result = OgaConfigClearDecoderProviderOptionsHardwareDeviceType(config, "provider_name");
+```
+
+---
+
+### OgaConfigClearDecoderProviderOptionsHardwareDeviceId
+
+Clears the hardware device ID property.
+
+```c
+OgaResult* result = OgaConfigClearDecoderProviderOptionsHardwareDeviceId(config, "provider_name");
+```
+
+---
+
+### OgaConfigClearDecoderProviderOptionsHardwareVendorId
+
+Clears the hardware vendor ID property.
+
+```c
+OgaResult* result = OgaConfigClearDecoderProviderOptionsHardwareVendorId(config, "provider_name");
 ```
 
 ---
 
 ### OgaConfigOverlay
 
-Overlays a JSON string onto the configuration.
+Overlays a JSON string on top of the configuration.
 
 ```c
-OgaConfigOverlay(config, "{\"option\": \"value\"}");
+OgaResult* result = OgaConfigOverlay(config, "{\"option\": \"value\"}");
 ```
 
 ---
@@ -214,108 +296,167 @@ OgaDestroyTokenizer(tokenizer);
 
 ---
 
-### OgaTokenizerEncode
+### OgaCreateTokenizerStream
 
-Encodes a single string and adds the encoded sequence of tokens to the OgaSequences.
+Creates a tokenizer stream for incremental decoding. This allows decoding tokens one at a time.
 
 ```c
-OgaSequences* sequences = NULL;
-OgaCreateSequences(&sequences);
-OgaTokenizerEncode(tokenizer, "Hello world", sequences);
+OgaTokenizerStream* stream = NULL;
+OgaResult* result = OgaCreateTokenizerStream(tokenizer, &stream);
 ```
 
 ---
 
-### OgaTokenizerEncodeBatch
+### OgaCreateTokenizerStreamFromProcessor
 
-Encodes a batch of strings.
+Creates a tokenizer stream from a multi-modal processor for incremental decoding.
 
 ```c
-const char* texts[] = {"Hello", "World"};
-OgaTensor* tensor = NULL;
-OgaTokenizerEncodeBatch(tokenizer, texts, 2, &tensor);
+OgaTokenizerStream* stream = NULL;
+OgaResult* result = OgaCreateTokenizerStreamFromProcessor(processor, &stream);
 ```
 
 ---
 
-### OgaTokenizerToTokenId
+### OgaDestroyTokenizerStream
 
-Converts a string to its corresponding token ID.
+Destroys the tokenizer stream.
+
+```c
+OgaDestroyTokenizerStream(stream);
+```
+
+---
+
+### OgaTokenizerApplyChatTemplate
+
+Applies a chat template to input messages. The template can optionally include tools and a generation prompt.
+
+```c
+const char* result_string = NULL;
+OgaResult* result = OgaTokenizerApplyChatTemplate(tokenizer, NULL, messages_json, tools_json, true, &result_string);
+OgaDestroyString(result_string);
+```
+
+---
+
+### OgaUpdateTokenizerOptions
+
+Updates tokenizer options for the given tokenizer instance.
+
+Supported options:
+- `add_special_tokens`: Controls whether to add special tokens (e.g., BOS/EOS) during tokenization. Values: `"true"` / `"false"` or `"1"` / `"0"`. Default: `"false"`.
+- `skip_special_tokens`: Controls whether to remove special tokens during detokenization. Values: `"true"` / `"false"` or `"1"` / `"0"`. Default: `"true"`.
+
+```c
+const char* keys[] = {"add_special_tokens", "skip_special_tokens"};
+const char* values[] = {"true", "false"};
+OgaResult* result = OgaUpdateTokenizerOptions(tokenizer, keys, values, 2);
+```
+
+---
+
+### OgaTokenizerGetBosTokenId
+
+Returns the BOS (Beginning of Sequence) token ID.
 
 ```c
 int32_t token_id = 0;
-OgaTokenizerToTokenId(tokenizer, "Hello", &token_id);
+OgaResult* result = OgaTokenizerGetBosTokenId(tokenizer, &token_id);
 ```
 
 ---
 
-### OgaTokenizerDecode
+### OgaTokenizerGetEosTokenIds
 
-Decodes a sequence of tokens into a string.
+Returns an array of EOS (End of Sequence) token IDs. The array is owned by the tokenizer and will be freed when the tokenizer is destroyed.
 
 ```c
-const char* out_string = NULL;
-OgaTokenizerDecode(tokenizer, tokens, token_count, &out_string);
-// Use out_string, then:
-OgaDestroyString(out_string);
+const int32_t* eos_token_ids = NULL;
+size_t token_count = 0;
+OgaResult* result = OgaTokenizerGetEosTokenIds(tokenizer, &eos_token_ids, &token_count);
 ```
 
 ---
 
-### OgaTokenizerApplyChatTemplate
+### OgaTokenizerGetPadTokenId
 
-Applies a chat template to messages and tools.
+Returns the PAD (padding) token ID.
 
 ```c
-const char* result = NULL;
-OgaTokenizerApplyChatTemplate(tokenizer, "template", "messages", "tools", true, &result);
-OgaDestroyString(result);
+int32_t token_id = 0;
+OgaResult* result = OgaTokenizerGetPadTokenId(tokenizer, &token_id);
 ```
 
 ---
 
-### OgaTokenizerDecodeBatch
+### OgaTokenizerEncode
 
-Decodes a batch of token sequences.
+Encodes a single string and adds the encoded sequence of tokens to the `OgaSequences`. The `OgaSequences` must be freed with `OgaDestroySequences` when it is no longer needed.
 
 ```c
-OgaStringArray* out_strings = NULL;
-OgaTokenizerDecodeBatch(tokenizer, tensor, &out_strings);
-// Use out_strings, then:
-OgaDestroyStringArray(out_strings);
+OgaSequences* sequences = NULL;
+OgaCreateSequences(&sequences);
+OgaResult* result = OgaTokenizerEncode(tokenizer, "Hello world", sequences);
 ```
 
 ---
 
-### OgaCreateTokenizerStream
+### OgaTokenizerDecode
 
-Creates a tokenizer stream for incremental decoding.
+Decodes a sequence of tokens into a string. The output string must be freed with `OgaDestroyString`.
 
 ```c
-OgaTokenizerStream* stream = NULL;
-OgaCreateTokenizerStream(tokenizer, &stream);
+const char* out_string = NULL;
+OgaResult* result = OgaTokenizerDecode(tokenizer, tokens, token_count, &out_string);
+OgaDestroyString(out_string);
 ```
 
 ---
 
-### OgaDestroyTokenizerStream
+### OgaTokenizerEncodeBatch
 
-Destroys the tokenizer stream.
+Encodes a batch of strings and returns a single tensor output.
 
 ```c
-OgaDestroyTokenizerStream(stream);
+const char* texts[] = {"Hello", "World"};
+OgaTensor* tensor = NULL;
+OgaResult* result = OgaTokenizerEncodeBatch(tokenizer, texts, 2, &tensor);
+```
+
+---
+
+### OgaTokenizerDecodeBatch
+
+Decodes a batch of token sequences and returns an array of strings.
+
+```c
+OgaStringArray* out_strings = NULL;
+OgaResult* result = OgaTokenizerDecodeBatch(tokenizer, tensor, &out_strings);
+OgaDestroyStringArray(out_strings);
 ```
 
 ---
 
 ### OgaTokenizerStreamDecode
 
-Decodes a single token in the stream.
+Decodes a single token in the stream. If a word is generated, it will be returned in the output parameter (`chunk` in the example below). The chunk is valid until the next call or until the stream is destroyed.
 
 ```c
 const char* chunk = NULL;
-OgaTokenizerStreamDecode(stream, token, &chunk);
-// chunk is valid until next call or stream is destroyed
+OgaResult* result = OgaTokenizerStreamDecode(stream, token, &chunk);
+```
+
+---
+
+
+### OgaTokenizerToTokenId
+
+Converts a string to its corresponding token ID.
+
+```c
+int32_t token_id = 0;
+OgaResult* result = OgaTokenizerToTokenId(tokenizer, "Hello", &token_id);
 ```
 
 ---
@@ -324,18 +465,18 @@ OgaTokenizerStreamDecode(stream, token, &chunk);
 
 ### OgaCreateSequences
 
-Creates an empty OgaSequences object.
+Creates an empty `OgaSequences` object.
 
 ```c
 OgaSequences* sequences = NULL;
-OgaCreateSequences(&sequences);
+OgaResult* result = OgaCreateSequences(&sequences);
 ```
 
 ---
 
 ### OgaDestroySequences
 
-Destroys the given OgaSequences.
+Destroys the given `OgaSequences`.
 
 ```c
 OgaDestroySequences(sequences);
@@ -365,7 +506,7 @@ size_t token_count = OgaSequencesGetSequenceCount(sequences, 0);
 
 ### OgaSequencesGetSequenceData
 
-Returns a pointer to the token data for the sequence at the given index.
+Returns a pointer to the token data for the sequence at the given index. The pointer is valid until the `OgaSequences` is destroyed.
 
 ```c
 const int32_t* data = OgaSequencesGetSequenceData(sequences, 0);
@@ -373,6 +514,27 @@ const int32_t* data = OgaSequencesGetSequenceData(sequences, 0);
 
 ---
 
+### OgaAppendTokenSequence
+
+Appends multiple tokens to the sequences.
+
+```c
+const int32_t tokens[] = {1, 2, 3};
+OgaResult* result = OgaAppendTokenSequence(tokens, 3, sequences);
+```
+
+---
+
+### OgaAppendTokenToSequence
+
+Appends a single token to the sequence at the given index. If the sequence does not exist and the index equals the current sequence count, a new sequence is created.
+
+```c
+OgaResult* result = OgaAppendTokenToSequence(token_id, sequences, sequence_index);
+```
+
+---
+
 ## Generator Params API
 
 ### OgaCreateGeneratorParams
@@ -381,7 +543,7 @@ Creates generator parameters for the given model.
 
 ```c
 OgaGeneratorParams* params = NULL;
-OgaCreateGeneratorParams(model, &params);
+OgaResult* result = OgaCreateGeneratorParams(model, &params);
 ```
 
 ---
@@ -398,80 +560,77 @@ OgaDestroyGeneratorParams(params);
 
 ### OgaGeneratorParamsSetSearchNumber
 
-Sets a numeric search option.
+Sets a numeric search option (e.g., temperature, top_k, max_length).
 
 ```c
-OgaGeneratorParamsSetSearchNumber(params, "max_length", 128);
+OgaResult* result = OgaGeneratorParamsSetSearchNumber(params, "max_length", 128);
 ```
 
 ---
 
 ### OgaGeneratorParamsSetSearchBool
 
-Sets a boolean search option.
+Sets a boolean search option (e.g., do_sample).
 
 ```c
-OgaGeneratorParamsSetSearchBool(params, "do_sample", true);
+OgaResult* result = OgaGeneratorParamsSetSearchBool(params, "do_sample", true);
 ```
 
 ---
 
 ### OgaGeneratorParamsTryGraphCaptureWithMaxBatchSize
 
-Attempts to enable graph capture mode with a maximum batch size.
+Attempts to enable graph capture mode with a maximum batch size for improved performance.
 
 ```c
-OgaGeneratorParamsTryGraphCaptureWithMaxBatchSize(params, 8);
+OgaResult* result = OgaGeneratorParamsTryGraphCaptureWithMaxBatchSize(params, 8);
 ```
 
 ---
 
-### OgaGeneratorParamsSetInputIDs
+### OgaGeneratorParamsSetGuidance
+
+Sets guidance data for constrained generation (e.g., JSON schema, regex, Lark grammar).
 
-Sets the input ids for the generator params.
+Supported guidance types:
+- `json_schema`: Constrains output to a specific JSON schema
+- `regex`: Constrains output to match a regular expression
+- `lark_grammar`: Constrains output to a Lark grammar
+
+The `enable_ff_tokens` flag allows force-forwarding tokens that satisfy the grammar without calling the model, speeding up generation (only valid when batch_size=1 and beam_size=1).
 
 ```c
-OgaGeneratorParamsSetInputIDs(params, input_ids, input_ids_count, sequence_length, batch_size);
+OgaResult* result = OgaGeneratorParamsSetGuidance(params, "json_schema", schema_string, true);
 ```
 
 ---
 
 ### OgaGeneratorParamsSetInputSequences
 
-Sets the input id sequences for the generator params.
+Sets the input ID sequences for the generator params.
 
 ```c
-OgaGeneratorParamsSetInputSequences(params, sequences);
+OgaResult* result = OgaGeneratorParamsSetInputSequences(params, sequences);
 ```
 
 ---
 
 ### OgaGeneratorParamsSetModelInput
 
-Sets an additional model input.
+Sets an additional model input for advanced use cases (e.g., LoRA models).
 
 ```c
-OgaGeneratorParamsSetModelInput(params, "input_name", tensor);
+OgaResult* result = OgaGeneratorParamsSetModelInput(params, "input_name", tensor);
 ```
 
 ---
 
 ### OgaGeneratorParamsSetInputs
 
-Sets named tensors as inputs.
-
-```c
-OgaGeneratorParamsSetInputs(params, named_tensors);
-```
-
----
-
-### OgaGeneratorParamsSetGuidance
-
-Sets guidance data.
+Sets named tensors as inputs for the generator.
 
 ```c
-OgaGeneratorParamsSetGuidance(params, "type", "data");
+OgaResult* result = OgaGeneratorParamsSetInputs(params, named_tensors);
 ```
 
 ---
@@ -484,7 +643,7 @@ Creates a generator from the given model and generator params.
 
 ```c
 OgaGenerator* generator = NULL;
-OgaCreateGenerator(model, params, &generator);
+OgaResult* result = OgaCreateGenerator(model, params, &generator);
 ```
 
 ---
@@ -501,7 +660,7 @@ OgaDestroyGenerator(generator);
 
 ### OgaGenerator_IsDone
 
-Checks if generation is complete.
+Checks if generation is complete (all sequences have reached termination conditions).
 
 ```c
 bool done = OgaGenerator_IsDone(generator);
@@ -509,62 +668,75 @@ bool done = OgaGenerator_IsDone(generator);
 
 ---
 
+### OgaGenerator_IsSessionTerminated
+
+Checks if the session is terminated.
+
+```c
+bool terminated = OgaGenerator_IsSessionTerminated(generator);
+```
+
+---
+
 ### OgaGenerator_AppendTokenSequences
 
-Appends token sequences to the generator.
+Appends token sequences to the generator for seeding generation.
 
 ```c
-OgaGenerator_AppendTokenSequences(generator, sequences);
+OgaResult* result = OgaGenerator_AppendTokenSequences(generator, sequences);
 ```
 
 ---
 
 ### OgaGenerator_AppendTokens
 
-Appends tokens to the generator.
+Appends individual tokens to the generator.
 
 ```c
-OgaGenerator_AppendTokens(generator, input_ids, input_ids_count);
+const int32_t input_ids[] = {1, 2, 3};
+OgaResult* result = OgaGenerator_AppendTokens(generator, input_ids, 3);
 ```
 
 ---
 
-### OgaGenerator_IsSessionTerminated
+### OgaGenerator_GenerateNextToken
 
-Checks if the session is terminated.
+Generates the next token. This performs one iteration of the generation loop.
 
 ```c
-bool terminated = OgaGenerator_IsSessionTerminated(generator);
+OgaResult* result = OgaGenerator_GenerateNextToken(generator);
 ```
 
 ---
 
-### OgaGenerator_GenerateNextToken
+### OgaGenerator_GetNextTokens
 
-Generates the next token.
+Returns the next tokens generated by the model. The count matches the batch size. The pointer is valid until the next `OgaGenerator` call.
 
 ```c
-OgaGenerator_GenerateNextToken(generator);
+const int32_t* tokens = NULL;
+size_t count = 0;
+OgaResult* result = OgaGenerator_GetNextTokens(generator, &tokens, &count);
 ```
 
 ---
 
 ### OgaGenerator_RewindTo
 
-Rewinds the sequence to a new length.
+Rewinds the sequence to a new length. This is useful when the user wants to rewind the generator to a specific length and continue generating from that point.
 
 ```c
-OgaGenerator_RewindTo(generator, new_length);
+OgaResult* result = OgaGenerator_RewindTo(generator, new_length);
 ```
 
 ---
 
 ### OgaGenerator_SetRuntimeOption
 
-Sets a runtime option.
+Sets a runtime option (e.g., `terminate_session`).
 
 ```c
-OgaGenerator_SetRuntimeOption(generator, "terminate_session", "1");
+OgaResult* result = OgaGenerator_SetRuntimeOption(generator, "terminate_session", "1");
 ```
 
 ---
@@ -581,7 +753,7 @@ size_t count = OgaGenerator_GetSequenceCount(generator, 0);
 
 ### OgaGenerator_GetSequenceData
 
-Returns a pointer to the sequence data at the given index.
+Returns a pointer to the sequence data at the given index. The sequence data is owned by the `OgaGenerator` and will be freed when it is destroyed. The caller must copy the data if it needs to be used after the generator is destroyed.
 
 ```c
 const int32_t* data = OgaGenerator_GetSequenceData(generator, 0);
@@ -589,44 +761,65 @@ const int32_t* data = OgaGenerator_GetSequenceData(generator, 0);
 
 ---
 
-### OgaGenerator_GetOutput
+### OgaGenerator_SetModelInput
 
-Gets a named output tensor.
+Sets an additional model input for advanced use cases.
 
 ```c
-OgaTensor* tensor = NULL;
-OgaGenerator_GetOutput(generator, "output_name", &tensor);
+OgaResult* result = OgaGenerator_SetModelInput(generator, "input_name", tensor);
 ```
 
 ---
 
-### OgaGenerator_GetLogits
+### OgaGenerator_SetInputs
 
-Gets the logits tensor.
+Sets named tensors as additional inputs.
 
 ```c
-OgaTensor* logits = NULL;
-OgaGenerator_GetLogits(generator, &logits);
+OgaResult* result = OgaGenerator_SetInputs(generator, named_tensors);
 ```
 
 ---
 
-### OgaGenerator_SetLogits
+### OgaGenerator_GetInput
 
-Sets the logits tensor.
+Returns a copy of the model input identified by the given name as an `OgaTensor` on CPU. The buffer is owned by the returned tensor and will be released when the tensor is destroyed.
 
 ```c
-OgaGenerator_SetLogits(generator, tensor);
+OgaTensor* input_tensor = NULL;
+OgaResult* result = OgaGenerator_GetInput(generator, "input_name", &input_tensor);
 ```
 
 ---
 
-### OgaSetActiveAdapter
+### OgaGenerator_GetOutput
 
-Sets the active adapter for the generator.
+Returns a copy of the model output identified by the given name as an `OgaTensor` on CPU. The buffer is owned by the returned tensor and will be released when the tensor is destroyed.
+
+```c
+OgaTensor* output_tensor = NULL;
+OgaResult* result = OgaGenerator_GetOutput(generator, "output_name", &output_tensor);
+```
+
+---
+
+### OgaGenerator_GetLogits
+
+Returns a copy of the logits from the model as an `OgaTensor` on CPU. The logits contain only the last token logits even during prompt processing. The buffer is owned by the returned tensor and will be released when it is destroyed.
 
 ```c
-OgaSetActiveAdapter(generator, adapters, "adapter_name");
+OgaTensor* logits = NULL;
+OgaResult* result = OgaGenerator_GetLogits(generator, &logits);
+```
+
+---
+
+### OgaGenerator_SetLogits
+
+Sets the logits for the generator. This is useful for guided generation. The tensor must have the same shape as the logits returned by `OgaGenerator_GetLogits`.
+
+```c
+OgaResult* result = OgaGenerator_SetLogits(generator, logits_tensor);
 ```
 
 ---
@@ -635,31 +828,51 @@ OgaSetActiveAdapter(generator, adapters, "adapter_name");
 
 ### OgaCreateAdapters
 
-Creates the object that manages the adapters.
+Creates the object that manages the adapters. Used to load all the model adapters with reference counting support.
 
 ```c
 OgaAdapters* adapters = NULL;
-OgaCreateAdapters(model, &adapters);
+OgaResult* result = OgaCreateAdapters(model, &adapters);
+```
+
+---
+
+### OgaDestroyAdapters
+
+Destroys the adapters object.
+
+```c
+OgaDestroyAdapters(adapters);
 ```
 
 ---
 
 ### OgaLoadAdapter
 
-Loads the model adapter from the given adapter file path and adapter name.
+Loads a model adapter from the given adapter file path and assigns it a unique name for later reference.
 
 ```c
-OgaLoadAdapter(adapters, "adapter_file_path", "adapter_name");
+OgaResult* result = OgaLoadAdapter(adapters, "adapter_file_path", "adapter_name");
 ```
 
 ---
 
 ### OgaUnloadAdapter
 
-Unloads the adapter with the given identifier.
+Unloads the adapter with the given name. Returns an error if the adapter is not found or is still in use.
 
 ```c
-OgaUnloadAdapter(adapters, "adapter_name");
+OgaResult* result = OgaUnloadAdapter(adapters, "adapter_name");
+```
+
+---
+
+### OgaSetActiveAdapter
+
+Sets the active adapter for the generator.
+
+```c
+OgaResult* result = OgaSetActiveAdapter(generator, adapters, "adapter_name");
 ```
 
 ---
@@ -668,11 +881,12 @@ OgaUnloadAdapter(adapters, "adapter_name");
 
 ### OgaCreateTensorFromBuffer
 
-Creates a tensor from a buffer.
+Creates a tensor from an optional user-owned buffer. If a user-owned buffer is supplied, the tensor does not own the memory, so the data must remain valid for the lifetime of the tensor. If the data pointer is `NULL`, the tensor will allocate its own memory.
 
 ```c
+int64_t shape[] = {1, 3, 224, 224};
 OgaTensor* tensor = NULL;
-OgaCreateTensorFromBuffer(data, shape_dims, shape_dims_count, element_type, &tensor);
+OgaResult* result = OgaCreateTensorFromBuffer(data, shape, 4, OgaElementType_float32, &tensor);
 ```
 
 ---
@@ -683,7 +897,7 @@ Returns the element type of the tensor.
 
 ```c
 OgaElementType type;
-OgaTensorGetType(tensor, &type);
+OgaResult* result = OgaTensorGetType(tensor, &type);
 ```
 
 ---
@@ -694,29 +908,29 @@ Returns the rank (number of dimensions) of the tensor.
 
 ```c
 size_t rank;
-OgaTensorGetShapeRank(tensor, &rank);
+OgaResult* result = OgaTensorGetShapeRank(tensor, &rank);
 ```
 
 ---
 
 ### OgaTensorGetShape
 
-Returns the shape of the tensor.
+Copies the shape dimensions into the provided array. The array size must match the rank returned by `OgaTensorGetShapeRank`.
 
 ```c
 int64_t shape[rank];
-OgaTensorGetShape(tensor, shape, rank);
+OgaResult* result = OgaTensorGetShape(tensor, shape, rank);
 ```
 
 ---
 
 ### OgaTensorGetData
 
-Returns a pointer to the tensor data.
+Returns a pointer to the tensor data. The pointer should be cast to the actual data type of the tensor.
 
 ```c
 void* data = NULL;
-OgaTensorGetData(tensor, &data);
+OgaResult* result = OgaTensorGetData(tensor, &data);
 ```
 
 ---
@@ -733,15 +947,26 @@ OgaDestroyTensor(tensor);
 
 ## Images and Audios API
 
+### OgaLoadImage
+
+Loads a single image from a file path.
+
+```c
+OgaImages* images = NULL;
+OgaResult* result = OgaLoadImage("image_path.jpg", &images);
+```
+
+---
+
 ### OgaLoadImages
 
-Loads images from file paths.
+Loads multiple images from file paths.
 
 ```c
 OgaStringArray* image_paths = NULL;
 OgaCreateStringArrayFromStrings(paths, count, &image_paths);
 OgaImages* images = NULL;
-OgaLoadImages(image_paths, &images);
+OgaResult* result = OgaLoadImages(image_paths, &images);
 OgaDestroyStringArray(image_paths);
 ```
 
@@ -749,11 +974,11 @@ OgaDestroyStringArray(image_paths);
 
 ### OgaLoadImagesFromBuffers
 
-Loads images from memory buffers.
+Loads multiple images from memory buffers.
 
 ```c
 OgaImages* images = NULL;
-OgaLoadImagesFromBuffers(image_data, image_sizes, count, &images);
+OgaResult* result = OgaLoadImagesFromBuffers(image_data, image_sizes, count, &images);
 ```
 
 ---
@@ -768,15 +993,26 @@ OgaDestroyImages(images);
 
 ---
 
+### OgaLoadAudio
+
+Loads a single audio from a file path.
+
+```c
+OgaAudios* audios = NULL;
+OgaResult* result = OgaLoadAudio("audio_path.wav", &audios);
+```
+
+---
+
 ### OgaLoadAudios
 
-Loads audios from file paths.
+Loads multiple audios from file paths.
 
 ```c
 OgaStringArray* audio_paths = NULL;
 OgaCreateStringArrayFromStrings(paths, count, &audio_paths);
 OgaAudios* audios = NULL;
-OgaLoadAudios(audio_paths, &audios);
+OgaResult* result = OgaLoadAudios(audio_paths, &audios);
 OgaDestroyStringArray(audio_paths);
 ```
 
@@ -784,11 +1020,11 @@ OgaDestroyStringArray(audio_paths);
 
 ### OgaLoadAudiosFromBuffers
 
-Loads audios from memory buffers.
+Loads multiple audios from memory buffers.
 
 ```c
 OgaAudios* audios = NULL;
-OgaLoadAudiosFromBuffers(audio_data, audio_sizes, count, &audios);
+OgaResult* result = OgaLoadAudiosFromBuffers(audio_data, audio_sizes, count, &audios);
 ```
 
 ---
@@ -811,7 +1047,7 @@ Creates a named tensors object.
 
 ```c
 OgaNamedTensors* named_tensors = NULL;
-OgaCreateNamedTensors(&named_tensors);
+OgaResult* result = OgaCreateNamedTensors(&named_tensors);
 ```
 
 ---
@@ -822,7 +1058,7 @@ Gets a tensor by name.
 
 ```c
 OgaTensor* tensor = NULL;
-OgaNamedTensorsGet(named_tensors, "input_name", &tensor);
+OgaResult* result = OgaNamedTensorsGet(named_tensors, "input_name", &tensor);
 ```
 
 ---
@@ -832,7 +1068,7 @@ OgaNamedTensorsGet(named_tensors, "input_name", &tensor);
 Sets a tensor by name.
 
 ```c
-OgaNamedTensorsSet(named_tensors, "input_name", tensor);
+OgaResult* result = OgaNamedTensorsSet(named_tensors, "input_name", tensor);
 ```
 
 ---
@@ -842,7 +1078,7 @@ OgaNamedTensorsSet(named_tensors, "input_name", tensor);
 Deletes a tensor by name.
 
 ```c
-OgaNamedTensorsDelete(named_tensors, "input_name");
+OgaResult* result = OgaNamedTensorsDelete(named_tensors, "input_name");
 ```
 
 ---
@@ -853,18 +1089,18 @@ Returns the number of named tensors.
 
 ```c
 size_t count = 0;
-OgaNamedTensorsCount(named_tensors, &count);
+OgaResult* result = OgaNamedTensorsCount(named_tensors, &count);
 ```
 
 ---
 
 ### OgaNamedTensorsGetNames
 
-Gets the names of all tensors.
+Returns an `OgaStringArray` containing the names of all tensors. Must be freed with `OgaDestroyStringArray`.
 
 ```c
 OgaStringArray* names = NULL;
-OgaNamedTensorsGetNames(named_tensors, &names);
+OgaResult* result = OgaNamedTensorsGetNames(named_tensors, &names);
 OgaDestroyStringArray(names);
 ```
 
@@ -880,34 +1116,266 @@ OgaDestroyNamedTensors(named_tensors);
 
 ---
 
-## Utility Functions
+## Multi-Modal Processing API
 
-### OgaSetLogBool
+### OgaCreateMultiModalProcessor
 
-Sets a boolean logging option.
+Creates a multi-modal processor for the given model.
 
 ```c
-OgaSetLogBool("option_name", true);
+OgaMultiModalProcessor* processor = NULL;
+OgaResult* result = OgaCreateMultiModalProcessor(model, &processor);
 ```
 
 ---
 
-### OgaSetLogString
+### OgaDestroyMultiModalProcessor
+
+Destroys the multi-modal processor.
+
+```c
+OgaDestroyMultiModalProcessor(processor);
+```
+
+---
+
+### OgaProcessorProcessImages
+
+Processes images with an input prompt.
+
+```c
+OgaNamedTensors* input_tensors = NULL;
+OgaResult* result = OgaProcessorProcessImages(processor, "prompt text", images, &input_tensors);
+```
+
+---
+
+### OgaProcessorProcessImagesAndPrompts
+
+Processes images with multiple input prompts.
+
+```c
+OgaNamedTensors* input_tensors = NULL;
+OgaResult* result = OgaProcessorProcessImagesAndPrompts(processor, prompts, images, &input_tensors);
+```
+
+---
+
+### OgaProcessorProcessAudios
+
+Processes audios with an input prompt.
+
+```c
+OgaNamedTensors* input_tensors = NULL;
+OgaResult* result = OgaProcessorProcessAudios(processor, "prompt text", audios, &input_tensors);
+```
+
+---
+
+### OgaProcessorProcessAudiosAndPrompts
+
+Processes audios with multiple input prompts.
+
+```c
+OgaNamedTensors* input_tensors = NULL;
+OgaResult* result = OgaProcessorProcessAudiosAndPrompts(processor, prompts, audios, &input_tensors);
+```
+
+---
+
+### OgaProcessorProcessImagesAndAudios
+
+Processes images and/or audios with an input prompt.
+
+```c
+OgaNamedTensors* input_tensors = NULL;
+OgaResult* result = OgaProcessorProcessImagesAndAudios(processor, "prompt", images, audios, &input_tensors);
+```
+
+---
 
-Sets a string logging option.
+### OgaProcessorProcessImagesAndAudiosAndPrompts
+
+Processes images and/or audios with multiple input prompts.
+
+```c
+OgaNamedTensors* input_tensors = NULL;
+OgaResult* result = OgaProcessorProcessImagesAndAudiosAndPrompts(processor, prompts, images, audios, &input_tensors);
+```
+
+---
+
+### OgaProcessorDecode
+
+Decodes a sequence of tokens using the multi-modal processor.
+
+```c
+const char* out_string = NULL;
+OgaResult* result = OgaProcessorDecode(processor, tokens, token_count, &out_string);
+OgaDestroyString(out_string);
+```
+
+---
+
+
+## Engine and Request API
+
+### OgaCreateEngine
+
+Creates an engine from the given model. The engine is responsible for managing and scheduling multiple requests, executing model inference, and coordinating batching, caching, and resource management for efficient processing.
+
+```c
+OgaEngine* engine = NULL;
+OgaResult* result = OgaCreateEngine(model, &engine);
+```
+
+---
+
+### OgaDestroyEngine
+
+Destroys the given engine.
+
+```c
+OgaDestroyEngine(engine);
+```
+
+---
+
+### OgaEngineStep
+
+Runs one step of the engine if there are pending requests. Returns a request that has been processed and is ready to be queried for results. This function should be called repeatedly to process all requests.
+
+```c
+OgaRequest* ready_request = NULL;
+OgaResult* result = OgaEngineStep(engine, &ready_request);
+```
+
+---
+
+### OgaEngineHasPendingRequests
+
+Checks if the engine has any pending requests to process.
+
+```c
+bool has_pending = false;
+OgaResult* result = OgaEngineHasPendingRequests(engine, &has_pending);
+```
+
+---
+
+### OgaEngineAddRequest
+
+Adds a request to the engine for processing. The request will be processed in subsequent calls to `OgaEngineStep`.
+
+```c
+OgaResult* result = OgaEngineAddRequest(engine, request);
+```
+
+---
+
+### OgaEngineRemoveRequest
+
+Removes a request from the engine.
 
 ```c
-OgaSetLogString("option_name", "value");
+OgaResult* result = OgaEngineRemoveRequest(engine, request);
 ```
 
 ---
 
+### OgaCreateRequest
+
+Creates a new request for the engine with the specified generator parameters.
+
+```c
+OgaRequest* request = NULL;
+OgaResult* result = OgaCreateRequest(params, &request);
+```
+
+---
+
+### OgaDestroyRequest
+
+Destroys the given request and cleans up its resources.
+
+```c
+OgaDestroyRequest(request);
+```
+
+---
+
+### OgaRequestAddTokens
+
+Adds input sequences to the request, which are used to seed the generation process.
+
+```c
+OgaResult* result = OgaRequestAddTokens(request, tokens);
+```
+
+---
+
+### OgaRequestSetOpaqueData
+
+Sets custom user data on the request that is opaque to the engine and can be retrieved later.
+
+```c
+OgaResult* result = OgaRequestSetOpaqueData(request, user_data_pointer);
+```
+
+---
+
+### OgaRequestGetOpaqueData
+
+Retrieves the custom user data previously set on the request using `OgaRequestSetOpaqueData`.
+
+```c
+void* opaque_data = NULL;
+OgaResult* result = OgaRequestGetOpaqueData(request, &opaque_data);
+```
+
+---
+
+### OgaRequestHasUnseenTokens
+
+Checks if the request has any unseen tokens that have not yet been queried.
+
+```c
+bool has_unseen = false;
+OgaResult* result = OgaRequestHasUnseenTokens(request, &has_unseen);
+```
+
+---
+
+### OgaRequestGetUnseenToken
+
+Retrieves the next unseen token from the request. Unseen tokens are those generated by the model but not yet queried.
+
+```c
+int32_t token = 0;
+OgaResult* result = OgaRequestGetUnseenToken(request, &token);
+```
+
+---
+
+### OgaRequestIsDone
+
+Checks if the request has finished processing. A request is done when one of the termination conditions has been reached.
+
+```c
+bool done = false;
+OgaResult* result = OgaRequestIsDone(request, &done);
+```
+
+---
+
+## GPU Device Management
+
 ### OgaSetCurrentGpuDeviceId
 
 Sets the current GPU device ID.
 
 ```c
-OgaSetCurrentGpuDeviceId(0);
+OgaResult* result = OgaSetCurrentGpuDeviceId(0);
 ```
 
 ---
@@ -918,14 +1386,81 @@ Gets the current GPU device ID.
 
 ```c
 int device_id = 0;
-OgaGetCurrentGpuDeviceId(&device_id);
+OgaResult* result = OgaGetCurrentGpuDeviceId(&device_id);
+```
+
+---
+
+## Execution Provider Registration
+
+### OgaRegisterExecutionProviderLibrary
+
+Registers an execution provider library with ONNX Runtime.
+
+```c
+OgaRegisterExecutionProviderLibrary("registration_name", "/path/to/provider_library.so");
+```
+
+---
+
+### OgaUnregisterExecutionProviderLibrary
+
+Unregisters an execution provider library from ONNX Runtime.
+
+```c
+OgaUnregisterExecutionProviderLibrary("registration_name");
+```
+
+---
+
+## Global API and Utilities
+
+### OgaShutdown
+
+Cleanly shut down the GenAI library and its ONNX Runtime usage on process exit.
+
+```c
+OgaShutdown();
+```
+
+---
+
+### OgaSetLogBool
+
+Control the logging behavior of the library by setting boolean logging options.
+
+```c
+OgaResult* result = OgaSetLogBool("option_name", true);
+```
+
+---
+
+### OgaSetLogString
+
+Control the logging behavior of the library by setting string logging options. If the option name is `"filename"` and a valid file path is provided, logging will be directed to that file. An empty string will reset logging to the default destination (`std::cerr`).
+
+```c
+OgaResult* result = OgaSetLogString("filename", "/path/to/logfile.txt");
+```
+
+---
+
+### OgaSetLogCallback
+
+Register a callback function to receive log messages from the library.
+
+```c
+void log_callback(const char* string, size_t length) {
+  // Handle log message
+}
+OgaResult* result = OgaSetLogCallback(log_callback);
 ```
 
 ---
 
 ### OgaResultGetError
 
-Gets the error message from an OgaResult.
+Gets the error message from an `OgaResult`.
 
 ```c
 const char* error = OgaResultGetError(result);
@@ -935,7 +1470,7 @@ const char* error = OgaResultGetError(result);
 
 ### OgaDestroyResult
 
-Destroys an OgaResult.
+Destroys an `OgaResult`.
 
 ```c
 OgaDestroyResult(result);
@@ -953,53 +1488,69 @@ OgaDestroyString(str);
 
 ---
 
-### OgaDestroyBuffer
+## String Array API
+
+### OgaCreateStringArray
 
-Destroys a buffer.
+Creates an empty string array.
 
 ```c
-OgaDestroyBuffer(buffer);
+OgaStringArray* string_array = NULL;
+OgaResult* result = OgaCreateStringArray(&string_array);
 ```
 
 ---
 
-### OgaBufferGetType
+### OgaCreateStringArrayFromStrings
 
-Gets the type of the buffer.
+Creates a string array from an array of strings.
 
 ```c
-OgaDataType type = OgaBufferGetType(buffer);
+const char* strs[] = {"string1", "string2", "string3"};
+OgaStringArray* string_array = NULL;
+OgaResult* result = OgaCreateStringArrayFromStrings(strs, 3, &string_array);
 ```
 
 ---
 
-### OgaBufferGetDimCount
+### OgaDestroyStringArray
 
-Gets the number of dimensions of a buffer.
+Destroys the string array.
 
 ```c
-size_t dim_count = OgaBufferGetDimCount(buffer);
+OgaDestroyStringArray(string_array);
 ```
 
 ---
 
-### OgaBufferGetDims
+### OgaStringArrayAddString
 
-Gets the dimensions of a buffer.
+Adds a string to the string array.
 
 ```c
-size_t dims[dim_count];
-OgaBufferGetDims(buffer, dims, dim_count);
+OgaResult* result = OgaStringArrayAddString(string_array, "new_string");
 ```
 
 ---
 
-### OgaBufferGetData
+### OgaStringArrayGetCount
 
-Gets the data from a buffer.
+Gets the number of strings in the string array.
 
 ```c
-const void* data = OgaBufferGetData(buffer);
+size_t count = 0;
+OgaResult* result = OgaStringArrayGetCount(string_array, &count);
 ```
 
----
\ No newline at end of file
+---
+
+### OgaStringArrayGetString
+
+Gets a string from the string array at the given index.
+
+```c
+const char* str = NULL;
+OgaResult* result = OgaStringArrayGetString(string_array, 0, &str);
+```
+
+---
diff --git a/docs/genai/api/cpp.md b/docs/genai/api/cpp.md
index 67e9c48324246..b10ba4d5e5a5f 100644
--- a/docs/genai/api/cpp.md
+++ b/docs/genai/api/cpp.md
@@ -109,6 +109,86 @@ config->Overlay("{\"option\": \"value\"}");
 
 ---
 
+### AddModelData
+
+Adds in-memory model data that should be loaded with the configuration.
+
+```cpp
+config->AddModelData("model.onnx", model_bytes.data(), model_bytes.size());
+```
+
+---
+
+### RemoveModelData
+
+Removes previously added in-memory model data.
+
+```cpp
+config->RemoveModelData("model.onnx");
+```
+
+---
+
+### SetDecoderProviderOptionsHardwareDeviceType
+
+Specifies the hardware device type for decoder provider options.
+
+```cpp
+config->SetDecoderProviderOptionsHardwareDeviceType("CUDAExecutionProvider", "gpu");
+```
+
+---
+
+### SetDecoderProviderOptionsHardwareDeviceId
+
+Sets the hardware device ID for decoder provider options.
+
+```cpp
+config->SetDecoderProviderOptionsHardwareDeviceId("CUDAExecutionProvider", 0);
+```
+
+---
+
+### SetDecoderProviderOptionsHardwareVendorId
+
+Sets the hardware vendor ID for decoder provider options.
+
+```cpp
+config->SetDecoderProviderOptionsHardwareVendorId("CUDAExecutionProvider", 0);
+```
+
+---
+
+### ClearDecoderProviderOptionsHardwareDeviceType
+
+Clears any decoder provider hardware device type override.
+
+```cpp
+config->ClearDecoderProviderOptionsHardwareDeviceType("CUDAExecutionProvider");
+```
+
+---
+
+### ClearDecoderProviderOptionsHardwareDeviceId
+
+Clears any decoder provider hardware device ID override.
+
+```cpp
+config->ClearDecoderProviderOptionsHardwareDeviceId("CUDAExecutionProvider");
+```
+
+---
+
+### ClearDecoderProviderOptionsHardwareVendorId
+
+Clears any decoder provider hardware vendor ID override.
+
+```cpp
+config->ClearDecoderProviderOptionsHardwareVendorId("CUDAExecutionProvider");
+```
+
+---
+
 ## OgaRuntimeSettings
 
 ### Create
@@ -143,6 +223,48 @@ auto tokenizer = OgaTokenizer::Create(*model);
 
 ---
 
+### UpdateOptions
+
+Updates tokenizer options using key/value pairs.
+
+```cpp
+const char* keys[] = {"padding_side"};
+const char* values[] = {"left"};
+tokenizer->UpdateOptions(keys, values, 1);
+```
+
+---
+
+### GetBosTokenId
+
+Gets the beginning-of-sequence token ID.
+
+```cpp
+int32_t bos_id = tokenizer->GetBosTokenId();
+```
+
+---
+
+### GetEosTokenIds
+
+Gets the configured end-of-sequence token IDs.
+
+```cpp
+auto eos_ids = tokenizer->GetEosTokenIds();
+```
+
+---
+
+### GetPadTokenId
+
+Gets the padding token ID.
+
+```cpp
+int32_t pad_id = tokenizer->GetPadTokenId();
+```
+
+---
+
 ### Encode
 
 Encodes a string and adds the encoded sequence of tokens to the provided OgaSequences.
@@ -213,6 +335,7 @@ Creates a tokenizer stream for incremental decoding.
 
 ```cpp
 auto stream = OgaTokenizerStream::Create(*tokenizer);
+auto stream_from_processor = OgaTokenizerStream::Create(*processor);
 ```
 
 ---
@@ -312,44 +435,54 @@ params->SetSearchOptionBool("do_sample", true);
 
 ---
 
-### SetModelInput
+### TryGraphCaptureWithMaxBatchSize
 
-Sets an additional model input.
+Deprecated helper to request graph capture with a maximum batch size.
 
 ```cpp
-params->SetModelInput("input_name", *tensor);
+params->TryGraphCaptureWithMaxBatchSize(4);
 ```
 
 ---
 
-### SetInputs
+### SetGuidance
 
-Sets named tensors as inputs.
+Sets guidance data, optionally enabling fast-forward (ff) tokens.
 
 ```cpp
-params->SetInputs(*named_tensors);
+params->SetGuidance("type", "data", /*enable_ff_tokens*/ true);
 ```
 
 ---
 
-### SetGuidance
+## OgaGenerator
+
+### Create
 
-Sets guidance data.
+Creates a generator from the given model and parameters.
 
 ```cpp
-params->SetGuidance("type", "data");
+auto generator = OgaGenerator::Create(*model, *params);
 ```
 
 ---
 
-## OgaGenerator
+### SetModelInput
 
-### Create
+Sets an additional model input tensor.
 
-Creates a generator from the given model and parameters.
+```cpp
+generator->SetModelInput("input_name", *tensor);
+```
+
+---
+
+### SetInputs
+
+Sets multiple named tensors as inputs in one call.
 
 ```cpp
-auto generator = OgaGenerator::Create(*model, *params);
+generator->SetInputs(*named_tensors);
 ```
 
 ---
@@ -404,6 +537,16 @@ generator->GenerateNextToken();
 
 ---
 
+### GetNextTokens
+
+Retrieves the token IDs produced by the last generation step.
+
+```cpp
+auto next_tokens = generator->GetNextTokens();
+```
+
+---
+
 ### RewindTo
 
 Rewinds the sequence to a new length.
@@ -444,6 +587,16 @@ const int32_t* data = generator->GetSequenceData(0);
 
 ---
 
+### GetInput
+
+Gets a named input tensor.
+
+```cpp
+auto input = generator->GetInput("input_name");
+```
+
+---
+
 ### GetOutput
 
 Gets a named output tensor.
@@ -492,6 +645,10 @@ Creates a tensor from a buffer.
 
 ```cpp
 auto tensor = OgaTensor::Create(data, shape, shape_dims_count, element_type);
+
+// With typed data and shape vector (C++20 span overloads are used when available)
+std::vector<int64_t> shape_vec = {1, 3};
+auto typed_tensor = OgaTensor::Create<float>(float_data, shape_vec);
 ```
 
 ---
@@ -650,6 +807,112 @@ adapters->UnloadAdapter("adapter_name");
 
 ---
 
+## OgaRequest
+
+### Create
+
+Creates a request wrapper for incremental decoding.
+
+```cpp
+auto request = OgaRequest::Create(*params);
+```
+
+---
+
+### AddTokens
+
+Adds the initial token sequences to the request.
+
+```cpp
+request->AddTokens(*sequences);
+```
+
+---
+
+### IsDone
+
+Checks whether the request has finished generating.
+
+```cpp
+bool done = request->IsDone();
+```
+
+---
+
+### HasUnseenTokens
+
+Indicates whether the request has unseen tokens to consume.
+
+```cpp
+bool has_unseen = request->HasUnseenTokens();
+```
+
+---
+
+### GetUnseenToken
+
+Retrieves the next unseen token from the request.
+
+```cpp
+int32_t token = request->GetUnseenToken();
+```
+
+---
+
+### SetOpaqueData / GetOpaqueData
+
+Stores and retrieves arbitrary user data associated with the request.
+
+```cpp
+request->SetOpaqueData(user_ptr);
+void* data = request->GetOpaqueData();
+```
+
+---
+
+## OgaEngine
+
+### Create
+
+Creates an engine that manages multiple requests.
+
+```cpp
+auto engine = OgaEngine::Create(*model);
+```
+
+---
+
+### HasPendingRequests
+
+Checks if the engine has requests waiting to be processed.
+
+```cpp
+bool pending = engine->HasPendingRequests();
+```
+
+---
+
+### Add / Remove
+
+Adds or removes a request from the engine.
+
+```cpp
+engine->Add(*request);
+engine->Remove(*request);
+```
+
+---
+
+### Step
+
+Advances the engine one step and returns a request that has new tokens, if any.
+
+```cpp
+auto completed_request = engine->Step();
+```
+
+---
+
 ## OgaMultiModalProcessor
 
 ### Create
@@ -672,12 +935,34 @@ auto named_tensors = processor->ProcessImages("prompt", images.get());
 
 ---
 
+### ProcessImages (multiple prompts)
+
+Processes images with a batch of prompts.
+
+```cpp
+std::vector<const char*> prompts = {"first prompt", "second prompt"};
+auto named_tensors = processor->ProcessImages(prompts, images.get());
+```
+
+---
+
 ### ProcessAudios
 
 Processes audios and returns named tensors.
 
 ```cpp
-auto named_tensors = processor->ProcessAudios(audios.get());
+auto named_tensors = processor->ProcessAudios("prompt", audios.get());
+```
+
+---
+
+### ProcessAudios (multiple prompts)
+
+Processes audios with a batch of prompts.
+
+```cpp
+std::vector<const char*> prompts = {"first prompt", "second prompt"};
+auto named_tensors = processor->ProcessAudios(prompts, audios.get());
 ```
 
 ---
@@ -692,6 +977,17 @@ auto named_tensors = processor->ProcessImagesAndAudios("prompt", images.get(), a
 
 ---
 
+### ProcessImagesAndAudios (multiple prompts)
+
+Processes images and audios with a batch of prompts.
+
+```cpp
+std::vector<const char*> prompts = {"first prompt", "second prompt"};
+auto named_tensors = processor->ProcessImagesAndAudios(prompts, images.get(), audios.get());
+```
+
+---
+
 ### Decode
 
 Decodes a sequence of tokens into a string.
@@ -736,6 +1032,18 @@ Oga::SetLogString("option_name", "value");
 
 ---
 
+### SetLogCallback
+
+Registers a callback for log messages.
+
+```cpp
+Oga::SetLogCallback([](const char* msg, size_t len) {
+	fwrite(msg, 1, len, stdout);
+});
+```
+
+---
+
 ### SetCurrentGpuDeviceId
 
 Sets the current GPU device ID.
diff --git a/docs/genai/api/csharp.md b/docs/genai/api/csharp.md
index 3f130538b0975..ab5ab3d8f56c2 100644
--- a/docs/genai/api/csharp.md
+++ b/docs/genai/api/csharp.md
@@ -18,8 +18,7 @@ _Note: this API is in preview and is subject to change._
 
 ## Overview
 
-This document describes the C# API for ONNX Runtime GenAI.  
-Below are the main classes and methods, with code snippets and descriptions for each.
+This document describes the C# API for ONNX Runtime GenAI.
 
 ---
 
@@ -27,20 +26,16 @@ Below are the main classes and methods, with code snippets and descriptions for
 
 ### Constructor
 
-Initializes a new model from the given model path.
+Initialize a new model from the given model path.
 
 ```csharp
 public Model(string modelPath)
 ```
 
----
-
-### Generate
-
-Generates output sequences using the provided generator parameters.
+Initialize a new model from an existing configuration.
 
 ```csharp
-public Sequences Generate(GeneratorParams generatorParams)
+public Model(Config config)
 ```
 
 ---
@@ -49,7 +44,7 @@ public Sequences Generate(GeneratorParams generatorParams)
 
 ### Constructor
 
-Initializes a new configuration object from a config path.
+Initialize a new configuration object from a config path.
 
 ```csharp
 public Config(string configPath)
@@ -59,7 +54,7 @@ public Config(string configPath)
 
 ### ClearProviders
 
-Clears all providers from the configuration.
+Clear all providers from the configuration.
 
 ```csharp
 public void ClearProviders()
@@ -69,7 +64,7 @@ public void ClearProviders()
 
 ### AppendProvider
 
-Appends a provider to the configuration.
+Append a provider to the configuration.
 
 ```csharp
 public void AppendProvider(string provider)
@@ -79,7 +74,7 @@ public void AppendProvider(string provider)
 
 ### SetProviderOption
 
-Sets a provider option in the configuration.
+Set a provider option in the configuration.
 
 ```csharp
 public void SetProviderOption(string provider, string name, string value)
@@ -87,12 +82,82 @@ public void SetProviderOption(string provider, string name, string value)
 
 ---
 
-### Overlay
+### AddModelData
+
+Add in-memory model data to the configuration.
+
+```csharp
+public void AddModelData(string modelFilename, byte[] modelData)
+```
+
+---
+
+### RemoveModelData
+
+Remove model data that was previously added.
+
+```csharp
+public void RemoveModelData(string modelFilename)
+```
+
+---
+
+### SetDecoderProviderOptionsHardwareDeviceType
+
+Set the decoder hardware device type for a provider.
+
+```csharp
+public void SetDecoderProviderOptionsHardwareDeviceType(string provider, string hardware_device_type)
+```
+
+---
+
+### SetDecoderProviderOptionsHardwareDeviceId
+
+Set the decoder hardware device ID for a provider.
+
+```csharp
+public void SetDecoderProviderOptionsHardwareDeviceId(string provider, uint hardware_device_id)
+```
+
+---
+
+### SetDecoderProviderOptionsHardwareVendorId
+
+Set the decoder hardware vendor ID for a provider.
+
+```csharp
+public void SetDecoderProviderOptionsHardwareVendorId(string provider, uint hardware_vendor_id)
+```
+
+---
+
+### ClearDecoderProviderOptionsHardwareDeviceType
+
+Clear the decoder hardware device type setting for a provider.
+
+```csharp
+public void ClearDecoderProviderOptionsHardwareDeviceType(string provider)
+```
+
+---
+
+### ClearDecoderProviderOptionsHardwareDeviceId
 
-Overlays a JSON string onto the configuration.
+Clear the decoder hardware device ID setting for a provider.
 
 ```csharp
-public void Overlay(string json)
+public void ClearDecoderProviderOptionsHardwareDeviceId(string provider)
+```
+
+---
+
+### ClearDecoderProviderOptionsHardwareVendorId
+
+Clear the decoder hardware vendor ID setting for a provider.
+
+```csharp
+public void ClearDecoderProviderOptionsHardwareVendorId(string provider)
 ```
 
 ---
@@ -101,7 +166,7 @@ public void Overlay(string json)
 
 ### Constructor
 
-Initializes a tokenizer for the given model.
+Initialize a tokenizer for the given model.
 
 ```csharp
 public Tokenizer(Model model)
@@ -111,7 +176,7 @@ public Tokenizer(Model model)
 
 ### Encode
 
-Encodes a string and returns the encoded sequences.
+Encode a string and return the encoded sequences.
 
 ```csharp
 public Sequences Encode(string str)
@@ -121,7 +186,7 @@ public Sequences Encode(string str)
 
 ### EncodeBatch
 
-Encodes a batch of strings and returns the encoded sequences.
+Encode a batch of strings and return the encoded sequences.
 
 ```csharp
 public Sequences EncodeBatch(string[] strings)
@@ -131,7 +196,7 @@ public Sequences EncodeBatch(string[] strings)
 
 ### Decode
 
-Decodes a sequence of tokens into a string.
+Decode a sequence of tokens into a string.
 
 ```csharp
 public string Decode(ReadOnlySpan<int> sequence)
@@ -141,7 +206,7 @@ public string Decode(ReadOnlySpan<int> sequence)
 
 ### DecodeBatch
 
-Decodes a batch of sequences into an array of strings.
+Decode a batch of sequences into an array of strings.
 
 ```csharp
 public string[] DecodeBatch(Sequences sequences)
@@ -149,9 +214,19 @@ public string[] DecodeBatch(Sequences sequences)
 
 ---
 
+### UpdateOptions
+
+Update tokenizer options in bulk.
+
+```csharp
+public void UpdateOptions(Dictionary<string, string> options)
+```
+
+---
+
 ### ApplyChatTemplate
 
-Applies a chat template to messages and tools.
+Apply a chat template to messages and tools.
 
 ```csharp
 public string ApplyChatTemplate(string template, string messages, string tools, bool addGenerationPrompt)
@@ -159,9 +234,39 @@ public string ApplyChatTemplate(string template, string messages, string tools,
 
 ---
 
+### GetBosTokenId
+
+Return the beginning-of-sequence token ID.
+
+```csharp
+public int GetBosTokenId()
+```
+
+---
+
+### GetEosTokenIds
+
+Return the end-of-sequence token IDs.
+
+```csharp
+public ReadOnlySpan<int> GetEosTokenIds()
+```
+
+---
+
+### GetPadTokenId
+
+Return the padding token ID.
+
+```csharp
+public int GetPadTokenId()
+```
+
+---
+
 ### CreateStream
 
-Creates a tokenizer stream for incremental decoding.
+Create a tokenizer stream for incremental decoding.
 
 ```csharp
 public TokenizerStream CreateStream()
@@ -173,7 +278,7 @@ public TokenizerStream CreateStream()
 
 ### Decode
 
-Decodes a single token in the stream and returns the generated string chunk.
+Decode a single token in the stream and return the generated string chunk.
 
 ```csharp
 public string Decode(int token)
@@ -185,7 +290,7 @@ public string Decode(int token)
 
 ### Constructor
 
-Initializes generator parameters for the given model.
+Initialize generator parameters for the given model.
 
 ```csharp
 public GeneratorParams(Model model)
@@ -195,7 +300,7 @@ public GeneratorParams(Model model)
 
 ### SetSearchOption (double)
 
-Sets a numeric search option.
+Set a numeric search option.
 
 ```csharp
 public void SetSearchOption(string searchOption, double value)
@@ -205,7 +310,7 @@ public void SetSearchOption(string searchOption, double value)
 
 ### SetSearchOption (bool)
 
-Sets a boolean search option.
+Set a boolean search option.
 
 ```csharp
 public void SetSearchOption(string searchOption, bool value)
@@ -215,7 +320,7 @@ public void SetSearchOption(string searchOption, bool value)
 
 ### TryGraphCaptureWithMaxBatchSize
 
-Attempts to enable graph capture mode with a maximum batch size.
+Attempt to enable graph capture mode with a maximum batch size (deprecated; logs a warning).
 
 ```csharp
 public void TryGraphCaptureWithMaxBatchSize(int maxBatchSize)
@@ -223,29 +328,41 @@ public void TryGraphCaptureWithMaxBatchSize(int maxBatchSize)
 
 ---
 
-### SetInputIDs
+### SetGuidance
+
+Configure guided generation behavior.
+
+```csharp
+public void SetGuidance(string type, string data, bool enableFFTokens = false)
+```
+
+---
+
+## Generator class
+
+### Constructor
 
-Sets the input IDs for the generator parameters.
+Initialize a generator from the given model and generator parameters.
 
 ```csharp
-public void SetInputIDs(ReadOnlySpan<int> inputIDs, ulong sequenceLength, ulong batchSize)
+public Generator(Model model, GeneratorParams generatorParams)
 ```
 
 ---
 
-### SetInputSequences
+### IsDone
 
-Sets the input sequences for the generator parameters.
+Check if generation is complete.
 
 ```csharp
-public void SetInputSequences(Sequences sequences)
+public bool IsDone()
 ```
 
 ---
 
 ### SetModelInput
 
-Sets an additional model input.
+Set a named model input tensor.
 
 ```csharp
 public void SetModelInput(string name, Tensor value)
@@ -253,41 +370,39 @@ public void SetModelInput(string name, Tensor value)
 
 ---
 
-## Generator class
+### SetInputs
 
-### Constructor
-
-Initializes a generator from the given model and generator parameters.
+Set multiple model inputs at once.
 
 ```csharp
-public Generator(Model model, GeneratorParams generatorParams)
+public void SetInputs(NamedTensors namedTensors)
 ```
 
 ---
 
-### IsDone
+### AppendTokens
 
-Checks if generation is complete.
+Append token IDs to the active sequence.
 
 ```csharp
-public bool IsDone()
+public void AppendTokens(ReadOnlySpan<int> inputIDs)
 ```
 
 ---
 
-### ComputeLogits
+### AppendTokenSequences
 
-Computes the logits for the current state.
+Append pre-built sequences.
 
 ```csharp
-public void ComputeLogits()
+public void AppendTokenSequences(Sequences sequences)
 ```
 
 ---
 
 ### GenerateNextToken
 
-Generates the next token.
+Generate the next token.
 
 ```csharp
 public void GenerateNextToken()
@@ -295,57 +410,74 @@ public void GenerateNextToken()
 
 ---
 
-### GetSequence
+### RewindTo
 
-Returns the generated sequence at the given index.
+Rewind the generator to a specified length.
 
 ```csharp
-public ReadOnlySpan<int> GetSequence(ulong index)
+public void RewindTo(ulong newLength)
 ```
 
 ---
 
-### SetActiveAdapter
+### GetNextTokens
 
-Sets the active adapter on this Generator instance.
+Return the tokens generated in the most recent step.
 
 ```csharp
-public void SetActiveAdapter(Adapters adapters, string adapterName)
+public ReadOnlySpan<int> GetNextTokens()
 ```
 
-**Parameters**
+---
 
-- `adapters`: the previously created `Adapters` object
-- `adapterName`: the name of the adapter to activate
+### GetSequence
+
+Return the generated sequence at the given index.
+
+```csharp
+public ReadOnlySpan<int> GetSequence(ulong index)
+```
 
-**Return value**
+---
 
-`void`
+### GetInput
 
-**Exception**
+Retrieve an input tensor by name.
 
-Throws on error.
+```csharp
+public Tensor GetInput(string inputName)
+```
 
 ---
 
-## Result class
+### GetOutput
+
+Retrieve an output tensor by name.
+
+```csharp
+public Tensor GetOutput(string outputName)
+```
+
+---
 
-### Error
+### SetActiveAdapter
 
-Gets the error message from a failed operation.
+Set the active adapter on this generator instance.
 
 ```csharp
-public string Error { get; }
+public void SetActiveAdapter(Adapters adapters, string adapterName)
 ```
 
 ---
 
-### Success
+## OnnxRuntimeGenAIException class
+
+### Overview
 
-Indicates if the operation was successful.
+Exception type thrown when GenAI operations fail.
 
 ```csharp
-public bool Success { get; }
+public class OnnxRuntimeGenAIException : Exception
 ```
 
 ---
@@ -354,7 +486,7 @@ public bool Success { get; }
 
 ### NumSequences
 
-Gets the number of sequences.
+Get the number of sequences.
 
 ```csharp
 public ulong NumSequences { get; }
@@ -362,9 +494,19 @@ public ulong NumSequences { get; }
 
 ---
 
+### Append
+
+Append a token to the specified sequence.
+
+```csharp
+public void Append(int token, ulong sequenceIndex)
+```
+
+---
+
 ### Indexer
 
-Gets the sequence at the specified index.
+Get the sequence at the specified index.
 
 ```csharp
 public ReadOnlySpan<int> this[ulong sequenceIndex]
@@ -376,49 +518,77 @@ public ReadOnlySpan<int> this[ulong sequenceIndex]
 
 ### Constructor
 
-Initializes a tensor from a buffer.
+Initialize a tensor from a buffer.
 
 ```csharp
-public Tensor(Array data, long[] shape, ElementType elementType)
+public Tensor(IntPtr data, long[] shape, ElementType elementType)
 ```
 
 ---
 
-### Data
+### Shape
 
-Gets the underlying data buffer.
+Get the shape of the tensor.
 
 ```csharp
-public Array Data { get; }
+public long[] Shape()
 ```
 
 ---
 
-### Shape
+### Type
+
+Get the element type of the tensor.
+
+```csharp
+public ElementType Type()
+```
+
+---
+
+### ElementsFromShape
+
+Compute the total element count for a shape.
+
+```csharp
+public static long ElementsFromShape(long[] shape)
+```
+
+---
 
-Gets the shape of the tensor.
+### NumElements
+
+Return the number of elements in the tensor.
 
 ```csharp
-public long[] Shape { get; }
+public long NumElements()
 ```
 
 ---
 
-### ElementType
+### GetData
 
-Gets the element type of the tensor.
+Return a read-only span over the tensor data.
 
 ```csharp
-public ElementType ElementType { get; }
+public ReadOnlySpan<T> GetData<T>()
 ```
 
 ---
 
+## NamedTensors class
+
+### Overview
+
+Represent a disposable collection of named tensors returned by processors.
+
+---
+
 ## Utils class
 
 ### SetLogBool
 
-Sets a boolean logging option.
+Set a boolean logging option.
 
 ```csharp
 public static void SetLogBool(string name, bool value)
@@ -428,7 +598,7 @@ public static void SetLogBool(string name, bool value)
 
 ### SetLogString
 
-Sets a string logging option.
+Set a string logging option.
 
 ```csharp
 public static void SetLogString(string name, string value)
@@ -438,7 +608,7 @@ public static void SetLogString(string name, string value)
 
 ### SetCurrentGpuDeviceId
 
-Sets the current GPU device ID.
+Set the current GPU device ID.
 
 ```csharp
 public static void SetCurrentGpuDeviceId(int deviceId)
@@ -448,10 +618,209 @@ public static void SetCurrentGpuDeviceId(int deviceId)
 
 ### GetCurrentGpuDeviceId
 
-Gets the current GPU device ID.
+Get the current GPU device ID.
 
 ```csharp
 public static int GetCurrentGpuDeviceId()
 ```
 
+---
+
+## OgaHandle class
+
+### Overview
+
+Provide a disposable handle that triggers GenAI shutdown when disposed.
+
+```csharp
+public class OgaHandle : IDisposable
+```
+
+---
+
+## Images class
+
+### Load (paths)
+
+Load images from file paths.
+
+```csharp
+public static Images Load(string[] imagePaths)
+```
+
+---
+
+### Load (bytes)
+
+Load images from in-memory data.
+
+```csharp
+public static Images Load(byte[] imageBytesData)
+```
+
+---
+
+## Audios class
+
+### Load (paths)
+
+Load audio from file paths.
+
+```csharp
+public static Audios Load(string[] audioPaths)
+```
+
+---
+
+### Load (bytes)
+
+Load audio from in-memory data.
+
+```csharp
+public static Audios Load(byte[] audioBytesData)
+```
+
+---
+
+## MultiModalProcessor class
+
+### Constructor
+
+Initialize a processor for multimodal inputs.
+
+```csharp
+public MultiModalProcessor(Model model)
+```
+
+---
+
+### ProcessImages
+
+Process text and images into named tensors.
+
+```csharp
+public NamedTensors ProcessImages(string prompt, Images images)
+public NamedTensors ProcessImages(string[] prompts, Images images)
+```
+
+---
+
+### ProcessAudios
+
+Process text and audio into named tensors.
+
+```csharp
+public NamedTensors ProcessAudios(string prompt, Audios audios)
+public NamedTensors ProcessAudios(string[] prompts, Audios audios)
+```
+
+---
+
+### ProcessImagesAndAudios
+
+Process text with both images and audio.
+
+```csharp
+public NamedTensors ProcessImagesAndAudios(string prompt, Images images, Audios audios)
+public NamedTensors ProcessImagesAndAudios(string[] prompts, Images images, Audios audios)
+```
+
+---
+
+### Decode
+
+Decode token IDs to text.
+
+```csharp
+public string Decode(ReadOnlySpan<int> sequence)
+```
+
+---
+
+### CreateStream
+
+Create a tokenizer stream for multimodal decoding.
+
+```csharp
+public TokenizerStream CreateStream()
+```
+
+---
+
+## Adapters class
+
+### Constructor
+
+Create an adapter container for a model.
+
+```csharp
+public Adapters(Model model)
+```
+
+---
+
+### LoadAdapter
+
+Load an adapter file into the container.
+
+```csharp
+public void LoadAdapter(string adapterPath, string adapterName)
+```
+
+---
+
+### UnloadAdapter
+
+Unload a previously loaded adapter.
+
+```csharp
+public void UnloadAdapter(string adapterName)
+```
+
+---
+
+## OnnxRuntimeGenAIChatClientOptions class
+
+### Properties
+
+```csharp
+public IList<string>? StopSequences { get; set; }
+public Func<IEnumerable<ChatMessage>, ChatOptions?, string>? PromptFormatter { get; set; }
+public bool EnableCaching { get; set; }
+```
+
+---
+
+## OnnxRuntimeGenAIChatClient class
+
+### Constructors
+
+Create a chat client from a model path, model, or config.
+
+```csharp
+public OnnxRuntimeGenAIChatClient(string modelPath, OnnxRuntimeGenAIChatClientOptions? options = null)
+public OnnxRuntimeGenAIChatClient(Model model, bool ownsModel = true, OnnxRuntimeGenAIChatClientOptions? options = null)
+public OnnxRuntimeGenAIChatClient(Config config, bool ownsConfig = true, OnnxRuntimeGenAIChatClientOptions? options = null)
+```
+
+---
+
+### GetResponseAsync
+
+Generate a complete chat response.
+
+```csharp
+public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default)
+```
+
+---
+
+### GetStreamingResponseAsync
+
+Stream chat response updates.
+
+```csharp
+public IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default)
+```
+
 ---
\ No newline at end of file
diff --git a/docs/genai/api/java.md b/docs/genai/api/java.md
index b48b0a4d86e7d..84775f5969917 100644
--- a/docs/genai/api/java.md
+++ b/docs/genai/api/java.md
@@ -35,7 +35,7 @@ import ai.onnxruntime.genai.*;
 
 ## Model class
 
-### Constructor
+### Constructor (model path)
 
 Initializes a new model from the given model path.
 
@@ -45,32 +45,22 @@ public Model(String modelPath) throws GenAIException
 
 ---
 
-### createGeneratorParams
+### Constructor (config)
 
-Creates a GeneratorParams instance for executing the model.
+Initializes a new model using a pre-built configuration.
 
 ```java
-public GeneratorParams createGeneratorParams() throws GenAIException
+public Model(Config config) throws GenAIException
 ```
 
 ---
 
-### createTokenizer
+### close
 
-Creates a Tokenizer instance for this model.
+Releases native resources owned by the model.
 
 ```java
-public Tokenizer createTokenizer() throws GenAIException
-```
-
----
-
-### generate
-
-Generates output sequences using the provided generator parameters.
-
-```java
-public Sequences generate(GeneratorParams generatorParams) throws GenAIException
+public void close()
 ```
 
 ---
@@ -92,7 +82,7 @@ public Config(String configPath) throws GenAIException
 Clears all providers from the configuration.
 
 ```java
-public void clearProviders() throws GenAIException
+public void clearProviders()
 ```
 
 ---
@@ -102,7 +92,7 @@ public void clearProviders() throws GenAIException
 Appends a provider to the configuration.
 
 ```java
-public void appendProvider(String provider) throws GenAIException
+public void appendProvider(String providerName)
 ```
 
 ---
@@ -112,17 +102,17 @@ public void appendProvider(String provider) throws GenAIException
 Sets a provider option in the configuration.
 
 ```java
-public void setProviderOption(String provider, String name, String value) throws GenAIException
+public void setProviderOption(String providerName, String optionKey, String optionValue)
 ```
 
 ---
 
-### overlay
+### close
 
-Overlays a JSON string onto the configuration.
+Releases native resources owned by the configuration.
 
 ```java
-public void overlay(String json) throws GenAIException
+public void close()
 ```
 
 ---
@@ -179,6 +169,68 @@ public String[] decodeBatch(Sequences sequences) throws GenAIException
 
 ---
 
+### getBosTokenId
+
+Gets the beginning-of-sentence token id.
+
+```java
+public int getBosTokenId() throws GenAIException
+```
+
+---
+
+### getPadTokenId
+
+Gets the padding token id.
+
+```java
+public int getPadTokenId() throws GenAIException
+```
+
+---
+
+### getEosTokenIds
+
+Gets the end-of-sentence token ids.
+
+```java
+public int[] getEosTokenIds() throws GenAIException
+```
+
+---
+
+### toTokenId
+
+Converts a string to its token id.
+
+```java
+public int toTokenId(String str) throws GenAIException
+```
+
+---
+
+### applyChatTemplate
+
+Applies a chat template to format messages and tools.
+
+```java
+public String applyChatTemplate(
+    String templateStr, String messages, String tools, boolean addGenerationPrompt)
+    throws GenAIException
+```
+
+---
+
+### updateOptions
+
+Updates tokenizer options via key/value pairs.
+
+```java
+public void updateOptions(java.util.Map<String, String> options) throws GenAIException
+```
+
+---
+
 ### createStream
 
 Creates a TokenizerStream object for streaming tokenization.
@@ -189,6 +241,16 @@ public TokenizerStream createStream() throws GenAIException
 
 ---
 
+### close
+
+Releases native resources owned by the tokenizer.
+
+```java
+public void close()
+```
+
+---
+
 ## TokenizerStream class
 
 ### decode
@@ -201,6 +263,16 @@ public String decode(int token) throws GenAIException
 
 ---
 
+### close
+
+Releases native resources owned by the tokenizer stream.
+
+```java
+public void close()
+```
+
+---
+
 ## GeneratorParams class
 
 ### Constructor
@@ -233,34 +305,34 @@ public void setSearchOption(String optionName, boolean value) throws GenAIExcept
 
 ---
 
-### setInput (Sequences)
+### close
 
-Sets the prompt(s) for model execution using sequences.
+Releases native resources owned by the generator parameters.
 
 ```java
-public void setInput(Sequences sequences) throws GenAIException
+public void close()
 ```
 
 ---
 
-### setInput (int[])
+## Generator class
+
+### Constructor
 
-Sets the prompt(s) token ids for model execution.
+Constructs a Generator object with the given model and generator parameters.
 
 ```java
-public void setInput(int[] tokenIds, int sequenceLength, int batchSize) throws GenAIException
+public Generator(Model model, GeneratorParams generatorParams) throws GenAIException
 ```
 
 ---
 
-## Generator class
+### iterator
 
-### Constructor
-
-Constructs a Generator object with the given model and generator parameters.
+Returns an iterator that generates one token on each call to `next()` by calling `generateNextToken` internally.
 
 ```java
-public Generator(Model model, GeneratorParams generatorParams) throws GenAIException
+public java.util.Iterator<Integer> iterator()
 ```
 
 ---
@@ -275,19 +347,59 @@ public boolean isDone()
 
 ---
 
-### computeLogits
+### setModelInput
+
+Adds a tensor as a named model input.
+
+```java
+public void setModelInput(String name, Tensor tensor) throws GenAIException
+```
+
+---
+
+### setInputs
 
-Computes the logits for the next token in the sequence.
+Adds a batch of named tensors as model inputs.
 
 ```java
-public void computeLogits() throws GenAIException
+public void setInputs(NamedTensors namedTensors) throws GenAIException
+```
+
+---
+
+### appendTokens
+
+Appends token ids to the generator input.
+
+```java
+public void appendTokens(int[] inputIDs) throws GenAIException
+```
+
+---
+
+### appendTokenSequences
+
+Appends token sequences to the generator input.
+
+```java
+public void appendTokenSequences(Sequences sequences) throws GenAIException
+```
+
+---
+
+### rewindTo
+
+Rewinds the generator to a specific token length before continuing generation.
+
+```java
+public void rewindTo(long newLength) throws GenAIException
 ```
 
 ---
 
 ### generateNextToken
 
-Generates the next token in the sequence.
+Generates the next token in the sequence using cached logits/state.
 
 ```java
 public void generateNextToken() throws GenAIException
@@ -315,6 +427,46 @@ public int getLastTokenInSequence(long sequenceIndex) throws GenAIException
 
 ---
 
+### getInput
+
+Returns a copy of the named model input as a tensor.
+
+```java
+public Tensor getInput(String name) throws GenAIException
+```
+
+---
+
+### getOutput
+
+Returns a copy of the named model output as a tensor.
+
+```java
+public Tensor getOutput(String name) throws GenAIException
+```
+
+---
+
+### setActiveAdapter
+
+Activates a previously loaded adapter by name.
+
+```java
+public void setActiveAdapter(Adapters adapters, String adapterName) throws GenAIException
+```
+
+---
+
+### close
+
+Releases native resources owned by the generator.
+
+```java
+public void close()
+```
+
+---
+
 ## Sequences class
 
 ### numSequences
@@ -337,6 +489,16 @@ public int[] getSequence(long sequenceIndex)
 
 ---
 
+### close
+
+Releases native resources owned by the sequences.
+
+```java
+public void close()
+```
+
+---
+
 ## Tensor class
 
 ### Constructor
@@ -349,66 +511,257 @@ public Tensor(ByteBuffer data, long[] shape, ElementType elementType) throws Gen
 
 ---
 
-## Result class
+### getType
+
+Gets the tensor element type.
+
+```java
+public Tensor.ElementType getType()
+```
+
+---
+
+### getShape
+
+Gets the tensor shape.
+
+```java
+public long[] getShape()
+```
+
+---
+
+### close
+
+Releases native resources owned by the tensor.
+
+```java
+public void close()
+```
+
+---
+
+## Images class
+
+### Constructor
+
+Loads images from the given path.
+
+```java
+public Images(String imagePath) throws GenAIException
+```
+
+---
+
+### close
+
+Releases native resources owned by the images.
+
+```java
+public void close()
+```
+
+---
+
+## Audios class
+
+### Constructor
+
+Loads audio from the given path.
+
+```java
+public Audios(String audioPath) throws GenAIException
+```
+
+---
+
+### close
+
+Releases native resources owned by the audios.
+
+```java
+public void close()
+```
+
+---
+
+## MultiModalProcessor class
+
+### Constructor
+
+Creates a processor for a given model.
+
+```java
+public MultiModalProcessor(Model model) throws GenAIException
+```
+
+---
+
+### processImages (single prompt)
+
+Processes a text prompt and images into named tensors.
+
+```java
+public NamedTensors processImages(String prompt, Images images) throws GenAIException
+```
+
+---
+
+### processImages (batch prompts)
+
+Processes batch prompts and images into named tensors.
+
+```java
+public NamedTensors processImages(String[] prompts, Images images) throws GenAIException
+```
+
+---
+
+### processAudios (single prompt)
+
+Processes a text prompt and audios into named tensors.
+
+```java
+public NamedTensors processAudios(String prompt, Audios audios) throws GenAIException
+```
+
+---
 
-### isSuccess
+### processAudios (batch prompts)
 
-Indicates if the operation was successful.
+Processes batch prompts and audios into named tensors.
 
 ```java
-public boolean isSuccess()
+public NamedTensors processAudios(String[] prompts, Audios audios) throws GenAIException
 ```
 
 ---
 
-### getError
+### processImagesAndAudios (single prompt)
 
-Gets the error message from a failed operation.
+Processes a text prompt with images and audios into named tensors.
 
 ```java
-public String getError()
+public NamedTensors processImagesAndAudios(String prompt, Images images, Audios audios)
+    throws GenAIException
 ```
 
 ---
 
-## Utils class
+### processImagesAndAudios (batch prompts)
 
-### setLogBool
+Processes batch prompts with images and audios into named tensors.
 
-Sets a boolean logging option.
+```java
+public NamedTensors processImagesAndAudios(String[] prompts, Images images, Audios audios)
+    throws GenAIException
+```
+
+---
+
+### decode
+
+Decodes a token sequence produced by the processor back to text.
 
 ```java
-public static void setLogBool(String name, boolean value)
+public String decode(int[] sequence) throws GenAIException
 ```
 
 ---
 
-### setLogString
+### createStream
 
-Sets a string logging option.
+Creates a TokenizerStream tied to this processor for streaming tokenization.
 
 ```java
-public static void setLogString(String name, String value)
+public TokenizerStream createStream() throws GenAIException
 ```
 
 ---
 
-### setCurrentGpuDeviceId
+### close
 
-Sets the current GPU device ID.
+Releases native resources owned by the processor.
 
 ```java
-public static void setCurrentGpuDeviceId(int deviceId)
+public void close()
 ```
 
 ---
 
-### getCurrentGpuDeviceId
+## NamedTensors class
+
+### Constructor
+
+Wraps a native handle containing a named tensor collection.
+
+```java
+public NamedTensors(long handle)
+```
+
+---
+
+### close
+
+Releases native resources owned by the named tensors.
+
+```java
+public void close()
+```
+
+---
+
+## Adapters class
+
+### Constructor
+
+Creates an adapter container bound to a model.
+
+```java
+public Adapters(Model model) throws GenAIException
+```
+
+---
+
+### loadAdapter
+
+Loads an adapter from disk and registers it under a name.
+
+```java
+public void loadAdapter(String adapterFilePath, String adapterName) throws GenAIException
+```
+
+---
+
+### unloadAdapter
+
+Unloads a previously loaded adapter.
+
+```java
+public void unloadAdapter(String adapterName) throws GenAIException
+```
+
+---
+
+### close
+
+Releases native resources owned by the adapters container.
+
+```java
+public void close()
+```
+
+---
+
+## GenAIException class
+
+### Constructors
 
-Gets the current GPU device ID.
+Constructs an exception that carries an error propagated from the native layer.
 
 ```java
-public static int getCurrentGpuDeviceId()
+GenAIException(String message)
+GenAIException(String message, Exception innerException)
 ```
 
 ---
\ No newline at end of file
diff --git a/docs/genai/api/python.md b/docs/genai/api/python.md
index f5a6cc4a47a46..d50df8dae3756 100644
--- a/docs/genai/api/python.md
+++ b/docs/genai/api/python.md
@@ -89,36 +89,48 @@ onnxruntime_genai.Config(config_path: str) -> Config
   config.clear_providers()
   ```
 
----
-
-## GeneratorParams class
-
-```python
-onnxruntime_genai.GeneratorParams(model: Model) -> GeneratorParams
-```
+- `add_model_data(model_filename: str, data: bytes | buffer)`
 
-#### Methods
+  ```python
+  with open("decoder.onnx", "rb") as f:
+      config.add_model_data("decoder.onnx", f.read())
+  ```
 
-- `set_inputs(named_tensors: NamedTensors)`
+- `remove_model_data(model_filename: str)`
 
   ```python
-  params = onnxruntime_genai.GeneratorParams(model)
-  named_tensors = onnxruntime_genai.NamedTensors()
-  params.set_inputs(named_tensors)
+  config.remove_model_data("decoder.onnx")
   ```
 
-- `set_model_input(name: str, value: numpy.ndarray)`
+- `overlay(config_path: str)`
 
   ```python
-  import numpy as np
-  params.set_model_input("input_ids", np.array([1, 2, 3], dtype=np.int32))
+  config.overlay("config.overlay.json")
   ```
 
+- `set_decoder_provider_options_hardware_device_type(device_type: int)`
+- `set_decoder_provider_options_hardware_device_id(device_id: int)`
+- `set_decoder_provider_options_hardware_vendor_id(vendor_id: int)`
+- `clear_decoder_provider_options_hardware_device_type()`
+- `clear_decoder_provider_options_hardware_device_id()`
+- `clear_decoder_provider_options_hardware_vendor_id()`
+
+---
+
+## GeneratorParams class
+
+```python
+onnxruntime_genai.GeneratorParams(model: Model) -> GeneratorParams
+```
+
+#### Methods
+
 - `try_graph_capture_with_max_batch_size(max_batch_size: int)`
 
   ```python
   params.try_graph_capture_with_max_batch_size(8)
   ```
+  _Deprecated: emits a warning and will be removed in a future release._
 
 - `set_search_options(**options)`
 
@@ -129,7 +141,7 @@ onnxruntime_genai.GeneratorParams(model: Model) -> GeneratorParams
 - `set_guidance(type: str, data: str)`
 
   ```python
-  params.set_guidance("prefix", "Once upon a time")
+  params.set_guidance("prefix", "Once upon a time", enable_ff_tokens=False)
   ```
 
 ---
@@ -149,12 +161,32 @@ onnxruntime_genai.Generator(model: Model, params: GeneratorParams) -> Generator
   done = generator.is_done()
   ```
 
+- `get_input(name: str) -> numpy.ndarray`
+
+  ```python
+  input_ids = generator.get_input("input_ids")
+  ```
+
 - `get_output(name: str) -> numpy.ndarray`
 
   ```python
   output = generator.get_output("output_ids")
   ```
 
+- `set_inputs(named_tensors: NamedTensors)`
+
+  ```python
+  named = onnxruntime_genai.NamedTensors()
+  named["input_ids"] = np.array([1, 2, 3], dtype=np.int32)
+  generator.set_inputs(named)
+  ```
+
+- `set_model_input(name: str, value: numpy.ndarray)`
+
+  ```python
+  generator.set_model_input("input_ids", np.array([1, 2, 3], dtype=np.int32))
+  ```
+
 - `append_tokens(tokens: numpy.ndarray[int32])`
 
   ```python
@@ -221,6 +253,30 @@ onnxruntime_genai.Tokenizer(model: Model) -> Tokenizer
 
 #### Methods
 
+- `bos_token_id: int`
+
+  ```python
+  bos = tokenizer.bos_token_id
+  ```
+
+- `eos_token_ids: numpy.ndarray[int32]`
+
+  ```python
+  eos_ids = tokenizer.eos_token_ids
+  ```
+
+- `pad_token_id: int`
+
+  ```python
+  pad = tokenizer.pad_token_id
+  ```
+
+- `update_options(**options)`
+
+  ```python
+  tokenizer.update_options(space_replacement="▁")
+  ```
+
 - `encode(text: str) -> numpy.ndarray[int32]`
 
   ```python
@@ -240,10 +296,10 @@ onnxruntime_genai.Tokenizer(model: Model) -> Tokenizer
   text = tokenizer.decode(tokens)
   ```
 
-- `apply_chat_template(template_str: str, messages: str, tools: str = None, add_generation_prompt: bool = False) -> str`
+- `apply_chat_template(messages: str, template_str: str | None = None, tools: str | None = None, add_generation_prompt: bool = True) -> str`
 
   ```python
-  chat = tokenizer.apply_chat_template("{user}: {message}", messages="Hi!", add_generation_prompt=True)
+  chat = tokenizer.apply_chat_template(messages="Hi!", template_str="{user}: {message}", add_generation_prompt=True)
   ```
 
 - `encode_batch(texts: list[str]) -> onnxruntime_genai.Tensor`
@@ -385,6 +441,81 @@ onnxruntime_genai.Adapters(model: Model) -> Adapters
 
 ---
 
+## Request class
+
+```python
+onnxruntime_genai.Request(params: GeneratorParams) -> Request
+```
+
+#### Methods
+
+- `add_tokens(tokens: numpy.ndarray[int32])`
+
+  ```python
+  request.add_tokens(np.array([1, 2, 3], dtype=np.int32))
+  ```
+
+- `has_unseen_tokens() -> bool`
+
+  ```python
+  still_pending = request.has_unseen_tokens()
+  ```
+
+- `is_done() -> bool`
+
+  ```python
+  done = request.is_done()
+  ```
+
+- `get_unseen_token() -> int`
+
+  ```python
+  next_token = request.get_unseen_token()
+  ```
+
+- `set_opaque_data(data: object)` / `get_opaque_data() -> object | None`
+
+  ```python
+  request.set_opaque_data({"trace_id": "abc"})
+  opaque = request.get_opaque_data()
+  ```
+
+---
+
+## Engine class
+
+```python
+onnxruntime_genai.Engine(model: Model) -> Engine
+```
+
+#### Methods
+
+- `add_request(request: Request)`
+
+  ```python
+  engine.add_request(request)
+  ```
+
+- `step()`
+
+  ```python
+  engine.step()
+  ```
+
+- `remove_request(request: Request)`
+
+  ```python
+  engine.remove_request(request)
+  ```
+
+- `has_pending_requests() -> bool`
+
+  ```python
+  pending = engine.has_pending_requests()
+  ```
+
+---
+
 ## MultiModalProcessor class
 
 ```python
@@ -393,10 +524,11 @@ onnxruntime_genai.MultiModalProcessor(model: Model) -> MultiModalProcessor
 
 #### Methods
 
-- `__call__(prompt: str = None, images: Images = None, audios: Audios = None) -> onnxruntime_genai.Tensor`
+- `__call__(prompt: str | list[str] = None, images: Images = None, audios: Audios = None) -> onnxruntime_genai.Tensor`
 
   ```python
   result = processor(prompt="Describe this image", images=onnxruntime_genai.Images.open("image.png"))
+  batched = processor(["Describe first", "Describe second"], images=onnxruntime_genai.Images.open("image1.png", "image2.png"))
   ```
 
 - `create_stream() -> TokenizerStream`
@@ -451,6 +583,12 @@ with open("audio1.wav", "rb") as f:
   onnxruntime_genai.set_log_options(verbose=True)
   ```
 
+- `onnxruntime_genai.set_log_callback(callback: Callable[[str], None] | None)`
+
+  ```python
+  onnxruntime_genai.set_log_callback(lambda message: print("LOG:", message))
+  ```
+
 - `onnxruntime_genai.is_cuda_available() -> bool`
 
   ```python
@@ -497,4 +635,16 @@ with open("audio1.wav", "rb") as f:
 
   ```python
   print(onnxruntime_genai.get_current_gpu_device_id())
+  ```
+
+- `onnxruntime_genai.register_execution_provider_library(provider_name: str, path: str)`
+
+  ```python
+  onnxruntime_genai.register_execution_provider_library("MyEP", "/path/to/libMyEP.so")
+  ```
+
+- `onnxruntime_genai.unregister_execution_provider_library(provider_name: str)`
+
+  ```python
+  onnxruntime_genai.unregister_execution_provider_library("MyEP")
   ```
\ No newline at end of file
diff --git a/docs/genai/reference/config.md b/docs/genai/reference/config.md
index 2333c82d106c2..b0e6fd2ac2b46 100644
--- a/docs/genai/reference/config.md
+++ b/docs/genai/reference/config.md
@@ -81,7 +81,7 @@ Below is an example `genai_config.json` for a decoder-only style model:
 
 ## Configuration structure
 
-The configuration file is structured as a JSON object with two main sections: `model` and `search`.  
+The configuration file is structured as a JSON object with `model` and `search` sections and an optional `engine` section.  
 
 
 ---
@@ -99,6 +99,9 @@ Top-level configuration object.
 - **search**: *(object)*  
   Generation/search parameters.
 
+- **engine**: *(object, optional)*  
+  Batch scheduling configuration.
+
 ---
 
 ### Config::Model
@@ -151,6 +154,15 @@ Describes the model architecture, files, and tokenization.
 - **decoder_start_token_id**: *(int, optional)*  
   The id of the decoder start token (for encoder-decoder models).
 
+- **image_token_id**: *(int, optional)*  
+  Token id used to delimit images in multi-modal models.
+
+- **video_token_id**: *(int, optional)*  
+  Token id used to delimit video content in multi-modal models.
+
+- **vision_start_token_id**: *(int, optional)*  
+  Token id used to mark the start of vision content in multi-modal models.
+
 - **vocab_size**: *(int)*  
   The size of the vocabulary.
 
@@ -176,12 +188,21 @@ Describes the model architecture, files, and tokenization.
 
 #### Model::Encoder
 
+- **session_options**: *(object, optional)*  
+  See [SessionOptions](#sessionoptions).
+
+- **run_options**: *(array of [string, string] pairs, optional)*  
+  Per-run configuration entries applied to the encoder session.
+
 - **filename**: *(string)*  
   Path to the encoder ONNX file.
 
 - **hidden_size**: *(int)*  
   Hidden size of the encoder.
 
+- **num_attention_heads**: *(int)*  
+  Number of attention heads.
+
 - **num_key_value_heads**: *(int)*  
   Number of key-value heads.
 
@@ -192,16 +213,26 @@ Describes the model architecture, files, and tokenization.
   Size of each attention head.
 
 - **inputs**: *(object)*  
-  - **input_features**: *(string)*  
-    Name of the input features tensor.
   - **input_ids**: *(string)*  
     Name of the input ids tensor.
+  - **embeddings**: *(string)*  
+    Name of the input embeddings tensor.
   - **attention_mask**: *(string)*  
     Name of the attention mask tensor.
+  - **position_ids**: *(string)*  
+    Name of the position ids tensor.
+  - **audio_features**: *(string)*  
+    Name of the audio features tensor.
 
 - **outputs**: *(object)*  
   - **encoder_outputs**: *(string)*  
     Name of the encoder outputs tensor.
+  - **hidden_states**: *(string)*  
+    Name of the encoder hidden states tensor.
+  - **cross_present_key_names**: *(string)*  
+    Name pattern for cross-attention present key tensors.
+  - **cross_present_value_names**: *(string)*  
+    Name pattern for cross-attention present value tensors.
 
 ---
 
@@ -210,6 +241,12 @@ Describes the model architecture, files, and tokenization.
 - **filename**: *(string)*  
   Path to the embedding ONNX file.
 
+- **session_options**: *(object, optional)*  
+  See [SessionOptions](#sessionoptions).
+
+- **run_options**: *(array of [string, string] pairs, optional)*  
+  Per-run configuration entries applied to the embedding session.
+
 - **inputs**: *(object)*  
   - **input_ids**: *(string)*  
     Name of the input ids tensor.
@@ -229,17 +266,31 @@ Describes the model architecture, files, and tokenization.
 - **filename**: *(string)*  
   Path to the vision ONNX file.
 
+- **session_options**: *(object, optional)*  
+  See [SessionOptions](#sessionoptions).
+
+- **run_options**: *(array of [string, string] pairs, optional)*  
+  Per-run configuration entries applied to the vision session.
+
 - **config_filename**: *(string, optional)*  
-  Path to the vision processor config file.
+  Path to the vision processor config file. Defaults to `processor_config.json`.
 
 - **adapter_filename**: *(string, optional)*  
   Path to the vision adapter file.
 
+- **spatial_merge_size**: *(int, optional)*  
+  Patch merge size used by some models (for example, Qwen2.5-VL). Defaults to 2.
+
+- **tokens_per_second**: *(float, optional)*  
+  Tokens-per-second parameter used by some models. Defaults to 2.0.
+
 - **inputs**: *(object)*  
   - **pixel_values**: *(string)*  
     Name of the pixel values tensor.
   - **image_sizes**: *(string)*  
     Name of the image sizes tensor.
+  - **image_grid_thw**: *(string)*  
+    Name of the image grid tensor. Defaults to `image_sizes` when not provided.
   - **attention_mask**: *(string)*  
     Name of the image attention mask tensor.
 
@@ -247,6 +298,23 @@ Describes the model architecture, files, and tokenization.
   - **image_features**: *(string)*  
     Name of the image features output tensor.
 
+- **pipeline**: *(array, optional)*  
+  Ordered list of sub-models for vision pipelines (for example, patch embedding, attention, merge).
+  - **filename**: *(string)*  
+    Path to the ONNX file.
+  - **session_options**: *(object, optional)*  
+    Session options for this pipeline model.
+  - **run_options**: *(array of [string, string] pairs, optional)*  
+    Run options for this pipeline model.
+  - **model_id**: *(string)*  
+    Identifier used to link outputs to subsequent stages.
+  - **inputs**: *(array of string)*  
+    Graph input names.
+  - **outputs**: *(array of string)*  
+    Graph output names.
+  - **run_on_cpu**: *(bool, optional)*  
+    If true, forces the CPU execution provider (EP) when multiple EPs are configured.
+
 ---
 
 #### Model::Speech
@@ -254,6 +322,12 @@ Describes the model architecture, files, and tokenization.
 - **filename**: *(string)*  
   Path to the speech ONNX file.
 
+- **session_options**: *(object, optional)*  
+  See [SessionOptions](#sessionoptions).
+
+- **run_options**: *(array of [string, string] pairs, optional)*  
+  Per-run configuration entries applied to the speech session.
+
 - **config_filename**: *(string, optional)*  
   Path to the speech processor config file.
 
@@ -284,6 +358,9 @@ Describes the model architecture, files, and tokenization.
 - **session_options**: *(object)*  
   See [SessionOptions](#sessionoptions).
 
+- **run_options**: *(array of [string, string] pairs, optional)*  
+  Per-run configuration entries applied to the decoder session.
+
 - **hidden_size**: *(int)*  
   Size of the hidden layers.
 
@@ -309,6 +386,10 @@ Describes the model architecture, files, and tokenization.
     "left" or "right".
   - **slide_key_value_cache**: *(bool)*  
     Whether to slide the key-value cache.
+  - **slide_inputs**: *(bool, optional)*  
+    Whether to slide the input prompt along with the cache.
+  - **layers**: *(array of int, optional)*  
+    Layer indices that use sliding window attention.
 
 - **inputs**: *(object)*  
   - **input_ids**: *(string)*  
@@ -329,20 +410,28 @@ Describes the model architecture, files, and tokenization.
     Name for cross-attention past key tensors.
   - **cross_past_value_names**: *(string, optional)*  
     Name for cross-attention past value tensors.
+  - **past_key_values_length**: *(string)*  
+    Name of the past key values length tensor.
   - **current_sequence_length**: *(string)*  
     Name of the current sequence length tensor.
   - **past_sequence_length**: *(string)*  
     Name of the past sequence length tensor.
-  - **past_key_values_length**: *(string)*  
-    Name of the past key values length tensor.
   - **total_sequence_length**: *(string)*  
     Name of the total sequence length tensor.
+  - **cache_indirection**: *(string)*  
+    Name of the cache indirection tensor.
   - **encoder_hidden_states**: *(string)*  
     Name of the encoder hidden states tensor.
   - **rnn_prev_states**: *(string, optional)*  
     Name of the previous RNN states tensor.
   - **encoder_attention_mask**: *(string, optional)*  
     Name of the encoder attention mask tensor.
+  - **cumulative_sequence_lengths**: *(string, optional)*  
+    Name of the cumulative sequence lengths tensor.
+  - **past_sequence_lengths**: *(string, optional)*  
+    Name of the past sequence lengths tensor.
+  - **block_table**: *(string, optional)*  
+    Name of the block table tensor.
 
 - **outputs**: *(object)*  
   - **logits**: *(string)*  
@@ -353,10 +442,8 @@ Describes the model architecture, files, and tokenization.
     Name pattern for present value tensors.
   - **present_names**: *(string, optional)*  
     Name for combined present key/value pairs.
-  - **cross_present_key_names**: *(string, optional)*  
-    Name for cross-attention present key tensors.
-  - **cross_present_value_names**: *(string, optional)*  
-    Name for cross-attention present value tensors.
+  - **output_cross_qk_names**: *(string, optional)*  
+    Name pattern for cross-attention QK outputs.
   - **rnn_states**: *(string, optional)*  
     Name of the RNN states output tensor.
 
@@ -376,6 +463,9 @@ Describes the model architecture, files, and tokenization.
 - **session_options**: *(object, optional)*  
   Session options for this pipeline model.
 
+- **run_options**: *(array of [string, string] pairs, optional)*  
+  Run options for this pipeline model.
+
 - **inputs**: *(array of string)*  
   List of input tensor names.
 
@@ -391,6 +481,9 @@ Describes the model architecture, files, and tokenization.
 - **run_on_token_gen**: *(bool)*  
   Whether to run this model during token generation.
 
+- **is_lm_head**: *(bool, optional)*  
+  True if this pipeline model is the language modeling head.
+
 - **reset_session_idx**: *(int)*  
   Index of the session to reset for memory management.
 
@@ -412,39 +505,21 @@ Options passed to ONNX Runtime for model execution.
 - **enable_mem_pattern**: *(bool, optional)*  
   Enable/disable memory pattern optimization.
 
-- **disable_cpu_ep_fallback**: *(bool, optional)*  
-  Disable fallback to CPU execution provider.
-
-- **disable_quant_qdq**: *(bool, optional)*  
-  Disable quantization QDQ.
-
-- **enable_quant_qdq_cleanup**: *(bool, optional)*  
-  Enable quantization QDQ cleanup.
-
-- **ep_context_enable**: *(bool, optional)*  
-  Enable execution provider context.
-
-- **ep_context_embed_mode**: *(string, optional)*  
-  Execution provider context embed mode.
-
-- **ep_context_file_path**: *(string, optional)*  
-  Path to execution provider context file.
-
 - **log_id**: *(string, optional)*  
   Prefix for logging.
 
 - **log_severity_level**: *(int, optional)*  
   Logging severity level.
 
+- **log_verbosity_level**: *(int, optional)*  
+  Logging verbosity level.
+
 - **enable_profiling**: *(string, optional)*  
   Enable profiling.
 
 - **custom_ops_library**: *(string, optional)*  
   Path to custom ops library.
 
-- **use_env_allocators**: *(bool)*  
-  Use environment allocators.
-
 - **config_entries**: *(array of [string, string] pairs)*  
   Additional config entries.
 
@@ -477,6 +552,26 @@ Options passed to ONNX Runtime for model execution.
 - **options**: *(array of [string, string] pairs)*  
   Provider-specific options.
 
+- **device_filtering_options**: *(object, optional)*  
+  Device filtering constraints for this provider.
+  - **hardware_device_type**: *(string, optional)*  
+    Hardware type to target (CPU, GPU, NPU).
+  - **hardware_device_id**: *(int, optional)*  
+    Hardware device id to target.
+  - **hardware_vendor_id**: *(int, optional)*  
+    Hardware vendor id to target.
+
+---
+
+### RunOptions
+
+Entries added to `OrtRunOptions` for a specific session run.
+
+The options in this section can be any ONNX Runtime run option.
+
+- **run_options**: *(array of [string, string] pairs)*  
+  Key/value config entries applied to the run.
+
 ---
 
 ### Search
@@ -531,6 +626,31 @@ Describes the generation/search parameters.
 - **random_seed**: *(int)*  
   Seed for the random number generator. -1 means use a random device.
 
+- **chunk_size**: *(int, optional)*  
+  Chunk size for prefill chunking during context processing. Chunking is enabled when this value is greater than 0.
+
+---
+
+### Engine
+
+Batching and scheduling settings for the runtime engine.
+
+- **dynamic_batching**: *(object, optional)*  
+  Dynamic batching configuration.
+  - **block_size**: *(int)*  
+    Total number of slots per block. Defaults to 256.
+  - **num_blocks**: *(int, optional)*  
+    Total number of blocks per layer.
+  - **gpu_utilization_factor**: *(float, optional)*  
+    Fraction of free GPU memory to use for key-value cache.
+  - **max_batch_size**: *(int)*  
+    Maximum batch size for dynamically batching requests. Defaults to 16.
+
+- **static_batching**: *(object, optional)*  
+  Static batching configuration.
+  - **max_batch_size**: *(int)*  
+    Maximum batch size for static batching. Defaults to 4.
+
 ---
 
 ## Notes
diff --git a/src/routes/onnx/+page.svelte b/src/routes/onnx/+page.svelte
index 6ee20bf3f269a..c39b0c26e0666 100644
--- a/src/routes/onnx/+page.svelte
+++ b/src/routes/onnx/+page.svelte
@@ -60,10 +60,6 @@
 				ONNX Operators
 			</div>
 
-			<div class="p-3">
-				<a href="https://aka.ms/onnx/modeltypes" class="btn btn-primary">onnx/modeltypes</a>
-				Model classifications (ADO)
-			</div>
 		</div>
 	</section>