From d7679c5f8de5555743967efb2ab8355a358671c4 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 11:26:47 -0500 Subject: [PATCH 01/16] Add comprehensive documentation overhaul with tutorials This commit introduces a complete documentation overhaul covering recent features including vision/multimodal, async support, agents, tool calling, and embeddings. Changes: - Rewrite libs/oci/README.md with full feature coverage - Add tutorials/ directory with progressive learning path - 8 comprehensive tutorials with code examples: - 01-getting-started: Authentication, ChatOCIGenAI, providers - 02-vision-and-multimodal: 13 vision models, Gemini PDF/video/audio - 03-building-ai-agents: create_oci_agent, checkpointing, HiTL - 04-tool-calling-mastery: parallel tools, workflows - 05-structured-output: Pydantic schemas, JSON modes - 07-async-for-production: ainvoke, astream, FastAPI - 10-embeddings: text & image embeddings, RAG patterns - Add tutorials/README.md with learning path index Features documented: - Vision support (13 models, load_image, encode_image) - Gemini multimodal (PDF, video, audio) - Async support (ainvoke, astream, abatch) - AI Agents (create_oci_agent, checkpointing, LangGraph) - Tool calling (parallel_tool_calls, max_sequential_tool_calls) - Structured output (with_structured_output, json_mode) - Image embeddings (embed_image, embed_image_batch) --- libs/oci/README.md | 707 +++++++++++------- .../tutorials/01-getting-started/README.md | 338 +++++++++ .../01-getting-started/code/auth_examples.py | 77 ++ .../01-getting-started/code/basic_chat.py | 29 + .../code/conversation_example.py | 44 ++ .../02-vision-and-multimodal/README.md | 452 +++++++++++ .../code/image_analysis.py | 77 ++ .../code/pdf_processing.py | 84 +++ .../code/video_analysis.py | 98 +++ .../tutorials/03-building-ai-agents/README.md | 354 +++++++++ .../code/agent_with_memory.py | 93 +++ .../03-building-ai-agents/code/basic_agent.py | 87 +++ .../code/human_in_loop.py | 94 +++ .../04-tool-calling-mastery/README.md | 430 +++++++++++ .../code/basic_tools.py | 102 +++ .../code/parallel_tools.py | 105 +++ .../code/tool_workflows.py | 103 +++ .../tutorials/05-structured-output/README.md | 410 ++++++++++ .../code/data_classification.py | 107 +++ .../code/pydantic_schemas.py | 101 +++ .../07-async-for-production/README.md | 411 ++++++++++ .../code/async_basics.py | 59 ++ .../code/batch_processing.py | 83 ++ .../code/fastapi_app.py | 112 +++ libs/oci/tutorials/10-embeddings/README.md | 352 +++++++++ .../10-embeddings/code/image_embeddings.py | 94 +++ .../10-embeddings/code/rag_example.py | 106 +++ .../10-embeddings/code/text_embeddings.py | 65 ++ libs/oci/tutorials/README.md | 122 +++ 29 files changed, 5045 insertions(+), 251 deletions(-) create mode 100644 libs/oci/tutorials/01-getting-started/README.md create mode 100644 libs/oci/tutorials/01-getting-started/code/auth_examples.py create mode 100644 libs/oci/tutorials/01-getting-started/code/basic_chat.py create mode 100644 libs/oci/tutorials/01-getting-started/code/conversation_example.py create mode 100644 libs/oci/tutorials/02-vision-and-multimodal/README.md create mode 100644 libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py create mode 100644 libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py create mode 100644 libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py create mode 100644 libs/oci/tutorials/03-building-ai-agents/README.md create mode 100644 
libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py create mode 100644 libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py create mode 100644 libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py create mode 100644 libs/oci/tutorials/04-tool-calling-mastery/README.md create mode 100644 libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py create mode 100644 libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py create mode 100644 libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py create mode 100644 libs/oci/tutorials/05-structured-output/README.md create mode 100644 libs/oci/tutorials/05-structured-output/code/data_classification.py create mode 100644 libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py create mode 100644 libs/oci/tutorials/07-async-for-production/README.md create mode 100644 libs/oci/tutorials/07-async-for-production/code/async_basics.py create mode 100644 libs/oci/tutorials/07-async-for-production/code/batch_processing.py create mode 100644 libs/oci/tutorials/07-async-for-production/code/fastapi_app.py create mode 100644 libs/oci/tutorials/10-embeddings/README.md create mode 100644 libs/oci/tutorials/10-embeddings/code/image_embeddings.py create mode 100644 libs/oci/tutorials/10-embeddings/code/rag_example.py create mode 100644 libs/oci/tutorials/10-embeddings/code/text_embeddings.py create mode 100644 libs/oci/tutorials/README.md diff --git a/libs/oci/README.md b/libs/oci/README.md index d04edd8c..7be2fb50 100644 --- a/libs/oci/README.md +++ b/libs/oci/README.md @@ -1,407 +1,612 @@ # langchain-oci -This package contains the LangChain integrations with oci. +[![PyPI version](https://img.shields.io/pypi/v/langchain-oci)](https://pypi.org/project/langchain-oci/) +[![Python versions](https://img.shields.io/pypi/pyversions/langchain-oci)](https://pypi.org/project/langchain-oci/) +[![License](https://img.shields.io/badge/License-UPL%201.0-green)](https://opensource.org/licenses/UPL) + +LangChain integrations for Oracle Cloud Infrastructure (OCI) Generative AI. + +## Table of Contents + +- [Installation](#installation) +- [Quick Start](#quick-start) +- [Authentication](#authentication) +- [Chat Models](#chat-models) +- [Vision & Multimodal](#vision--multimodal) +- [Embeddings](#embeddings) +- [Async Support](#async-support) +- [Tool Calling](#tool-calling) +- [Structured Output](#structured-output) +- [AI Agents](#ai-agents) +- [OpenAI Responses API](#openai-responses-api) +- [OCI Data Science Deployments](#oci-data-science-deployments) +- [Provider Reference](#provider-reference) +- [Tutorials](#tutorials) +- [Troubleshooting](#troubleshooting) + +--- ## Installation ```bash -pip install -U langchain-oci +pip install langchain-oci oci ``` -All integrations in this package assume that you have the credentials setup to connect with oci services. 
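If you want to confirm those credentials are in place before calling any model, a quick sanity check with the `oci` SDK's config helpers can help (a sketch; the profile name is an assumption):

```python
# Sanity-check local OCI credentials before calling any model.
# Assumes an API-key profile in ~/.oci/config; adjust profile_name if needed.
import oci

config = oci.config.from_file(profile_name="DEFAULT")
oci.config.validate_config(config)  # raises if required keys are missing
print(f"Config OK: user {config['user']} in region {config['region']}")
```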
--- ## Quick Start -This repository includes two main integration categories: +```python +from langchain_oci import ChatOCIGenAI -- [OCI Generative AI](#oci-generative-ai-examples) -- [OCI Data Science (Model Deployment)](#oci-data-science-model-deployment-examples) +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..your-compartment-id", +) +response = llm.invoke("What is the capital of France?") +print(response.content) +``` --- -## OCI Generative AI Examples +## Authentication -OCI Generative AI supports two types of models: -- **On-Demand Models**: Pre-hosted foundation models. -- **DAC Models**: Models hosted on Dedicated AI Clusters (DAC), including custom models imported from Hugging Face or Object Storage +Four authentication methods are supported: -### 1a. Use a Chat Model (On-Demand) +### API Key (Default) -`ChatOCIGenAI` class exposes chat models from OCI Generative AI. +Uses credentials from `~/.oci/config`: ```python -from langchain_oci import ChatOCIGenAI - -# Using a pre-hosted on-demand model llm = ChatOCIGenAI( - model_id="MY_MODEL_ID", # Pre-hosted model ID - service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", # Regional endpoint - compartment_id="ocid1.compartment.oc1..xxxxx", # Your compartment OCID - model_kwargs={"max_tokens": 1024}, # Use max_completion_tokens for OpenAI models - auth_profile="MY_AUTH_PROFILE", - is_stream=True, - auth_type="SECURITY_TOKEN" + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + auth_type="API_KEY", # Default + auth_profile="DEFAULT", # Profile name in ~/.oci/config ) - -response = llm.invoke("Sing a ballad of LangChain.") ``` -### 1b. Use a Chat Model (Imported Model on DAC) +### Security Token (Session-Based) -For models you've imported and deployed on a Dedicated AI Cluster: +```bash +oci session authenticate --profile-name MY_PROFILE +``` ```python -from langchain_oci import ChatOCIGenAI - -# Using an imported model on Dedicated AI Cluster llm = ChatOCIGenAI( - model_id="ocid1.generativeaiendpoint.oc1.us-chicago-1.xxxxx", # Endpoint OCID from your DAC - provider="generic", # Provider type: "cohere", "google", "meta", or "generic" - service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", # Regional endpoint - compartment_id="ocid1.compartment.oc1..xxxxx", # Your compartment OCID - auth_type="SECURITY_TOKEN", # Authentication type - auth_profile="MY_AUTH_PROFILE", - model_kwargs={"temperature": 0.7, "max_tokens": 500}, + model_id="meta.llama-3.3-70b-instruct", + auth_type="SECURITY_TOKEN", + auth_profile="MY_PROFILE", + ... ) - -response = llm.invoke("Hello, what is your name?") ``` -**Additional Arguments for Imported Models:** -- `model_id`: Use the **endpoint OCID** (starts with `ocid1.generativeaiendpoint`) -- `provider`: Provider type for your model. Available providers: - - `"cohere"`: For Cohere models (CohereProvider) - - `"google"`: For Google Gemini models (GeminiProvider) - automatically handles `max_output_tokens` to `max_tokens` parameter mapping - - `"meta"`: For Meta Llama models (MetaProvider) - - `"generic"`: Default for other models including OpenAI (GenericProvider) - If not specified, the provider is auto-detected from the model_id prefix. 
-- `service_endpoint`: Use regional API endpoint (not the internal cluster URL) +### Instance Principal +For applications running on OCI compute instances: -### 1c. Multimodal Content (Vision, PDF, Video, Audio) +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + auth_type="INSTANCE_PRINCIPAL", + ... +) +``` + +### Resource Principal -`ChatOCIGenAI` supports multimodal content types including images, PDFs, video, and audio. Support varies by model: +For OCI Functions and other resources: + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + auth_type="RESOURCE_PRINCIPAL", + ... +) +``` -| Model Family | Images | PDF | Video | Audio | -|--------------|--------|-----|-------|-------| -| **Google Gemini** | ✓ | ✓ | ✓ | ✓ | -| **Meta Llama Vision** | ✓ | - | - | - | -| **Cohere Vision** | ✓ | - | - | - | -| **OpenAI GPT-5.x** | ✓ | - | - | - | +--- -**Note:** Other models may have limited or no multimodal support. Check your model's documentation. +## Chat Models -#### Image Analysis +### On-Demand Models ```python -import base64 -from langchain_core.messages import HumanMessage from langchain_oci import ChatOCIGenAI llm = ChatOCIGenAI( - model_id="meta.llama-3.2-90b-vision-instruct", # Any vision model + model_id="meta.llama-3.3-70b-instruct", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - compartment_id="MY_COMPARTMENT_ID", + compartment_id="ocid1.compartment.oc1..xxx", + model_kwargs={"temperature": 0.7, "max_tokens": 500}, ) - -with open("image.png", "rb") as f: - image_b64 = base64.b64encode(f.read()).decode("utf-8") - -message = HumanMessage(content=[ - {"type": "text", "text": "Describe this image"}, - {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}}, -]) -response = llm.invoke([message]) ``` -#### PDF Document Analysis +### DAC/Imported Models -```python -import base64 -from langchain_core.messages import HumanMessage -from langchain_oci import ChatOCIGenAI +For models deployed on Dedicated AI Clusters: +```python llm = ChatOCIGenAI( - model_id="google.gemini-2.5-flash", # Gemini supports PDF + model_id="ocid1.generativeaiendpoint.oc1.us-chicago-1.xxxxx", # Endpoint OCID + provider="meta", # "cohere", "google", "meta", "generic" service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - compartment_id="MY_COMPARTMENT_ID", + compartment_id="ocid1.compartment.oc1..xxx", ) +``` -with open("document.pdf", "rb") as f: - pdf_b64 = base64.b64encode(f.read()).decode("utf-8") +### Provider Matrix -message = HumanMessage(content=[ - {"type": "text", "text": "Summarize this PDF document"}, - {"type": "document_url", "document_url": {"url": f"data:application/pdf;base64,{pdf_b64}"}}, -]) -response = llm.invoke([message]) +| Provider | Models | Features | +|----------|--------|----------| +| **Meta** | Llama 3.2, 3.3, 4 | Vision, parallel tools | +| **Google** | Gemini 2.0/2.5 Flash | Multimodal (PDF, video, audio) | +| **xAI** | Grok 4 | Vision, reasoning | +| **Cohere** | Command R+, Command A | RAG, vision (V2) | +| **OpenAI** | GPT-4, o1 | Reasoning | +| **Mistral** | Mistral models | Fast inference | + +--- + +## Vision & Multimodal + +### Vision-Capable Models + +```python +from langchain_oci.utils.vision import VISION_MODELS + +# 13+ vision-capable models +print(VISION_MODELS) +# ['meta.llama-3.2-90b-vision-instruct', 'google.gemini-2.5-flash', 'xai.grok-4', ...] 
``` -#### Video Analysis +### Image Analysis ```python -import base64 from langchain_core.messages import HumanMessage -from langchain_oci import ChatOCIGenAI +from langchain_oci import ChatOCIGenAI, load_image llm = ChatOCIGenAI( - model_id="google.gemini-2.5-flash", + model_id="meta.llama-3.2-90b-vision-instruct", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - compartment_id="MY_COMPARTMENT_ID", + compartment_id="ocid1.compartment.oc1..xxx", ) -with open("video.mp4", "rb") as f: - video_b64 = base64.b64encode(f.read()).decode("utf-8") +message = HumanMessage( + content=[ + {"type": "text", "text": "Describe this image"}, + load_image("./photo.jpg"), + ] +) -message = HumanMessage(content=[ - {"type": "text", "text": "What happens in this video?"}, - {"type": "video_url", "video_url": {"url": f"data:video/mp4;base64,{video_b64}"}}, -]) response = llm.invoke([message]) ``` -#### Audio Analysis +### Utility Functions + +| Function | Description | +|----------|-------------| +| `load_image(path)` | Load image file as content block | +| `encode_image(bytes, mime_type)` | Encode bytes as content block | +| `to_data_uri(image)` | Convert to data URI string | +| `is_vision_model(model_id)` | Check if model supports vision | + +### Gemini Multimodal + +Gemini models support PDF, video, and audio: ```python import base64 from langchain_core.messages import HumanMessage from langchain_oci import ChatOCIGenAI -llm = ChatOCIGenAI( - model_id="google.gemini-2.5-flash", - service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - compartment_id="MY_COMPARTMENT_ID", -) +llm = ChatOCIGenAI(model_id="google.gemini-2.0-flash", ...) -with open("audio.wav", "rb") as f: - audio_b64 = base64.b64encode(f.read()).decode("utf-8") +# PDF +with open("doc.pdf", "rb") as f: + pdf_data = base64.b64encode(f.read()).decode() message = HumanMessage(content=[ - {"type": "text", "text": "Transcribe this audio"}, - {"type": "audio_url", "audio_url": {"url": f"data:audio/wav;base64,{audio_b64}"}}, + {"type": "text", "text": "Summarize this PDF"}, + {"type": "media", "data": pdf_data, "mime_type": "application/pdf"} ]) -response = llm.invoke([message]) ``` -**Note:** Document, video, and audio content requires a multimodal-capable model. Check your model's documentation for supported content types. +--- +## Embeddings -### 2. Use a Completion Model -`OCIGenAI` class exposes LLMs from OCI Generative AI. +### Text Embeddings ```python -from langchain_oci import OCIGenAI +from langchain_oci import OCIGenAIEmbeddings + +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) -llm = OCIGenAI() -llm.invoke("The meaning of life is") +# Single query +vector = embeddings.embed_query("What is machine learning?") + +# Multiple documents +vectors = embeddings.embed_documents(["Doc 1", "Doc 2"]) ``` -### 3. Use an Embedding Model -`OCIGenAIEmbeddings` class exposes embeddings from OCI Generative AI. +### Image Embeddings ```python -from langchain_oci import OCIGenAIEmbeddings +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-v4.0", # Multimodal model + ... +) + +# Single image +vector = embeddings.embed_image("./photo.jpg") -embeddings = OCIGenAIEmbeddings() -embeddings.embed_query("What is the meaning of life?") +# Batch +vectors = embeddings.embed_image_batch(["img1.jpg", "img2.jpg"]) ``` -### 3b. 
Use Image Embeddings (Multimodal) -`OCIGenAIEmbeddings` supports image embeddings with multimodal models like `cohere.embed-v4.0`. +### Embedding Models + +| Model | Type | Dimensions | +|-------|------|------------| +| `cohere.embed-english-v3.0` | Text | 1024 | +| `cohere.embed-multilingual-v3.0` | Text | 1024 | +| `cohere.embed-v4.0` | Text + Image | 256-1536 | + +--- + +## Async Support + +All chat models support async operations via LangChain's base classes: ```python -from langchain_oci import OCIGenAIEmbeddings +import asyncio +from langchain_oci import ChatOCIGenAI -embeddings = OCIGenAIEmbeddings( - model_id="cohere.embed-v4.0", - service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - compartment_id="ocid1.compartment.oc1..xxxxx", -) +llm = ChatOCIGenAI(...) -# Embed a single image (from file path, bytes, or data URI) -image_vector = embeddings.embed_image("path/to/image.png") +async def main(): + # Single async request + response = await llm.ainvoke("Hello!") -# Embed multiple images in a batch -image_vectors = embeddings.embed_image_batch([ - "path/to/image1.png", - "path/to/image2.jpg", - b"\x89PNG...", # raw bytes -]) + # Async streaming + async for chunk in llm.astream("Tell me a story"): + print(chunk.content, end="") -# Image and text embeddings share the same vector space for cross-modal retrieval -text_vector = embeddings.embed_query("a photo of a cat") + # Concurrent requests + results = await asyncio.gather( + llm.ainvoke("Question 1"), + llm.ainvoke("Question 2"), + llm.ainvoke("Question 3"), + ) + +asyncio.run(main()) ``` -**Note:** Image embeddings require a multimodal model. Use `IMAGE_EMBEDDING_MODELS` to check supported models. +--- -### 4. Use Structured Output -`ChatOCIGenAI` supports structured output. +## Tool Calling -**Note:** The default method is `function_calling`. If default method returns `None` (e.g., for Google Gemini models using GeminiProvider), try `json_schema` or `json_mode`. +### Basic Tools ```python +from langchain_core.tools import tool from langchain_oci import ChatOCIGenAI -from pydantic import BaseModel -class Joke(BaseModel): - setup: str - punchline: str +@tool +def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"Weather in {city}: 72F, sunny" + +llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", ...) +llm_with_tools = llm.bind_tools([get_weather]) -llm = ChatOCIGenAI() -structured_llm = llm.with_structured_output(Joke) -structured_llm.invoke("Tell me a joke about programming") +response = llm_with_tools.invoke("What's the weather in Chicago?") ``` -### 5. Use OpenAI Responses API -`ChatOCIOpenAI` supports OpenAI Responses API. +### Parallel Tool Calls (Llama 4+) ```python -from oci_openai import ( - OciSessionAuth, +llm_with_tools = llm.bind_tools( + [get_weather, get_time], + parallel_tool_calls=True, # Enable parallel execution ) -from langchain_oci import ChatOCIOpenAI -client = ChatOCIOpenAI( - auth=OciSessionAuth(profile_name="MY_PROFILE_NAME"), - compartment_id="MY_COMPARTMENT_ID", - region="us-chicago-1", - model="openai.gpt-4.1", - conversation_store_id="MY_CONVERSATION_STORE_ID" - ) -messages = [ - ( - "system", - "You are a helpful translator. Translate the user sentence to French.", - ), - ("human", "I love programming."), - ] -response = client.invoke(messages) ``` -NOTE: By default `store` argument is set to `True` which requires passing `conversation_store_id`. You can set `store` to `False` and not pass `conversation_store_id`. 
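For reference, the stateless variant looks like this; a sketch reusing the same placeholder values as the example above:

```python
from oci_openai import OciSessionAuth
from langchain_oci import ChatOCIOpenAI

# store=False disables server-side conversation storage,
# so no conversation_store_id is required.
client = ChatOCIOpenAI(
    auth=OciSessionAuth(profile_name="MY_PROFILE_NAME"),
    compartment_id="MY_COMPARTMENT_ID",
    region="us-chicago-1",
    model="openai.gpt-4.1",
    store=False,
)
response = client.invoke([
    ("system", "You are a helpful translator. Translate the user sentence to French."),
    ("human", "I love programming."),
])
print(response.content)
```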
+ +### Tool Configuration + +| Parameter | Description | +|-----------|-------------| +| `parallel_tool_calls` | Enable parallel tool execution (Llama 4+) | +| `max_sequential_tool_calls` | Limit consecutive tool calls (default: 8) | +| `tool_result_guidance` | Guide model to use tool results naturally | +| `tool_choice` | "auto", "required", "none", or tool name | + +--- + +## Structured Output + ```python -from oci_openai import ( - OciSessionAuth, +from pydantic import BaseModel +from langchain_oci import ChatOCIGenAI + +class Contact(BaseModel): + name: str + email: str + +llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", ...) +structured_llm = llm.with_structured_output(Contact) + +result = structured_llm.invoke("Extract: John Doe john@example.com") +print(result.name) # "John Doe" +print(result.email) # "john@example.com" +``` + +### Methods + +| Method | Description | +|--------|-------------| +| `function_calling` | Default, most reliable | +| `json_mode` | Simple schemas | +| `json_schema` | Native OCI support | + +--- + +## AI Agents + +### create_oci_agent() + +```python +from langchain_core.tools import tool +from langchain_oci import create_oci_agent + +@tool +def search(query: str) -> str: + """Search for information.""" + return f"Results for: {query}" + +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[search], + compartment_id="ocid1.compartment.oc1..xxx", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + system_prompt="You are a helpful assistant.", ) -from langchain_oci import ChatOCIOpenAI -client = ChatOCIOpenAI( - auth=OciSessionAuth(profile_name="MY_PROFILE_NAME"), - compartment_id="MY_COMPARTMENT_ID", - region="us-chicago-1", - model="openai.gpt-4.1", - store=False - ) -messages = [ - ( - "system", - "You are a helpful translator. Translate the user sentence to French.", - ), - ("human", "I love programming."), - ] -response = client.invoke(messages) + +from langchain_core.messages import HumanMessage +result = agent.invoke({ + "messages": [HumanMessage(content="Search for Python tutorials")] +}) ``` -### 6. Use Parallel Tool Calling (Meta/Llama 4+ models only) -Enable parallel tool calling to execute multiple tools simultaneously, improving performance for multi-tool workflows. +### With Checkpointing ```python -from langchain_oci import ChatOCIGenAI +from langgraph.checkpoint.memory import MemorySaver -llm = ChatOCIGenAI( - model_id="meta.llama-4-maverick-17b-128e-instruct-fp8", - service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - compartment_id="MY_COMPARTMENT_ID", +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[search], + checkpointer=MemorySaver(), + ... ) -# Enable parallel tool calling in bind_tools -llm_with_tools = llm.bind_tools( - [get_weather, calculate_tip, get_population], - parallel_tool_calls=True # Tools can execute simultaneously +# Conversations persist by thread_id +result = agent.invoke( + {"messages": [HumanMessage(content="Hello")]}, + config={"configurable": {"thread_id": "user_123"}}, +) +``` + +### Human-in-the-Loop + +```python +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[dangerous_action], + checkpointer=MemorySaver(), + interrupt_before=["tools"], # Pause before tool execution ) ``` -**Note:** Parallel tool calling is only supported for Llama 4+ models. Llama 3.x (including 3.3) and Cohere models will raise an error if this parameter is used. 
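The tool configuration table above also lists `tool_choice`. Assuming it is accepted as a `bind_tools` keyword argument like `parallel_tool_calls` (only the latter is shown that way in this README, so treat the placement as an assumption), a combined call might look like:

```python
# Sketch: combining tool-calling options on a Llama 4 model.
# tool_choice="auto" lets the model decide whether to call a tool;
# parallel_tool_calls=True is Llama 4+ only, per the note above.
llm_with_tools = llm.bind_tools(
    [get_weather, calculate_tip, get_population],
    tool_choice="auto",
    parallel_tool_calls=True,
)
response = llm_with_tools.invoke(
    "What's the weather in Chicago, and what's a 20% tip on $45?"
)
```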
+--- + +## OpenAI Responses API +```python +from oci_openai import OciSessionAuth +from langchain_oci import ChatOCIOpenAI + +client = ChatOCIOpenAI( + auth=OciSessionAuth(profile_name="MY_PROFILE"), + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", + conversation_store_id="ocid1.conversationstore...", # Required if store=True +) + +response = client.invoke([ + ("system", "You are a helpful assistant."), + ("human", "Hello!"), +]) +``` -## OCI Data Science Model Deployment Examples +--- -### 1. Use a Chat Model +## OCI Data Science Deployments -You may instantiate the OCI Data Science model with the generic `ChatOCIModelDeployment` or framework specific class like `ChatOCIModelDeploymentVLLM`. +### ChatOCIModelDeployment ```python -from langchain_oci.chat_models import ChatOCIModelDeployment, ChatOCIModelDeploymentVLLM +from langchain_oci.chat_models import ChatOCIModelDeployment -# Create an instance of OCI Model Deployment Endpoint -# Replace the endpoint uri with your own endpoint = "https://modeldeployment..oci.customer-oci.com//predict" -messages = [ - ( - "system", - "You are a helpful assistant that translates English to French. Translate the user sentence.", - ), - ("human", "I love programming."), -] - chat = ChatOCIModelDeployment( endpoint=endpoint, streaming=True, - max_retries=1, - model_kwargs={ - "temperature": 0.2, - "max_tokens": 512, - }, # other model params... - default_headers={ - "route": "/v1/chat/completions", - # other request headers ... - }, + model_kwargs={"temperature": 0.2, "max_tokens": 512}, ) -chat.invoke(messages) -chat_vllm = ChatOCIModelDeploymentVLLM(endpoint=endpoint) -chat_vllm.invoke(messages) +response = chat.invoke("Hello!") ``` -### 2. Use a Completion Model -You may instantiate the OCI Data Science model with `OCIModelDeploymentLLM` or `OCIModelDeploymentVLLM`. +### vLLM/TGI Deployments ```python -from langchain_oci.llms import OCIModelDeploymentLLM, OCIModelDeploymentVLLM +from langchain_oci.chat_models import ChatOCIModelDeploymentVLLM -# Create an instance of OCI Model Deployment Endpoint -# Replace the endpoint uri and model name with your own -endpoint = "https://modeldeployment..oci.customer-oci.com//predict" +chat = ChatOCIModelDeploymentVLLM(endpoint=endpoint) +response = chat.invoke("Hello!") +``` -llm = OCIModelDeploymentLLM( - endpoint=endpoint, - model="odsc-llm", -) -llm.invoke("Who is the first president of United States?") +--- -vllm = OCIModelDeploymentVLLM( - endpoint=endpoint, -) -vllm.invoke("Who is the first president of United States?") +## Provider Reference + +### Meta Llama + +```python +# Vision models +"meta.llama-3.2-90b-vision-instruct" +"meta.llama-3.2-11b-vision-instruct" + +# Text models +"meta.llama-3.3-70b-instruct" + +# Llama 4 (parallel tools) +"meta.llama-4-scout-17b-16e-instruct" +"meta.llama-4-maverick-17b-128e-instruct-fp8" ``` -### 3. Use an Embedding Model -You may instantiate the OCI Data Science model with the `OCIModelDeploymentEndpointEmbeddings`. 
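A minimal sketch of that embeddings class, mirroring the endpoint placeholder used for the chat deployments above:

```python
from langchain_oci.embeddings import OCIModelDeploymentEndpointEmbeddings

# Replace the endpoint uri with your own
endpoint = "https://modeldeployment..oci.customer-oci.com//predict"

embeddings = OCIModelDeploymentEndpointEmbeddings(endpoint=endpoint)

query_vector = embeddings.embed_query("Hello World!")
doc_vectors = embeddings.embed_documents(
    ["This is a sample document", "and here is another one"]
)
```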
+### Google Gemini ```python -from langchain_oci.embeddings import OCIModelDeploymentEndpointEmbeddings +"google.gemini-2.0-flash" # Fast, multimodal +"google.gemini-2.5-flash" # Latest +"google.gemini-2.5-pro" # Most capable +``` -# Create an instance of OCI Model Deployment Endpoint -# Replace the endpoint uri with your own -endpoint = "https://modeldeployment..oci.customer-oci.com//predict" +### xAI Grok -embeddings = OCIModelDeploymentEndpointEmbeddings( - endpoint=endpoint, -) +```python +"xai.grok-4" # Vision + reasoning +"xai.grok-4-fast-reasoning" # Optimized reasoning +``` + +### Cohere + +```python +"cohere.command-r-plus" # Powerful reasoning +"cohere.command-a-03-2025" # Latest +"cohere.command-a-vision" # Vision support (V2 API) +``` + +--- + +## Tutorials + +Comprehensive tutorials covering all features: + +| Tutorial | Description | +|----------|-------------| +| [01. Getting Started](./tutorials/01-getting-started/) | Authentication, basic chat | +| [02. Vision & Multimodal](./tutorials/02-vision-and-multimodal/) | Images, PDFs, video, audio | +| [03. Building AI Agents](./tutorials/03-building-ai-agents/) | create_oci_agent, checkpointing | +| [04. Tool Calling Mastery](./tutorials/04-tool-calling-mastery/) | Parallel tools, workflows | +| [05. Structured Output](./tutorials/05-structured-output/) | Pydantic, JSON modes | +| [07. Async for Production](./tutorials/07-async-for-production/) | ainvoke, astream, FastAPI | +| [10. Embeddings](./tutorials/10-embeddings/) | Text & image embeddings, RAG | + +See [tutorials/README.md](./tutorials/README.md) for the full learning path. + +--- -query = "Hello World!" -embeddings.embed_query(query) +## Troubleshooting -documents = ["This is a sample document", "and here is another one"] -embeddings.embed_documents(documents) +### Authentication Errors + +``` +AuthenticationError: Could not authenticate +``` +- Verify `~/.oci/config` exists and is valid +- Check profile name matches `auth_profile` +- For session auth: `oci session authenticate --profile-name MY_PROFILE` + +### Model Not Found + +``` +NotAuthorizedOrNotFound: model_id ``` +- Verify model ID spelling +- Check model is available in your region +- Ensure compartment has GenAI access + +### Tool Calling Issues + +``` +Model keeps calling the same tool +``` +- Enable `tool_result_guidance=True` +- Set `max_sequential_tool_calls` limit +- Check tool returns informative results + +### Vision Not Working + +``` +Content type not supported +``` +- Verify using a vision-capable model (`is_vision_model()`) +- Check image format (PNG, JPEG, GIF, WebP) +- Reduce image size if too large + +--- + +## API Reference + +### Chat Models + +| Class | Description | +|-------|-------------| +| `ChatOCIGenAI` | Main chat model for OCI GenAI | +| `ChatOCIOpenAI` | OpenAI Responses API compatibility | +| `ChatOCIModelDeployment` | Custom OCI Data Science deployments | + +### Embeddings + +| Class | Description | +|-------|-------------| +| `OCIGenAIEmbeddings` | Text and image embeddings | +| `OCIModelDeploymentEndpointEmbeddings` | Custom deployment embeddings | + +### Agents + +| Function | Description | +|----------|-------------| +| `create_oci_agent()` | Create ReAct agent with tools | + +### Utilities + +| Function | Description | +|----------|-------------| +| `load_image()` | Load image for vision models | +| `encode_image()` | Encode bytes for vision models | +| `to_data_uri()` | Convert to data URI | +| `is_vision_model()` | Check vision support | + +--- + +## Contributing + +See 
[CONTRIBUTING.md](../../CONTRIBUTING.md) for development setup and guidelines. + +## License + +This project is licensed under the [Universal Permissive License (UPL) 1.0](https://opensource.org/licenses/UPL). diff --git a/libs/oci/tutorials/01-getting-started/README.md b/libs/oci/tutorials/01-getting-started/README.md new file mode 100644 index 00000000..b43d086b --- /dev/null +++ b/libs/oci/tutorials/01-getting-started/README.md @@ -0,0 +1,338 @@ +# Tutorial 01: Getting Started with OCI GenAI + +Welcome to langchain-oci! This tutorial will get you up and running with Oracle Cloud Infrastructure's Generative AI service integrated with LangChain. + +## What You'll Build + +By the end of this tutorial, you'll be able to: +- Configure authentication for OCI Generative AI +- Create your first chat conversation +- Understand providers and model selection +- Use streaming responses for real-time output + +## Prerequisites + +- An OCI account with access to Generative AI service +- Python 3.9 or higher +- OCI CLI configured (for API key authentication) + +## Concepts Covered + +| Concept | Description | +|---------|-------------| +| `ChatOCIGenAI` | Main chat model class | +| Authentication | 4 methods: API Key, Instance Principal, Resource Principal, Session Token | +| Providers | Meta, Cohere, Google (Gemini), xAI (Grok) | +| `invoke()` | Send a message and get a response | +| `stream()` | Get streaming responses | + +--- + +## Part 1: Installation & Setup + +### Install the Package + +```bash +pip install langchain-oci oci +``` + +### Configure OCI CLI (API Key Authentication) + +If you haven't already, set up the OCI CLI: + +```bash +oci setup config +``` + +This creates `~/.oci/config` with your credentials. The default profile is named `DEFAULT`. + +--- + +## Part 2: Your First Chat + +Let's start with the simplest possible example: + +```python +from langchain_oci import ChatOCIGenAI + +# Create a chat model +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..your-compartment-id", +) + +# Send a message +response = llm.invoke("What is the capital of France?") +print(response.content) +``` + +**Output:** +``` +The capital of France is Paris. +``` + +### Understanding the Parameters + +| Parameter | Required | Description | +|-----------|----------|-------------| +| `model_id` | Yes | The model to use (e.g., `meta.llama-3.3-70b-instruct`) | +| `service_endpoint` | Yes | Regional endpoint for GenAI service | +| `compartment_id` | Yes | Your OCI compartment OCID | +| `auth_type` | No | Authentication method (default: `API_KEY`) | +| `auth_profile` | No | Profile name in `~/.oci/config` (default: `DEFAULT`) | + +### Service Endpoints by Region + +| Region | Endpoint | +|--------|----------| +| Chicago | `https://inference.generativeai.us-chicago-1.oci.oraclecloud.com` | +| Frankfurt | `https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com` | + +--- + +## Part 3: Authentication Methods + +### Method 1: API Key (Default) + +Uses credentials from `~/.oci/config`. 
This is the default and simplest method: + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + auth_type="API_KEY", # Optional, this is the default + auth_profile="DEFAULT", # Optional, uses DEFAULT profile +) +``` + +### Method 2: Security Token (Session-Based) + +For interactive sessions with temporary credentials: + +```bash +# First, authenticate +oci session authenticate --profile-name MY_PROFILE +``` + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + auth_type="SECURITY_TOKEN", + auth_profile="MY_PROFILE", +) +``` + +### Method 3: Instance Principal + +For applications running on OCI compute instances: + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + auth_type="INSTANCE_PRINCIPAL", +) +``` + +### Method 4: Resource Principal + +For OCI Functions and other resources: + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + auth_type="RESOURCE_PRINCIPAL", +) +``` + +--- + +## Part 4: Choosing a Provider & Model + +### Available Providers + +| Provider | Models | Strengths | +|----------|--------|-----------| +| **Meta** | Llama 3.2, 3.3, 4 | Excellent general-purpose, tool calling | +| **Cohere** | Command R+, Command A | RAG, document processing | +| **Google** | Gemini 2.0 Flash, 2.5 | Multimodal (PDF, video, audio) | +| **xAI** | Grok 4 | Fast reasoning, vision | + +### Popular Model IDs + +```python +# Meta Llama models +"meta.llama-3.3-70b-instruct" # Latest text model +"meta.llama-3.2-90b-vision-instruct" # Vision-capable +"meta.llama-4-scout-17b-16e-instruct" # Llama 4 with parallel tools + +# Cohere models +"cohere.command-r-plus" # Powerful reasoning +"cohere.command-a-03-2025" # Latest, with vision + +# Google Gemini models +"google.gemini-2.0-flash" # Fast, multimodal + +# xAI Grok models +"xai.grok-4" # Reasoning and vision +``` + +### Provider Detection + +The provider is automatically detected from the model ID: + +```python +# Auto-detects "meta" provider +llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", ...) + +# Or specify explicitly +llm = ChatOCIGenAI(model_id="my-custom-model", provider="meta", ...) +``` + +--- + +## Part 5: Conversations with Messages + +For multi-turn conversations, use LangChain message types: + +```python +from langchain_core.messages import HumanMessage, AIMessage, SystemMessage +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Multi-turn conversation +messages = [ + SystemMessage(content="You are a helpful cooking assistant."), + HumanMessage(content="I have chicken, rice, and vegetables."), + AIMessage(content="Great! 
You could make a stir-fry or a chicken rice bowl."), + HumanMessage(content="How do I make a stir-fry?"), +] + +response = llm.invoke(messages) +print(response.content) +``` + +--- + +## Part 6: Streaming Responses + +For real-time output, use streaming: + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Stream the response +for chunk in llm.stream("Tell me a short story about a robot."): + print(chunk.content, end="", flush=True) +``` + +--- + +## Part 7: Model Parameters + +Fine-tune model behavior with `model_kwargs`: + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + model_kwargs={ + "temperature": 0.7, # Creativity (0.0 = deterministic, 1.0 = creative) + "max_tokens": 500, # Maximum response length + "top_p": 0.9, # Nucleus sampling + "top_k": 50, # Top-k sampling + } +) +``` + +### Parameter Reference + +| Parameter | Range | Effect | +|-----------|-------|--------| +| `temperature` | 0.0 - 1.0 | Higher = more creative, lower = more focused | +| `max_tokens` | 1 - 4096+ | Maximum tokens in the response | +| `top_p` | 0.0 - 1.0 | Nucleus sampling cutoff | +| `top_k` | 1 - 500 | Number of top tokens to consider | + +--- + +## Summary + +In this tutorial, you learned: + +1. **Installation** - `pip install langchain-oci oci` +2. **Basic chat** - Using `ChatOCIGenAI` with `invoke()` +3. **Authentication** - 4 methods (API Key, Session, Instance Principal, Resource Principal) +4. **Providers** - Meta, Cohere, Google, xAI and their model families +5. **Conversations** - Multi-turn chat with message types +6. **Streaming** - Real-time responses with `stream()` +7. **Parameters** - Fine-tuning with `model_kwargs` + +## Next Steps + +- **[Tutorial 02: Vision & Multimodal](../02-vision-and-multimodal/)** - Analyze images, PDFs, and videos +- **[Tutorial 03: Building AI Agents](../03-building-ai-agents/)** - Create autonomous agents with tools + +## API Reference + +| Class/Function | Description | +|----------------|-------------| +| `ChatOCIGenAI` | Main chat model class | +| `invoke(input)` | Send messages, get response | +| `stream(input)` | Stream response chunks | +| `batch(inputs)` | Process multiple inputs | + +## Troubleshooting + +### "Authentication failed" +- Verify `~/.oci/config` exists and contains valid credentials +- Check that your profile name matches `auth_profile` +- Ensure your API key hasn't expired + +### "NotAuthorizedOrNotFound" +- Verify `compartment_id` is correct +- Check you have permissions for GenAI service in that compartment + +### "InvalidParameter: model_id" +- Ensure model ID is spelled correctly +- Check model is available in your region + +--- + +## Appendix: Legacy OCIGenAI LLM Class + +For text completion (non-chat) use cases, the legacy `OCIGenAI` class is available: + +```python +from langchain_oci import OCIGenAI + +llm = OCIGenAI( + model_id="cohere.command-r-plus", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Text completion (not chat) +response = llm.invoke("Complete this sentence: The quick brown fox") +``` + +**Note:** For most use cases, prefer `ChatOCIGenAI` over `OCIGenAI`. 
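The API reference above also lists `batch()`. A quick sketch of sending several independent prompts in one call, reusing the `llm` configured earlier in this tutorial:

```python
# batch() runs several independent prompts in one call (sketch).
prompts = [
    "What is the capital of France?",
    "What is the capital of Japan?",
    "What is the capital of Brazil?",
]
responses = llm.batch(prompts)
for prompt, response in zip(prompts, responses):
    print(f"{prompt} -> {response.content}")
```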
diff --git a/libs/oci/tutorials/01-getting-started/code/auth_examples.py b/libs/oci/tutorials/01-getting-started/code/auth_examples.py new file mode 100644 index 00000000..a80c3a9f --- /dev/null +++ b/libs/oci/tutorials/01-getting-started/code/auth_examples.py @@ -0,0 +1,77 @@ +# Tutorial 01: Authentication Examples +# Demonstrates the 4 authentication methods for OCI Generative AI + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" +MODEL_ID = "meta.llama-3.3-70b-instruct" + + +def example_api_key(): + """Method 1: API Key Authentication (Default) + + Uses credentials from ~/.oci/config file. + This is the most common method for local development. + """ + llm = ChatOCIGenAI( + model_id=MODEL_ID, + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + auth_type="API_KEY", # Optional, this is the default + auth_profile="DEFAULT", # Optional, uses DEFAULT profile + ) + return llm.invoke("Hello!") + + +def example_security_token(): + """Method 2: Security Token (Session-Based) + + First run: oci session authenticate --profile-name MY_PROFILE + Uses temporary session credentials. + """ + llm = ChatOCIGenAI( + model_id=MODEL_ID, + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + auth_type="SECURITY_TOKEN", + auth_profile="MY_PROFILE", + ) + return llm.invoke("Hello!") + + +def example_instance_principal(): + """Method 3: Instance Principal + + For applications running on OCI Compute instances. + No credentials needed - uses instance metadata. + """ + llm = ChatOCIGenAI( + model_id=MODEL_ID, + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + auth_type="INSTANCE_PRINCIPAL", + ) + return llm.invoke("Hello!") + + +def example_resource_principal(): + """Method 4: Resource Principal + + For OCI Functions and other OCI resources. + No credentials needed - uses resource metadata. 
+ """ + llm = ChatOCIGenAI( + model_id=MODEL_ID, + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + auth_type="RESOURCE_PRINCIPAL", + ) + return llm.invoke("Hello!") + + +if __name__ == "__main__": + # Try API Key authentication (default) + print("Testing API Key authentication...") + response = example_api_key() + print(f"Response: {response.content}") diff --git a/libs/oci/tutorials/01-getting-started/code/basic_chat.py b/libs/oci/tutorials/01-getting-started/code/basic_chat.py new file mode 100644 index 00000000..5a28e749 --- /dev/null +++ b/libs/oci/tutorials/01-getting-started/code/basic_chat.py @@ -0,0 +1,29 @@ +# Tutorial 01: Basic Chat Example +# This is the simplest way to use OCI Generative AI with LangChain + +from langchain_oci import ChatOCIGenAI + +# Configuration - replace with your values +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + +# Create the chat model +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + model_kwargs={ + "temperature": 0.7, + "max_tokens": 500, + } +) + +# Simple invocation +response = llm.invoke("What is the capital of France?") +print(f"Response: {response.content}") + +# Streaming response +print("\nStreaming response:") +for chunk in llm.stream("Tell me 3 interesting facts about Paris."): + print(chunk.content, end="", flush=True) +print() # Newline at the end diff --git a/libs/oci/tutorials/01-getting-started/code/conversation_example.py b/libs/oci/tutorials/01-getting-started/code/conversation_example.py new file mode 100644 index 00000000..182c6c0d --- /dev/null +++ b/libs/oci/tutorials/01-getting-started/code/conversation_example.py @@ -0,0 +1,44 @@ +# Tutorial 01: Multi-turn Conversation Example +# Demonstrates how to maintain conversation context + +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage + +from langchain_oci import ChatOCIGenAI + +# Configuration +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + +# Create chat model +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, +) + +# Multi-turn conversation with context +messages = [ + SystemMessage(content="You are a helpful cooking assistant. 
Be concise."), + HumanMessage(content="I have chicken, rice, and vegetables."), +] + +# First turn +print("User: I have chicken, rice, and vegetables.") +response = llm.invoke(messages) +print(f"Assistant: {response.content}\n") + +# Add assistant response to history +messages.append(AIMessage(content=response.content)) + +# Second turn +messages.append(HumanMessage(content="How do I make a stir-fry?")) +print("User: How do I make a stir-fry?") +response = llm.invoke(messages) +print(f"Assistant: {response.content}\n") + +# Add to history and continue +messages.append(AIMessage(content=response.content)) +messages.append(HumanMessage(content="What sauce should I use?")) +print("User: What sauce should I use?") +response = llm.invoke(messages) +print(f"Assistant: {response.content}") diff --git a/libs/oci/tutorials/02-vision-and-multimodal/README.md b/libs/oci/tutorials/02-vision-and-multimodal/README.md new file mode 100644 index 00000000..b5c758c3 --- /dev/null +++ b/libs/oci/tutorials/02-vision-and-multimodal/README.md @@ -0,0 +1,452 @@ +# Tutorial 02: Vision & Multimodal + +Learn how to analyze images, documents, videos, and audio with OCI Generative AI vision-capable models. + +## What You'll Build + +By the end of this tutorial, you'll be able to: +- Identify vision-capable models +- Load and encode images for analysis +- Analyze single and multiple images +- Process PDFs with Gemini models +- Handle video and audio content + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- An OCI compartment with Generative AI access + +## Concepts Covered + +| Concept | Description | +|---------|-------------| +| `VISION_MODELS` | Registry of vision-capable models | +| `load_image()` | Load an image file as a content block | +| `encode_image()` | Encode raw bytes as a content block | +| `to_data_uri()` | Convert image to data URI string | +| `is_vision_model()` | Check if a model supports vision | + +--- + +## Part 1: Vision-Capable Models + +Not all models can process images. 
Here are the vision-capable models available in OCI Generative AI: + +### Model Registry + +```python +from langchain_oci.utils.vision import VISION_MODELS + +print(VISION_MODELS) +``` + +**Output:** +```python +[ + # Meta Llama Vision + "meta.llama-3.2-90b-vision-instruct", + "meta.llama-3.2-11b-vision-instruct", + "meta.llama-4-scout-17b-16e-instruct", + "meta.llama-4-maverick-17b-128e-instruct-fp8", + # Google Gemini + "google.gemini-2.5-flash", + "google.gemini-2.5-pro", + "google.gemini-2.5-flash-lite", + # xAI Grok + "xai.grok-4", + "xai.grok-4-1-fast-reasoning", + "xai.grok-4-1-fast-non-reasoning", + "xai.grok-4-fast-reasoning", + "xai.grok-4-fast-non-reasoning", + # Cohere Command A + "cohere.command-a-vision", +] +``` + +### Check If a Model Supports Vision + +```python +from langchain_oci.utils.vision import is_vision_model + +# Returns True +is_vision_model("meta.llama-3.2-90b-vision-instruct") + +# Returns False +is_vision_model("meta.llama-3.3-70b-instruct") +``` + +--- + +## Part 2: Loading Images + +### Method 1: From File Path (`load_image`) + +The simplest way to use images: + +```python +from langchain_oci import load_image + +# Load from file path +image_block = load_image("./photo.jpg") + +# Returns: {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}} +``` + +### Method 2: From Bytes (`encode_image`) + +For images from HTTP responses, PIL, or other sources: + +```python +import requests +from langchain_oci import encode_image + +# From HTTP response +response = requests.get("https://example.com/image.png") +image_block = encode_image(response.content, mime_type="image/png") + +# From PIL Image +from PIL import Image +import io + +pil_image = Image.open("photo.jpg") +buffer = io.BytesIO() +pil_image.save(buffer, format="PNG") +image_block = encode_image(buffer.getvalue(), mime_type="image/png") +``` + +### Method 3: Direct Data URI (`to_data_uri`) + +For lower-level control: + +```python +from langchain_oci.utils.vision import to_data_uri + +# From file path +uri = to_data_uri("photo.jpg") +# "data:image/jpeg;base64,/9j/4AAQ..." + +# From bytes +uri = to_data_uri(image_bytes, mime_type="image/png") +# "data:image/png;base64,iVBORw0KGgo..." + +# Passthrough existing data URIs +uri = to_data_uri("data:image/png;base64,iVBORw0...") +# "data:image/png;base64,iVBORw0..." +``` + +--- + +## Part 3: Single Image Analysis + +Let's analyze an image with Meta Llama Vision: + +```python +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI, load_image + +# Create vision-capable model +llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Create message with text and image +message = HumanMessage( + content=[ + {"type": "text", "text": "What's in this image? Describe it in detail."}, + load_image("./sunset.jpg"), + ] +) + +# Get response +response = llm.invoke([message]) +print(response.content) +``` + +**Output:** +``` +The image shows a beautiful sunset over the ocean. The sky is painted +in shades of orange, pink, and purple, with wispy clouds scattered +across the horizon. The sun is partially visible, casting a warm +golden glow across the calm water... 
+``` + +--- + +## Part 4: Comparing Multiple Images + +Vision models can analyze multiple images in one request: + +```python +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI, load_image + +llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Compare two images +message = HumanMessage( + content=[ + {"type": "text", "text": "Compare these two images. What are the similarities and differences?"}, + load_image("./living_room_before.jpg"), + load_image("./living_room_after.jpg"), + ] +) + +response = llm.invoke([message]) +print(response.content) +``` + +### Product Comparison Example + +```python +# Compare product images +message = HumanMessage( + content=[ + {"type": "text", "text": "Which laptop appears more suitable for gaming? Why?"}, + load_image("./laptop_a.jpg"), + load_image("./laptop_b.jpg"), + ] +) +``` + +--- + +## Part 5: Gemini Multimodal - PDF Processing + +Google Gemini models can process PDFs natively: + +```python +import base64 +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Load and encode PDF +with open("document.pdf", "rb") as f: + pdf_data = base64.b64encode(f.read()).decode("utf-8") + +message = HumanMessage( + content=[ + {"type": "text", "text": "Summarize this PDF document."}, + { + "type": "media", + "data": pdf_data, + "mime_type": "application/pdf" + }, + ] +) + +response = llm.invoke([message]) +print(response.content) +``` + +### PDF Use Cases + +```python +# Extract key points +"Extract the main points from this contract." + +# Data extraction +"Extract all dates, amounts, and party names from this invoice." + +# Question answering +"According to this document, what are the payment terms?" + +# Translation +"Translate this PDF from Spanish to English." +``` + +--- + +## Part 6: Video Analysis with Gemini + +Gemini models can analyze video content: + +```python +import base64 +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Load and encode video +with open("clip.mp4", "rb") as f: + video_data = base64.b64encode(f.read()).decode("utf-8") + +message = HumanMessage( + content=[ + {"type": "text", "text": "Describe what's happening in this video."}, + { + "type": "media", + "data": video_data, + "mime_type": "video/mp4" + }, + ] +) + +response = llm.invoke([message]) +print(response.content) +``` + +### Video Analysis Use Cases + +```python +# Action recognition +"What activities are shown in this video?" + +# Safety analysis +"Are there any safety hazards visible in this workplace footage?" + +# Content moderation +"Does this video contain any inappropriate content?" + +# Event summarization +"Summarize the key moments from this meeting recording." 
+``` + +--- + +## Part 7: Audio Analysis with Gemini + +Gemini can also transcribe and analyze audio: + +```python +import base64 +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Load and encode audio +with open("recording.mp3", "rb") as f: + audio_data = base64.b64encode(f.read()).decode("utf-8") + +message = HumanMessage( + content=[ + {"type": "text", "text": "Transcribe this audio and summarize the key points."}, + { + "type": "media", + "data": audio_data, + "mime_type": "audio/mp3" + }, + ] +) + +response = llm.invoke([message]) +print(response.content) +``` + +--- + +## Part 8: Provider-Specific Vision Support + +### Meta Llama Vision + +Best for: General image analysis, detailed descriptions + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", # or 11b for faster + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) +``` + +### Google Gemini + +Best for: Multimodal (PDF, video, audio), complex reasoning + +```python +llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) +``` + +### xAI Grok Vision + +Best for: Fast reasoning with vision + +```python +llm = ChatOCIGenAI( + model_id="xai.grok-4", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) +``` + +### Cohere Command A Vision + +Best for: Document understanding, RAG with images + +```python +llm = ChatOCIGenAI( + model_id="cohere.command-a-vision", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) +``` + +--- + +## Summary + +In this tutorial, you learned: + +1. **Vision models** - 13+ models that support image input +2. **Loading images** - `load_image()` for files, `encode_image()` for bytes +3. **Image analysis** - Single and multi-image analysis +4. **Gemini multimodal** - PDF, video, and audio processing +5. 
**Provider differences** - Choosing the right model for your use case + +## Next Steps + +- **[Tutorial 03: Building AI Agents](../03-building-ai-agents/)** - Create autonomous agents with tools +- **[Tutorial 10: Embeddings](../10-embeddings/)** - Image embeddings for search + +## API Reference + +| Function | Description | +|----------|-------------| +| `load_image(path)` | Load image file as content block | +| `encode_image(bytes, mime_type)` | Encode bytes as content block | +| `to_data_uri(image, mime_type)` | Convert to data URI string | +| `is_vision_model(model_id)` | Check if model supports vision | +| `VISION_MODELS` | List of vision-capable models | + +## Troubleshooting + +### "Content type not supported" +- Ensure you're using a vision-capable model +- Check the image format is supported (PNG, JPEG, GIF, WebP) + +### "Image too large" +- Resize the image before encoding +- Maximum size varies by model (typically 20MB) + +### "PDF not rendering" +- PDF support is Gemini-only +- Ensure the file is a valid PDF + +### "Video analysis slow" +- Video analysis is computationally intensive +- Consider extracting key frames for faster processing diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py b/libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py new file mode 100644 index 00000000..8a4788b2 --- /dev/null +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py @@ -0,0 +1,77 @@ +# Tutorial 02: Image Analysis Example +# Demonstrates single and multi-image analysis with vision models + +from langchain_core.messages import HumanMessage + +from langchain_oci import ChatOCIGenAI, load_image +from langchain_oci.utils.vision import VISION_MODELS, is_vision_model + +# Configuration +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def list_vision_models(): + """List all available vision-capable models.""" + print("Vision-capable models:") + for model in VISION_MODELS: + print(f" - {model}") + + +def check_model_capability(model_id: str): + """Check if a model supports vision.""" + if is_vision_model(model_id): + print(f"{model_id} supports vision") + else: + print(f"{model_id} does NOT support vision") + + +def analyze_single_image(image_path: str): + """Analyze a single image.""" + llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + message = HumanMessage( + content=[ + {"type": "text", "text": "Describe this image in detail."}, + load_image(image_path), + ] + ) + + response = llm.invoke([message]) + print(f"Analysis: {response.content}") + + +def compare_images(image_path_1: str, image_path_2: str): + """Compare two images.""" + llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + message = HumanMessage( + content=[ + {"type": "text", "text": "Compare these two images. 
What are the key differences?"}, + load_image(image_path_1), + load_image(image_path_2), + ] + ) + + response = llm.invoke([message]) + print(f"Comparison: {response.content}") + + +if __name__ == "__main__": + # List available vision models + list_vision_models() + + # Check model capability + check_model_capability("meta.llama-3.2-90b-vision-instruct") + check_model_capability("meta.llama-3.3-70b-instruct") + + # Uncomment to analyze an image: + # analyze_single_image("path/to/your/image.jpg") diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py new file mode 100644 index 00000000..c3a34ef0 --- /dev/null +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py @@ -0,0 +1,84 @@ +# Tutorial 02: PDF Processing with Gemini +# Demonstrates how to analyze PDF documents using Google Gemini + +import base64 + +from langchain_core.messages import HumanMessage + +from langchain_oci import ChatOCIGenAI + +# Configuration +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def analyze_pdf(pdf_path: str, prompt: str): + """Analyze a PDF document with Gemini.""" + # Create Gemini model + llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Load and encode PDF + with open(pdf_path, "rb") as f: + pdf_data = base64.b64encode(f.read()).decode("utf-8") + + # Create message with PDF + message = HumanMessage( + content=[ + {"type": "text", "text": prompt}, + { + "type": "media", + "data": pdf_data, + "mime_type": "application/pdf" + }, + ] + ) + + # Get response + response = llm.invoke([message]) + return response.content + + +def summarize_document(pdf_path: str): + """Summarize a PDF document.""" + return analyze_pdf(pdf_path, "Summarize this document in 3-5 bullet points.") + + +def extract_key_data(pdf_path: str): + """Extract key data from a PDF (e.g., invoice, contract).""" + return analyze_pdf( + pdf_path, + "Extract the following from this document: " + "1. All dates mentioned " + "2. All monetary amounts " + "3. Names of parties involved " + "Format as a structured list." 
+ ) + + +def answer_question(pdf_path: str, question: str): + """Answer a question about a PDF document.""" + return analyze_pdf(pdf_path, question) + + +if __name__ == "__main__": + # Example usage (uncomment and provide a PDF path): + # pdf_file = "path/to/your/document.pdf" + + # Summarize + # summary = summarize_document(pdf_file) + # print(f"Summary:\n{summary}") + + # Extract data + # data = extract_key_data(pdf_file) + # print(f"Extracted Data:\n{data}") + + # Ask a question + # answer = answer_question(pdf_file, "What are the payment terms?") + # print(f"Answer:\n{answer}") + + print("PDF Processing Example") + print("Uncomment the code above and provide a PDF path to test.") diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py new file mode 100644 index 00000000..ad506759 --- /dev/null +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py @@ -0,0 +1,98 @@ +# Tutorial 02: Video Analysis with Gemini +# Demonstrates how to analyze video content using Google Gemini + +import base64 + +from langchain_core.messages import HumanMessage + +from langchain_oci import ChatOCIGenAI + +# Configuration +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def analyze_video(video_path: str, prompt: str): + """Analyze a video file with Gemini.""" + llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + model_kwargs={ + "max_tokens": 2000, # Videos may need longer responses + } + ) + + # Load and encode video + with open(video_path, "rb") as f: + video_data = base64.b64encode(f.read()).decode("utf-8") + + # Determine mime type from extension + if video_path.endswith(".mp4"): + mime_type = "video/mp4" + elif video_path.endswith(".webm"): + mime_type = "video/webm" + elif video_path.endswith(".mov"): + mime_type = "video/quicktime" + else: + mime_type = "video/mp4" # Default + + # Create message with video + message = HumanMessage( + content=[ + {"type": "text", "text": prompt}, + { + "type": "media", + "data": video_data, + "mime_type": mime_type + }, + ] + ) + + response = llm.invoke([message]) + return response.content + + +def describe_video(video_path: str): + """Get a detailed description of video content.""" + return analyze_video( + video_path, + "Describe what's happening in this video. " + "Include: actions, people/objects, setting, and timeline of events." + ) + + +def extract_key_moments(video_path: str): + """Extract key moments from a video.""" + return analyze_video( + video_path, + "Identify and describe the key moments in this video. " + "For each moment, provide: timestamp (if visible), what happens, " + "and why it's significant." + ) + + +def check_for_safety_issues(video_path: str): + """Analyze video for safety/compliance issues.""" + return analyze_video( + video_path, + "Analyze this video for any safety hazards or compliance issues. " + "List any concerns found with descriptions." 
+ ) + + +if __name__ == "__main__": + # Example usage (uncomment and provide a video path): + # video_file = "path/to/your/video.mp4" + + # Describe video + # description = describe_video(video_file) + # print(f"Description:\n{description}") + + # Extract key moments + # moments = extract_key_moments(video_file) + # print(f"Key Moments:\n{moments}") + + print("Video Analysis Example") + print("Uncomment the code above and provide a video path to test.") + print("Note: Video analysis can take longer due to file size.") diff --git a/libs/oci/tutorials/03-building-ai-agents/README.md b/libs/oci/tutorials/03-building-ai-agents/README.md new file mode 100644 index 00000000..c9ab8a9c --- /dev/null +++ b/libs/oci/tutorials/03-building-ai-agents/README.md @@ -0,0 +1,354 @@ +# Tutorial 03: Building AI Agents + +Learn how to create autonomous AI agents that can use tools, maintain memory, and interact with users. + +## What You'll Build + +By the end of this tutorial, you'll be able to: +- Create a ReAct agent with `create_oci_agent()` +- Define tools for your agent to use +- Add memory with checkpointing +- Implement human-in-the-loop workflows +- Integrate with LangGraph for complex agent patterns + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- Install LangGraph: `pip install langgraph` + +## Concepts Covered + +| Concept | Description | +|---------|-------------| +| `create_oci_agent()` | Factory function to create agents | +| `@tool` decorator | Define tools for agents | +| Checkpointing | Persist conversation state | +| Human-in-the-loop | Pause for user approval | +| LangGraph | Graph-based agent orchestration | + +--- + +## Part 1: What is an AI Agent? + +An **AI agent** is a system that: +1. **Reasons** about what to do (using an LLM) +2. **Acts** by calling tools +3. **Observes** the results +4. **Repeats** until the task is complete + +This is called the **ReAct** pattern (Reason + Act). + +``` +┌─────────┐ ┌─────────┐ ┌─────────┐ +│ Reason │────▶│ Act │────▶│ Observe │ +│ (LLM) │◀────│ (Tools) │◀────│(Results)│ +└─────────┘ └─────────┘ └─────────┘ +``` + +--- + +## Part 2: Creating Your First Agent + +### Step 1: Define Tools + +Tools are functions your agent can call. Use the `@tool` decorator: + +```python +from langchain_core.tools import tool + +@tool +def get_weather(city: str) -> str: + """Get the current weather for a city. + + Args: + city: The city name (e.g., "Chicago", "Paris") + """ + # In production, call a real weather API + return f"Weather in {city}: 72°F, sunny" + +@tool +def search_web(query: str) -> str: + """Search the web for information. + + Args: + query: The search query + """ + # In production, call a real search API + return f"Search results for '{query}': Found 10 relevant articles." 
+``` + +### Step 2: Create the Agent + +```python +from langchain_oci import create_oci_agent + +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[get_weather, search_web], + compartment_id="ocid1.compartment.oc1..xxx", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + system_prompt="You are a helpful assistant with access to weather and search tools.", +) +``` + +### Step 3: Run the Agent + +```python +from langchain_core.messages import HumanMessage + +result = agent.invoke({ + "messages": [HumanMessage(content="What's the weather in Chicago?")] +}) + +# Get the final response +final_message = result["messages"][-1] +print(final_message.content) +``` + +**Output:** +``` +The weather in Chicago is 72°F and sunny! +``` + +--- + +## Part 3: Understanding Agent Execution + +### What Happens Inside + +When you invoke an agent: + +1. **User message** → Agent receives "What's the weather in Chicago?" +2. **LLM reasons** → "I need to call the get_weather tool" +3. **Tool called** → `get_weather("Chicago")` returns "72°F, sunny" +4. **LLM responds** → "The weather in Chicago is 72°F and sunny!" + +### View All Messages + +```python +result = agent.invoke({ + "messages": [HumanMessage(content="What's the weather in Chicago?")] +}) + +for msg in result["messages"]: + print(f"{msg.type}: {msg.content[:100] if msg.content else '(tool call)'}") +``` + +--- + +## Part 4: Environment Variables for Convenience + +Instead of passing credentials every time, use environment variables: + +```bash +export OCI_COMPARTMENT_ID="ocid1.compartment.oc1..xxx" +export OCI_REGION="us-chicago-1" +# or +export OCI_SERVICE_ENDPOINT="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" +``` + +Then create agents simply: + +```python +from langchain_oci import create_oci_agent + +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[get_weather], + # No need for compartment_id or service_endpoint! +) +``` + +--- + +## Part 5: Adding Memory with Checkpointing + +Checkpointing allows your agent to remember previous conversations: + +```python +from langgraph.checkpoint.memory import MemorySaver +from langchain_oci import create_oci_agent + +# Create a checkpointer +checkpointer = MemorySaver() + +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[get_weather], + checkpointer=checkpointer, +) + +# First conversation +result1 = agent.invoke( + {"messages": [HumanMessage(content="What's the weather in Chicago?")]}, + config={"configurable": {"thread_id": "user_123"}}, +) + +# Later, continue the same conversation +result2 = agent.invoke( + {"messages": [HumanMessage(content="What about New York?")]}, + config={"configurable": {"thread_id": "user_123"}}, +) +# Agent remembers the previous context! 
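+
+# Optional check (assumes the returned agent is a LangGraph compiled graph,
+# so `get_state` is available): inspect the checkpoint for this thread and
+# watch the stored message history grow across turns.
+snapshot = agent.get_state({"configurable": {"thread_id": "user_123"}})
+print(len(snapshot.values["messages"]))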
+``` + +### Persistent Checkpointers + +For production, use a database-backed checkpointer: + +```python +from langgraph.checkpoint.sqlite import SqliteSaver + +# SQLite (local) +checkpointer = SqliteSaver.from_conn_string("agent_memory.db") + +# PostgreSQL (production) +# from langgraph.checkpoint.postgres import PostgresSaver +# checkpointer = PostgresSaver.from_conn_string("postgresql://...") +``` + +--- + +## Part 6: Human-in-the-Loop Workflows + +Pause the agent for human approval before taking actions: + +```python +from langchain_oci import create_oci_agent +from langgraph.checkpoint.memory import MemorySaver + +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[get_weather, dangerous_action], + checkpointer=MemorySaver(), + interrupt_before=["tools"], # Pause before executing tools +) + +# Start the conversation +result = agent.invoke( + {"messages": [HumanMessage(content="Delete all files")]}, + config={"configurable": {"thread_id": "review_123"}}, +) + +# Agent is paused before calling tools +# Review the pending tool call +pending_tool = result["messages"][-1].tool_calls[0] +print(f"Agent wants to call: {pending_tool['name']}") +print(f"With args: {pending_tool['args']}") + +# If approved, continue execution +if user_approves(): + result = agent.invoke( + None, # Continue from where we left off + config={"configurable": {"thread_id": "review_123"}}, + ) +``` + +--- + +## Part 7: Multi-Tool Orchestration + +Agents can use multiple tools in sequence: + +```python +@tool +def get_stock_price(ticker: str) -> str: + """Get the current stock price for a ticker symbol.""" + return f"{ticker}: $150.00" + +@tool +def get_company_news(company: str) -> str: + """Get recent news about a company.""" + return f"Latest news for {company}: Q4 earnings exceeded expectations." + +@tool +def calculate(expression: str) -> str: + """Calculate a mathematical expression.""" + return str(eval(expression)) # Use a safe evaluator in production + +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[get_stock_price, get_company_news, calculate], + max_sequential_tool_calls=8, # Allow up to 8 tool calls per turn +) + +result = agent.invoke({ + "messages": [HumanMessage( + content="What's Apple's stock price and what's the latest news? " + "Also, if I buy 10 shares, how much would that cost?" + )] +}) +``` + +The agent will: +1. Call `get_stock_price("AAPL")` +2. Call `get_company_news("Apple")` +3. Call `calculate("150.00 * 10")` +4. Synthesize a final answer + +--- + +## Part 8: Preventing Infinite Loops + +Sometimes agents get stuck calling the same tool repeatedly. Use these safeguards: + +```python +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[my_tools], + max_sequential_tool_calls=8, # Stop after 8 tool calls + tool_result_guidance=True, # Guide model to use tool results +) +``` + +### How It Works + +- **`max_sequential_tool_calls`**: Limits the number of tool calls per conversation turn +- **`tool_result_guidance`**: Injects a system message telling the model to respond naturally after receiving tool results +- **Infinite loop detection**: Automatically detects when the same tool is called with the same arguments repeatedly + +--- + +## Summary + +In this tutorial, you learned: + +1. **ReAct pattern** - How agents reason and act +2. **`create_oci_agent()`** - Factory function for creating agents +3. **Tools** - Functions agents can call with `@tool` +4. 
**Checkpointing** - Memory persistence across conversations +5. **Human-in-the-loop** - Pausing for approval +6. **Multi-tool orchestration** - Complex workflows with multiple tools +7. **Loop prevention** - Safeguards against infinite loops + +## Next Steps + +- **[Tutorial 04: Tool Calling Mastery](../04-tool-calling-mastery/)** - Deep dive into tool calling +- **[Tutorial 05: Structured Output](../05-structured-output/)** - Get structured responses + +## API Reference + +| Function/Parameter | Description | +|--------------------|-------------| +| `create_oci_agent()` | Create a ReAct agent | +| `model_id` | OCI model identifier | +| `tools` | List of tools the agent can use | +| `checkpointer` | LangGraph checkpointer for persistence | +| `interrupt_before` | Nodes to pause before | +| `max_sequential_tool_calls` | Maximum tool calls per turn | +| `tool_result_guidance` | Guide model to use tool results | + +## Troubleshooting + +### "Agent keeps calling the same tool" +- Increase `max_sequential_tool_calls` if legitimate +- Enable `tool_result_guidance=True` +- Check that tool return values are informative + +### "Tool not found" +- Ensure tool has a docstring (required for description) +- Check tool is in the `tools` list + +### "Agent doesn't remember previous messages" +- Ensure you're using the same `thread_id` +- Verify checkpointer is configured correctly diff --git a/libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py b/libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py new file mode 100644 index 00000000..79be3f63 --- /dev/null +++ b/libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py @@ -0,0 +1,93 @@ +# Tutorial 03: Agent with Memory (Checkpointing) +# Demonstrates how to persist conversation state across invocations + +from langchain_core.messages import HumanMessage +from langchain_core.tools import tool +from langgraph.checkpoint.memory import MemorySaver + +from langchain_oci import create_oci_agent + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +@tool +def remember_preference(preference: str) -> str: + """Remember a user's preference. + + Args: + preference: The preference to remember + """ + return f"I'll remember that you prefer: {preference}" + + +@tool +def get_recommendation(category: str) -> str: + """Get a recommendation based on user's preferences. 
+ + Args: + category: The category to get a recommendation for (food, music, movies) + """ + recommendations = { + "food": "Based on your preferences, I recommend trying the new Italian restaurant downtown.", + "music": "You might enjoy the latest album by The Weeknd based on your taste.", + "movies": "I recommend watching 'Oppenheimer' - it matches your interest in drama.", + } + return recommendations.get(category, f"No recommendations available for {category}") + + +def main(): + # Create checkpointer for memory persistence + checkpointer = MemorySaver() + + # Create agent with checkpointing + agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[remember_preference, get_recommendation], + compartment_id=COMPARTMENT_ID, + service_endpoint=SERVICE_ENDPOINT, + checkpointer=checkpointer, # Enable memory + system_prompt="You are a personal assistant that remembers user preferences " + "and provides personalized recommendations.", + ) + + # Conversation thread ID - same ID = same conversation + thread_id = "user_alice_123" + config = {"configurable": {"thread_id": thread_id}} + + # First message - set a preference + print("Turn 1: Setting preference") + result1 = agent.invoke( + {"messages": [HumanMessage(content="I love Italian food and classic rock music.")]}, + config=config, + ) + print(f"Agent: {result1['messages'][-1].content}\n") + + # Second message - ask for a recommendation + # The agent remembers the previous context! + print("Turn 2: Asking for food recommendation") + result2 = agent.invoke( + {"messages": [HumanMessage(content="Can you recommend a restaurant?")]}, + config=config, + ) + print(f"Agent: {result2['messages'][-1].content}\n") + + # Third message - continue the conversation + print("Turn 3: Asking for music recommendation") + result3 = agent.invoke( + {"messages": [HumanMessage(content="What about music?")]}, + config=config, + ) + print(f"Agent: {result3['messages'][-1].content}\n") + + # Different thread = different conversation + print("New Thread: Different user") + result_new = agent.invoke( + {"messages": [HumanMessage(content="What do I like?")]}, + config={"configurable": {"thread_id": "user_bob_456"}}, + ) + print(f"Agent (no memory of Alice): {result_new['messages'][-1].content}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py b/libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py new file mode 100644 index 00000000..0a134678 --- /dev/null +++ b/libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py @@ -0,0 +1,87 @@ +# Tutorial 03: Basic Agent Example +# Demonstrates creating a simple ReAct agent with tools + +from langchain_core.messages import HumanMessage +from langchain_core.tools import tool + +from langchain_oci import create_oci_agent + +# Configuration - can also use environment variables: +# export OCI_COMPARTMENT_ID="ocid1.compartment.oc1..xxx" +# export OCI_REGION="us-chicago-1" +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +# Define tools using the @tool decorator +@tool +def get_weather(city: str) -> str: + """Get the current weather for a city. 
+ + Args: + city: The city name (e.g., "Chicago", "Paris", "Tokyo") + """ + # In production, call a real weather API + weather_data = { + "Chicago": "72°F, sunny", + "New York": "68°F, cloudy", + "Los Angeles": "85°F, clear", + "London": "55°F, rainy", + "Tokyo": "70°F, partly cloudy", + } + return weather_data.get(city, f"Weather data not available for {city}") + + +@tool +def get_time(city: str) -> str: + """Get the current time in a city. + + Args: + city: The city name + """ + # In production, use a timezone library + times = { + "Chicago": "2:00 PM CST", + "New York": "3:00 PM EST", + "Los Angeles": "12:00 PM PST", + "London": "8:00 PM GMT", + "Tokyo": "5:00 AM JST (next day)", + } + return times.get(city, f"Time data not available for {city}") + + +def main(): + # Create the agent + agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[get_weather, get_time], + compartment_id=COMPARTMENT_ID, + service_endpoint=SERVICE_ENDPOINT, + system_prompt="You are a helpful travel assistant. " + "Use the available tools to answer questions about weather and time.", + ) + + # Run the agent + result = agent.invoke({ + "messages": [HumanMessage(content="What's the weather and time in Tokyo?")] + }) + + # Print all messages to see the agent's reasoning + print("Agent Execution Trace:") + print("-" * 50) + for msg in result["messages"]: + msg_type = msg.type.upper() + if hasattr(msg, "tool_calls") and msg.tool_calls: + print(f"{msg_type}: [Tool calls: {[tc['name'] for tc in msg.tool_calls]}]") + elif hasattr(msg, "tool_call_id"): + print(f"{msg_type} (tool result): {msg.content[:100]}") + else: + print(f"{msg_type}: {msg.content[:200] if msg.content else '(empty)'}") + print("-" * 50) + + # Final answer + print(f"\nFinal Answer: {result['messages'][-1].content}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py b/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py new file mode 100644 index 00000000..1756a016 --- /dev/null +++ b/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py @@ -0,0 +1,94 @@ +# Tutorial 03: Human-in-the-Loop Agent +# Demonstrates pausing agent execution for human approval + +from langchain_core.messages import HumanMessage +from langchain_core.tools import tool +from langgraph.checkpoint.memory import MemorySaver + +from langchain_oci import create_oci_agent + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +@tool +def send_email(to: str, subject: str, body: str) -> str: + """Send an email to a recipient. + + Args: + to: Email recipient address + subject: Email subject line + body: Email body content + """ + # In production, actually send the email + return f"Email sent successfully to {to}" + + +@tool +def delete_file(filename: str) -> str: + """Delete a file from the system. + + Args: + filename: Name of the file to delete + """ + # In production, actually delete the file + return f"File '{filename}' has been deleted" + + +def get_user_approval(tool_name: str, args: dict) -> bool: + """Simulate user approval (in production, use a real UI).""" + print(f"\n⚠️ Agent wants to execute: {tool_name}") + print(f" Arguments: {args}") + response = input(" Approve? 
(y/n): ").lower().strip() + return response == "y" + + +def main(): + # Create checkpointer (required for human-in-the-loop) + checkpointer = MemorySaver() + + # Create agent with interrupt_before to pause before tool execution + agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[send_email, delete_file], + compartment_id=COMPARTMENT_ID, + service_endpoint=SERVICE_ENDPOINT, + checkpointer=checkpointer, + interrupt_before=["tools"], # Pause before executing any tool + system_prompt="You are an assistant that can send emails and manage files. " + "Always confirm actions with the user before proceeding.", + ) + + thread_id = "approval_thread_001" + config = {"configurable": {"thread_id": thread_id}} + + # User requests an action + print("User: Send an email to john@example.com about the meeting tomorrow") + result = agent.invoke( + {"messages": [HumanMessage( + content="Send an email to john@example.com saying 'Meeting tomorrow at 10am'" + )]}, + config=config, + ) + + # Check if agent is waiting for tool execution + last_message = result["messages"][-1] + if hasattr(last_message, "tool_calls") and last_message.tool_calls: + for tool_call in last_message.tool_calls: + approved = get_user_approval(tool_call["name"], tool_call["args"]) + + if approved: + # Continue execution + print("\n✅ Approved! Continuing execution...") + result = agent.invoke(None, config=config) + print(f"\nAgent: {result['messages'][-1].content}") + else: + # Reject the action + print("\n❌ Rejected! Action will not be executed.") + # In production, you might want to tell the agent it was rejected + else: + print(f"\nAgent: {last_message.content}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/04-tool-calling-mastery/README.md b/libs/oci/tutorials/04-tool-calling-mastery/README.md new file mode 100644 index 00000000..76d0ae30 --- /dev/null +++ b/libs/oci/tutorials/04-tool-calling-mastery/README.md @@ -0,0 +1,430 @@ +# Tutorial 04: Tool Calling Mastery + +Master the art of tool calling - from basic definitions to parallel execution and complex workflows. + +## What You'll Build + +By the end of this tutorial, you'll be able to: +- Define tools with the `@tool` decorator +- Use `bind_tools()` for chat models +- Handle tool call responses +- Enable parallel tool execution +- Configure tool calling behavior +- Implement complex multi-step workflows + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- Completed [Tutorial 03: Building AI Agents](../03-building-ai-agents/) (recommended) + +## Concepts Covered + +| Concept | Description | +|---------|-------------| +| `@tool` decorator | Define tools from functions | +| `bind_tools()` | Attach tools to chat models | +| `tool_calls` | Tool call requests from AI | +| `parallel_tool_calls` | Execute multiple tools at once | +| `max_sequential_tool_calls` | Limit consecutive tool calls | +| `tool_result_guidance` | Help model use tool results | + +--- + +## Part 1: Defining Tools + +### Using the `@tool` Decorator + +The simplest way to create a tool: + +```python +from langchain_core.tools import tool + +@tool +def get_weather(city: str) -> str: + """Get the current weather for a city. + + Args: + city: The city name (e.g., "Chicago", "Paris") + + Returns: + Weather description + """ + return f"Weather in {city}: 72°F, sunny" +``` + +**Important:** The docstring becomes the tool's description - make it clear! 
+ +### With Type Hints (Pydantic) + +For more complex parameters: + +```python +from pydantic import BaseModel, Field +from langchain_core.tools import tool + +class SearchParams(BaseModel): + """Parameters for web search.""" + query: str = Field(description="The search query") + max_results: int = Field(default=10, description="Maximum results to return") + language: str = Field(default="en", description="Result language code") + +@tool(args_schema=SearchParams) +def search_web(query: str, max_results: int = 10, language: str = "en") -> str: + """Search the web for information.""" + return f"Found {max_results} results for '{query}' in {language}" +``` + +--- + +## Part 2: Binding Tools to Chat Models + +### The `bind_tools()` Method + +Attach tools to a chat model: + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Bind tools to the model +llm_with_tools = llm.bind_tools([get_weather, search_web]) +``` + +### Invoking with Tools + +```python +from langchain_core.messages import HumanMessage + +response = llm_with_tools.invoke([ + HumanMessage(content="What's the weather in Tokyo?") +]) + +# Check if the model wants to call a tool +if response.tool_calls: + print(f"Tool to call: {response.tool_calls[0]['name']}") + print(f"Arguments: {response.tool_calls[0]['args']}") +else: + print(f"Direct response: {response.content}") +``` + +--- + +## Part 3: Handling Tool Responses + +### The Tool Calling Flow + +``` +1. User message +2. Model returns tool_calls (what to call) +3. You execute the tool +4. Send ToolMessage with result +5. Model uses result to respond +``` + +### Complete Example + +```python +from langchain_core.messages import HumanMessage, AIMessage, ToolMessage +from langchain_oci import ChatOCIGenAI + +@tool +def calculate(expression: str) -> str: + """Calculate a math expression.""" + return str(eval(expression)) # Use safe eval in production! + +llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +llm_with_tools = llm.bind_tools([calculate]) + +# Step 1: User asks a question +messages = [HumanMessage(content="What is 25 * 47?")] +response = llm_with_tools.invoke(messages) + +# Step 2: Model wants to call a tool +if response.tool_calls: + tool_call = response.tool_calls[0] + print(f"Calling: {tool_call['name']}({tool_call['args']})") + + # Step 3: Execute the tool + result = calculate.invoke(tool_call['args']) + + # Step 4: Add AI message and tool result to history + messages.append(response) # AI message with tool_calls + messages.append(ToolMessage( + content=result, + tool_call_id=tool_call['id'] + )) + + # Step 5: Get final response + final_response = llm_with_tools.invoke(messages) + print(f"Answer: {final_response.content}") +``` + +**Output:** +``` +Calling: calculate({'expression': '25 * 47'}) +Answer: 25 multiplied by 47 equals 1175. 
+``` + +--- + +## Part 4: Parallel Tool Execution + +Some models (like Llama 4) can call multiple tools at once: + +### Enabling Parallel Tool Calls + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +llm_with_tools = llm.bind_tools( + [get_weather, get_time, search_web], + parallel_tool_calls=True, # Enable parallel calls +) +``` + +### Handling Multiple Tool Calls + +```python +response = llm_with_tools.invoke([ + HumanMessage(content="What's the weather and time in Chicago and New York?") +]) + +# Multiple tool calls may be returned +for tool_call in response.tool_calls: + print(f"Tool: {tool_call['name']}, Args: {tool_call['args']}") +``` + +**Output:** +``` +Tool: get_weather, Args: {'city': 'Chicago'} +Tool: get_weather, Args: {'city': 'New York'} +Tool: get_time, Args: {'city': 'Chicago'} +Tool: get_time, Args: {'city': 'New York'} +``` + +### Execute All Tools in Parallel + +```python +import asyncio + +async def execute_tools_parallel(response, tools_dict): + """Execute multiple tool calls in parallel.""" + results = [] + + for tool_call in response.tool_calls: + tool = tools_dict[tool_call['name']] + result = tool.invoke(tool_call['args']) + results.append(ToolMessage( + content=result, + tool_call_id=tool_call['id'] + )) + + return results +``` + +--- + +## Part 5: Controlling Tool Calling Behavior + +### `max_sequential_tool_calls` + +Limit how many tools can be called in a single conversation turn: + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + max_sequential_tool_calls=5, # Stop after 5 tool calls +) +``` + +### `tool_result_guidance` + +Help models use tool results naturally (especially useful for Meta Llama): + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + tool_result_guidance=True, # Inject guidance after tool results +) +``` + +This injects a system message telling the model: +> "Respond with a helpful, natural language answer that incorporates the tool results." + +### `tool_choice` + +Force or prevent tool usage: + +```python +# Force a specific tool +llm_with_tools = llm.bind_tools(tools, tool_choice="get_weather") + +# Force any tool to be called +llm_with_tools = llm.bind_tools(tools, tool_choice="required") + +# Prevent tool calls +llm_with_tools = llm.bind_tools(tools, tool_choice="none") + +# Let model decide (default) +llm_with_tools = llm.bind_tools(tools, tool_choice="auto") +``` + +--- + +## Part 6: Infinite Loop Detection + +The system automatically detects when the same tool is called repeatedly: + +### How It Works + +```python +# If the model calls get_weather("Chicago") twice in a row +# with the exact same arguments, the system will: +# 1. Detect the infinite loop +# 2. Force the model to stop calling tools +# 3. 
Require a natural language response +``` + +### Customize Loop Prevention + +```python +llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + max_sequential_tool_calls=8, # Higher limit for complex workflows + tool_result_guidance=True, # Help model use results +) +``` + +--- + +## Part 7: Complex Multi-Step Workflows + +### Research Assistant Example + +```python +from langchain_core.tools import tool + +@tool +def search_papers(topic: str) -> str: + """Search for academic papers on a topic.""" + return f"Found 5 papers on '{topic}': [Paper1, Paper2, ...]" + +@tool +def get_paper_summary(paper_id: str) -> str: + """Get the summary of a specific paper.""" + return f"Summary of {paper_id}: This paper discusses..." + +@tool +def save_notes(content: str) -> str: + """Save research notes.""" + return f"Notes saved: {content[:50]}..." + +# Create workflow +llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + max_sequential_tool_calls=10, +) + +llm_with_tools = llm.bind_tools([search_papers, get_paper_summary, save_notes]) + +# The model will: +# 1. Search for papers +# 2. Get summaries of interesting ones +# 3. Save notes +# All in sequence! +``` + +--- + +## Part 8: Best Practices + +### Tool Design + +1. **Clear docstrings** - The description is crucial for the model +2. **Typed arguments** - Use type hints for clarity +3. **Reasonable defaults** - Make tools easy to use +4. **Informative returns** - Return enough context for the model + +### Performance + +1. **Batch similar operations** - Group related tool calls +2. **Use parallel calls** - When tools are independent +3. **Set limits** - Use `max_sequential_tool_calls` to prevent runaway + +### Debugging + +```python +# Print tool calls for debugging +def debug_tool_calls(response): + if response.tool_calls: + for tc in response.tool_calls: + print(f"[DEBUG] Tool: {tc['name']}") + print(f"[DEBUG] Args: {tc['args']}") + print(f"[DEBUG] ID: {tc['id']}") +``` + +--- + +## Summary + +In this tutorial, you learned: + +1. **Tool definition** - `@tool` decorator with docstrings +2. **`bind_tools()`** - Attach tools to chat models +3. **Tool call flow** - Request → Execute → ToolMessage → Response +4. **Parallel execution** - Multiple tools at once +5. **Behavior control** - `max_sequential_tool_calls`, `tool_result_guidance` +6. **Loop prevention** - Automatic infinite loop detection +7. 
**Complex workflows** - Multi-step tool orchestration + +## Next Steps + +- **[Tutorial 05: Structured Output](../05-structured-output/)** - Get typed responses +- **[Tutorial 07: Async for Production](../07-async-for-production/)** - Async tool execution + +## API Reference + +| Method/Parameter | Description | +|------------------|-------------| +| `@tool` | Decorator to create tools | +| `bind_tools(tools)` | Attach tools to model | +| `parallel_tool_calls` | Enable parallel tool calls | +| `tool_choice` | Control tool selection | +| `max_sequential_tool_calls` | Limit tool calls per turn | +| `tool_result_guidance` | Guide model to use results | + +## Troubleshooting + +### "Tool not being called" +- Check the tool's docstring is clear and descriptive +- Verify the user's request clearly relates to the tool + +### "Wrong arguments passed" +- Add `Field(description=...)` to parameters +- Use Pydantic models for complex arguments + +### "Model ignores tool results" +- Enable `tool_result_guidance=True` +- Check tool returns informative results diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py b/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py new file mode 100644 index 00000000..fd6c5bee --- /dev/null +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py @@ -0,0 +1,102 @@ +# Tutorial 04: Basic Tool Calling Example +# Demonstrates defining tools and using bind_tools() + +from langchain_core.messages import HumanMessage, ToolMessage +from langchain_core.tools import tool +from pydantic import BaseModel, Field + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +# Simple tool with @tool decorator +@tool +def get_weather(city: str) -> str: + """Get the current weather for a city. + + Args: + city: The city name (e.g., "Chicago", "Tokyo") + """ + weather_data = { + "Chicago": "72°F, sunny", + "Tokyo": "68°F, cloudy", + "London": "55°F, rainy", + } + return weather_data.get(city, f"Weather data not available for {city}") + + +# Tool with Pydantic schema for complex parameters +class CalculatorInput(BaseModel): + """Input for the calculator tool.""" + expression: str = Field(description="Mathematical expression to evaluate (e.g., '2 + 2')") + + +@tool(args_schema=CalculatorInput) +def calculate(expression: str) -> str: + """Calculate a mathematical expression.""" + try: + # WARNING: Use a safe evaluator in production! 
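+        # eval() executes arbitrary Python, so outside a tutorial restrict the
+        # input (or swap in a dedicated expression parser) before evaluating.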
+ result = eval(expression) + return f"Result: {result}" + except Exception as e: + return f"Error: {e}" + + +def main(): + # Create chat model + llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Bind tools to the model + llm_with_tools = llm.bind_tools([get_weather, calculate]) + + # Test 1: Weather query + print("Test 1: Weather Query") + print("-" * 40) + messages = [HumanMessage(content="What's the weather in Tokyo?")] + response = llm_with_tools.invoke(messages) + + if response.tool_calls: + tool_call = response.tool_calls[0] + print(f"Tool requested: {tool_call['name']}") + print(f"Arguments: {tool_call['args']}") + + # Execute the tool + result = get_weather.invoke(tool_call['args']) + print(f"Tool result: {result}") + + # Send result back to model + messages.append(response) + messages.append(ToolMessage(content=result, tool_call_id=tool_call['id'])) + + final_response = llm_with_tools.invoke(messages) + print(f"Final answer: {final_response.content}") + + # Test 2: Calculator query + print("\nTest 2: Calculator Query") + print("-" * 40) + messages = [HumanMessage(content="What is 123 * 456?")] + response = llm_with_tools.invoke(messages) + + if response.tool_calls: + tool_call = response.tool_calls[0] + print(f"Tool requested: {tool_call['name']}") + print(f"Arguments: {tool_call['args']}") + + result = calculate.invoke(tool_call['args']) + print(f"Tool result: {result}") + + messages.append(response) + messages.append(ToolMessage(content=result, tool_call_id=tool_call['id'])) + + final_response = llm_with_tools.invoke(messages) + print(f"Final answer: {final_response.content}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py b/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py new file mode 100644 index 00000000..0ceab9ce --- /dev/null +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py @@ -0,0 +1,105 @@ +# Tutorial 04: Parallel Tool Calling Example +# Demonstrates calling multiple tools in parallel + +from langchain_core.messages import HumanMessage, ToolMessage +from langchain_core.tools import tool + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +@tool +def get_weather(city: str) -> str: + """Get the current weather for a city.""" + weather = { + "Chicago": "72°F, sunny", + "New York": "68°F, partly cloudy", + "Los Angeles": "85°F, clear", + } + return weather.get(city, f"No data for {city}") + + +@tool +def get_time(city: str) -> str: + """Get the current time in a city.""" + times = { + "Chicago": "2:00 PM CST", + "New York": "3:00 PM EST", + "Los Angeles": "12:00 PM PST", + } + return times.get(city, f"No time data for {city}") + + +@tool +def get_population(city: str) -> str: + """Get the population of a city.""" + populations = { + "Chicago": "2.7 million", + "New York": "8.3 million", + "Los Angeles": "3.9 million", + } + return populations.get(city, f"No population data for {city}") + + +def execute_tools(tool_calls: list, tools_dict: dict) -> list: + """Execute multiple tool calls and return ToolMessages.""" + results = [] + for tc in tool_calls: + tool_func = tools_dict[tc['name']] + result = tool_func.invoke(tc['args']) + results.append(ToolMessage(content=result, tool_call_id=tc['id'])) + return results + + +def 
main(): + # Create chat model + llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Tools dictionary for lookup + tools = [get_weather, get_time, get_population] + tools_dict = {t.name: t for t in tools} + + # Bind tools with parallel calls enabled + llm_with_tools = llm.bind_tools( + tools, + parallel_tool_calls=True, # Enable parallel execution + ) + + # Query that requires multiple tools + print("Query: Tell me about the weather, time, and population of Chicago and New York") + print("-" * 60) + + messages = [HumanMessage( + content="Tell me the weather, current time, and population of Chicago and New York." + )] + + response = llm_with_tools.invoke(messages) + + if response.tool_calls: + print(f"\nModel requested {len(response.tool_calls)} tool calls:") + for tc in response.tool_calls: + print(f" - {tc['name']}({tc['args']})") + + # Execute all tools + print("\nExecuting tools...") + tool_results = execute_tools(response.tool_calls, tools_dict) + + for tc, result in zip(response.tool_calls, tool_results): + print(f" - {tc['name']}: {result.content}") + + # Send results back to model + messages.append(response) + messages.extend(tool_results) + + final_response = llm_with_tools.invoke(messages) + print(f"\nFinal Answer:\n{final_response.content}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py b/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py new file mode 100644 index 00000000..fd4fe5c8 --- /dev/null +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py @@ -0,0 +1,103 @@ +# Tutorial 04: Multi-Step Tool Workflow Example +# Demonstrates complex workflows with multiple sequential tool calls + +from langchain_core.messages import HumanMessage, ToolMessage +from langchain_core.tools import tool + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +# Research workflow tools +@tool +def search_articles(topic: str) -> str: + """Search for articles on a topic. Returns article IDs.""" + # Simulated search results + return f"Found articles on '{topic}': [article_001, article_002, article_003]" + + +@tool +def get_article_content(article_id: str) -> str: + """Get the content of a specific article.""" + articles = { + "article_001": "AI is transforming healthcare with new diagnostic tools...", + "article_002": "Machine learning models now predict patient outcomes...", + "article_003": "Hospitals adopting AI see 30% improvement in efficiency...", + } + return articles.get(article_id, f"Article {article_id} not found") + + +@tool +def summarize_text(text: str) -> str: + """Summarize a piece of text.""" + # Simulated summarization + return f"Summary: {text[:100]}... 
(key points extracted)" + + +@tool +def save_research_note(note: str) -> str: + """Save a research note to the database.""" + return f"Note saved successfully: '{note[:50]}...'" + + +def run_workflow(llm_with_tools, messages: list, tools_dict: dict, max_iterations: int = 10): + """Run a multi-step tool workflow until completion.""" + + for iteration in range(max_iterations): + print(f"\n--- Iteration {iteration + 1} ---") + + response = llm_with_tools.invoke(messages) + + if not response.tool_calls: + # No more tool calls - we have the final answer + print("Final answer reached!") + return response.content + + print(f"Tool calls: {[tc['name'] for tc in response.tool_calls]}") + + # Execute tools + messages.append(response) + for tc in response.tool_calls: + tool_func = tools_dict[tc['name']] + result = tool_func.invoke(tc['args']) + print(f" {tc['name']} -> {result[:60]}...") + messages.append(ToolMessage(content=result, tool_call_id=tc['id'])) + + return "Max iterations reached" + + +def main(): + # Create chat model with workflow-appropriate settings + llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + max_sequential_tool_calls=10, # Allow multi-step workflows + tool_result_guidance=True, # Help model use results + ) + + tools = [search_articles, get_article_content, summarize_text, save_research_note] + tools_dict = {t.name: t for t in tools} + + llm_with_tools = llm.bind_tools(tools) + + # Complex research request + print("Request: Research AI in healthcare, summarize findings, and save notes") + print("=" * 60) + + messages = [HumanMessage( + content="Research AI in healthcare. Get the content of the first article you find, " + "summarize it, and save a research note with the key findings." + )] + + final_answer = run_workflow(llm_with_tools, messages, tools_dict) + + print("\n" + "=" * 60) + print("FINAL ANSWER:") + print(final_answer) + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/05-structured-output/README.md b/libs/oci/tutorials/05-structured-output/README.md new file mode 100644 index 00000000..f35f1c88 --- /dev/null +++ b/libs/oci/tutorials/05-structured-output/README.md @@ -0,0 +1,410 @@ +# Tutorial 05: Structured Output + +Get predictable, typed responses from language models using schemas. + +## What You'll Build + +By the end of this tutorial, you'll be able to: +- Use `with_structured_output()` for typed responses +- Define schemas with Pydantic models +- Use JSON mode for flexible output +- Handle validation errors gracefully +- Build real-world data extraction pipelines + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- Completed [Tutorial 04: Tool Calling Mastery](../04-tool-calling-mastery/) (recommended) + +## Concepts Covered + +| Concept | Description | +|---------|-------------| +| `with_structured_output()` | Get typed responses | +| Pydantic schemas | Define output structure | +| `json_mode` | Flexible JSON output | +| `json_schema` | JSON Schema-based output | +| `include_raw` | Access raw response | + +--- + +## Part 1: Why Structured Output? + +Without structured output: +```python +response = llm.invoke("Extract the name and email from: John Doe john@example.com") +# Output: "The name is John Doe and the email is john@example.com" +# Hard to parse programmatically! 
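+# (You would need a regex or a second parsing pass to pull the fields out reliably.)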
+``` + +With structured output: +```python +response = structured_llm.invoke("Extract: John Doe john@example.com") +# Output: Contact(name="John Doe", email="john@example.com") +# Directly usable in code! +``` + +--- + +## Part 2: Using Pydantic Schemas + +### Define Your Schema + +```python +from pydantic import BaseModel, Field +from typing import List, Optional + +class Contact(BaseModel): + """A contact with name and email.""" + name: str = Field(description="The person's full name") + email: str = Field(description="The email address") + phone: Optional[str] = Field(default=None, description="Phone number if available") +``` + +### Create Structured Model + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Create structured version +structured_llm = llm.with_structured_output(Contact) +``` + +### Get Typed Responses + +```python +result = structured_llm.invoke( + "Extract contact info: John Doe, john.doe@example.com, 555-123-4567" +) + +print(type(result)) # +print(result.name) # "John Doe" +print(result.email) # "john.doe@example.com" +print(result.phone) # "555-123-4567" +``` + +--- + +## Part 3: Complex Schemas + +### Nested Structures + +```python +from pydantic import BaseModel, Field +from typing import List + +class Address(BaseModel): + """A physical address.""" + street: str + city: str + state: str + zip_code: str + +class Company(BaseModel): + """A company with employees.""" + name: str + industry: str + headquarters: Address + employee_count: int + +# Works with nested structures +structured_llm = llm.with_structured_output(Company) + +result = structured_llm.invoke(""" + Extract company info: Acme Corp is a technology company based at + 123 Tech Blvd, San Francisco, CA 94102. They have about 500 employees. +""") + +print(result.name) # "Acme Corp" +print(result.headquarters.city) # "San Francisco" +print(result.employee_count) # 500 +``` + +### Lists and Enums + +```python +from enum import Enum +from pydantic import BaseModel, Field +from typing import List + +class Sentiment(str, Enum): + POSITIVE = "positive" + NEGATIVE = "negative" + NEUTRAL = "neutral" + +class Review(BaseModel): + """A product review analysis.""" + summary: str = Field(description="Brief summary of the review") + sentiment: Sentiment = Field(description="Overall sentiment") + keywords: List[str] = Field(description="Key topics mentioned") + rating: int = Field(ge=1, le=5, description="Rating 1-5") + +structured_llm = llm.with_structured_output(Review) + +result = structured_llm.invoke(""" + Analyze this review: "Great product! The battery life is amazing + and the camera quality exceeded my expectations. Highly recommend!" +""") + +print(result.sentiment) # Sentiment.POSITIVE +print(result.keywords) # ["battery life", "camera quality"] +print(result.rating) # 5 +``` + +--- + +## Part 4: Output Methods + +### Method 1: Function Calling (Default) + +Uses tool calling under the hood. Most reliable. 
+ +```python +structured_llm = llm.with_structured_output( + Contact, + method="function_calling", # Default +) +``` + +### Method 2: JSON Mode + +Returns raw JSON, parsed by Pydantic: + +```python +structured_llm = llm.with_structured_output( + Contact, + method="json_mode", +) +``` + +### Method 3: JSON Schema + +Uses OCI's native JSON schema support: + +```python +structured_llm = llm.with_structured_output( + Contact, + method="json_schema", +) +``` + +### When to Use Each + +| Method | Best For | Notes | +|--------|----------|-------| +| `function_calling` | Most use cases | Default, most reliable | +| `json_mode` | Simple schemas | Faster, less validation | +| `json_schema` | Complex schemas | Native OCI support | + +--- + +## Part 5: Include Raw Response + +Access both the parsed result and raw AI response: + +```python +structured_llm = llm.with_structured_output( + Contact, + include_raw=True, +) + +response = structured_llm.invoke("Extract: John Doe john@example.com") + +# Response is a dict with both +print(response["parsed"]) # Contact(name="John Doe", email="john@example.com") +print(response["raw"]) # AIMessage with raw content +``` + +Useful for: +- Debugging +- Logging +- Accessing additional metadata + +--- + +## Part 6: Error Handling + +### Validation Errors + +```python +from pydantic import ValidationError + +try: + result = structured_llm.invoke("This text has no contact info") +except ValidationError as e: + print(f"Validation failed: {e}") + # Handle gracefully +``` + +### Robust Extraction Pattern + +```python +from typing import Optional +from pydantic import BaseModel, Field + +class ExtractionResult(BaseModel): + """Wrapper for extraction with confidence.""" + data: Optional[Contact] = Field(default=None) + confidence: float = Field(ge=0, le=1, description="Extraction confidence") + notes: str = Field(default="", description="Any issues or notes") + +structured_llm = llm.with_structured_output(ExtractionResult) + +result = structured_llm.invoke("Maybe John? Not sure about email") + +if result.confidence > 0.8: + process(result.data) +else: + flag_for_review(result) +``` + +--- + +## Part 7: Real-World Examples + +### Data Extraction from Documents + +```python +class Invoice(BaseModel): + """Extracted invoice data.""" + invoice_number: str + date: str + vendor_name: str + total_amount: float + line_items: List[LineItem] + +structured_llm = llm.with_structured_output(Invoice) + +# Extract from invoice text +invoice_data = structured_llm.invoke(invoice_text) +``` + +### Classification + +```python +class Classification(BaseModel): + """Document classification.""" + category: str = Field(description="Document category") + subcategory: str = Field(description="Specific subcategory") + confidence: float + tags: List[str] + +structured_llm = llm.with_structured_output(Classification) + +result = structured_llm.invoke(f"Classify this document: {document_text}") +``` + +### Entity Extraction + +```python +class Entities(BaseModel): + """Named entities from text.""" + people: List[str] = Field(default_factory=list) + organizations: List[str] = Field(default_factory=list) + locations: List[str] = Field(default_factory=list) + dates: List[str] = Field(default_factory=list) + +structured_llm = llm.with_structured_output(Entities) + +entities = structured_llm.invoke(article_text) +print(f"People mentioned: {entities.people}") +``` + +--- + +## Part 8: Best Practices + +### Schema Design + +1. **Clear descriptions** - Help the model understand each field +2. 
**Use Optional** - For fields that might not exist +3. **Add constraints** - `ge`, `le`, `min_length`, `max_length` +4. **Use enums** - For categorical fields + +### Example: Well-Designed Schema + +```python +from pydantic import BaseModel, Field +from typing import List, Optional +from enum import Enum + +class Priority(str, Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + CRITICAL = "critical" + +class Task(BaseModel): + """A task extracted from text.""" + title: str = Field( + min_length=1, + max_length=200, + description="Brief task title" + ) + description: Optional[str] = Field( + default=None, + description="Detailed description if available" + ) + priority: Priority = Field( + default=Priority.MEDIUM, + description="Task priority level" + ) + due_date: Optional[str] = Field( + default=None, + description="Due date in ISO format (YYYY-MM-DD)" + ) + assignee: Optional[str] = Field( + default=None, + description="Person assigned to the task" + ) +``` + +--- + +## Summary + +In this tutorial, you learned: + +1. **Why structured output** - Predictable, typed responses +2. **Pydantic schemas** - Define output structure +3. **`with_structured_output()`** - Create structured models +4. **Output methods** - function_calling, json_mode, json_schema +5. **Error handling** - Validation and confidence +6. **Real-world patterns** - Extraction, classification, entities + +## Next Steps + +- **[Tutorial 07: Async for Production](../07-async-for-production/)** - Async structured extraction +- **[Tutorial 10: Embeddings](../10-embeddings/)** - Semantic search with extracted data + +## API Reference + +| Method/Parameter | Description | +|------------------|-------------| +| `with_structured_output(schema)` | Create structured model | +| `method` | "function_calling", "json_mode", "json_schema" | +| `include_raw` | Include raw response in output | +| Pydantic `Field()` | Add descriptions and constraints | + +## Troubleshooting + +### "Validation failed" +- Check if all required fields are extractable from input +- Use `Optional` for fields that might not exist +- Add `default=None` or `default_factory=list` + +### "Wrong type returned" +- Verify Pydantic schema is correct +- Check `Field(description=...)` is clear +- Try `method="function_calling"` for better reliability + +### "Incomplete extraction" +- Make input text clearer +- Add examples in the prompt +- Use `include_raw=True` to debug diff --git a/libs/oci/tutorials/05-structured-output/code/data_classification.py b/libs/oci/tutorials/05-structured-output/code/data_classification.py new file mode 100644 index 00000000..3c83a8e3 --- /dev/null +++ b/libs/oci/tutorials/05-structured-output/code/data_classification.py @@ -0,0 +1,107 @@ +# Tutorial 05: Document Classification Example +# Demonstrates using structured output for classification tasks + +from enum import Enum +from typing import List + +from pydantic import BaseModel, Field + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +class DocumentCategory(str, Enum): + LEGAL = "legal" + FINANCIAL = "financial" + TECHNICAL = "technical" + MARKETING = "marketing" + HR = "hr" + OTHER = "other" + + +class UrgencyLevel(str, Enum): + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + CRITICAL = "critical" + + +class DocumentClassification(BaseModel): + """Classification result for a document.""" + category: DocumentCategory = 
Field(description="Primary document category") + subcategory: str = Field(description="Specific subcategory within the main category") + urgency: UrgencyLevel = Field(description="How urgent is this document") + confidence: float = Field(ge=0, le=1, description="Classification confidence 0-1") + key_topics: List[str] = Field(description="Main topics covered in the document") + summary: str = Field(max_length=200, description="Brief summary of the document") + action_required: bool = Field(description="Does this require immediate action") + + +def classify_document(llm, document_text: str) -> DocumentClassification: + """Classify a document using structured output.""" + classifier = llm.with_structured_output(DocumentClassification) + + prompt = f"""Classify the following document. Analyze its content to determine +the category, urgency, key topics, and whether action is required. + +Document: +--- +{document_text} +--- + +Provide a structured classification.""" + + return classifier.invoke(prompt) + + +def main(): + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Test documents + documents = [ + """ + URGENT: Server Outage Notice + Our primary database server is experiencing critical failures. + All teams must immediately halt deployments and await further notice. + Estimated time to resolution: 4 hours. + Contact the SRE team for updates. + """, + + """ + Q4 2025 Financial Report + Revenue increased by 15% compared to Q3. Operating expenses + remained stable. Net profit margin improved to 12%. + Recommend continued investment in R&D and marketing. + """, + + """ + Employee Handbook Update + Section 5.3 regarding remote work policy has been updated. + Employees may now work remotely up to 3 days per week + with manager approval. Please review and acknowledge. 
+ """, + ] + + for i, doc in enumerate(documents, 1): + print(f"\nDocument {i}") + print("=" * 50) + + result = classify_document(llm, doc) + + print(f"Category: {result.category.value}") + print(f"Subcategory: {result.subcategory}") + print(f"Urgency: {result.urgency.value}") + print(f"Confidence: {result.confidence:.2f}") + print(f"Key Topics: {', '.join(result.key_topics)}") + print(f"Action Required: {result.action_required}") + print(f"Summary: {result.summary}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py b/libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py new file mode 100644 index 00000000..a29b5270 --- /dev/null +++ b/libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py @@ -0,0 +1,101 @@ +# Tutorial 05: Pydantic Schema Examples +# Demonstrates structured output with various schema patterns + +from enum import Enum +from typing import List, Optional + +from pydantic import BaseModel, Field + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +# Simple schema +class Contact(BaseModel): + """A contact with name and email.""" + name: str = Field(description="The person's full name") + email: str = Field(description="The email address") + phone: Optional[str] = Field(default=None, description="Phone number if available") + + +# Nested schema +class Address(BaseModel): + """A physical address.""" + street: str = Field(description="Street address") + city: str = Field(description="City name") + state: str = Field(description="State or province") + zip_code: str = Field(description="Postal code") + + +class Company(BaseModel): + """A company with address.""" + name: str = Field(description="Company name") + industry: str = Field(description="Industry sector") + headquarters: Address = Field(description="Main office location") + employee_count: int = Field(ge=1, description="Number of employees") + + +# Schema with enum +class Sentiment(str, Enum): + POSITIVE = "positive" + NEGATIVE = "negative" + NEUTRAL = "neutral" + + +class ReviewAnalysis(BaseModel): + """Analysis of a product review.""" + summary: str = Field(description="Brief summary") + sentiment: Sentiment = Field(description="Overall sentiment") + keywords: List[str] = Field(description="Key topics") + rating: int = Field(ge=1, le=5, description="Inferred rating 1-5") + + +def main(): + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Test 1: Simple extraction + print("Test 1: Contact Extraction") + print("-" * 40) + contact_llm = llm.with_structured_output(Contact) + contact = contact_llm.invoke( + "Extract contact: John Doe, john.doe@example.com, (555) 123-4567" + ) + print(f"Name: {contact.name}") + print(f"Email: {contact.email}") + print(f"Phone: {contact.phone}") + + # Test 2: Nested structure + print("\nTest 2: Company Extraction") + print("-" * 40) + company_llm = llm.with_structured_output(Company) + company = company_llm.invoke(""" + Extract: TechCorp is a software company located at + 456 Innovation Way, Austin, TX 78701. They employ about 250 people. 
+ """) + print(f"Company: {company.name}") + print(f"Industry: {company.industry}") + print(f"City: {company.headquarters.city}") + print(f"Employees: {company.employee_count}") + + # Test 3: With enum + print("\nTest 3: Review Analysis") + print("-" * 40) + review_llm = llm.with_structured_output(ReviewAnalysis) + review = review_llm.invoke(""" + Analyze: "Terrible experience! The product broke after one week + and customer service was unhelpful. Do not buy!" + """) + print(f"Summary: {review.summary}") + print(f"Sentiment: {review.sentiment.value}") + print(f"Keywords: {review.keywords}") + print(f"Rating: {review.rating}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/07-async-for-production/README.md b/libs/oci/tutorials/07-async-for-production/README.md new file mode 100644 index 00000000..a324966f --- /dev/null +++ b/libs/oci/tutorials/07-async-for-production/README.md @@ -0,0 +1,411 @@ +# Tutorial 07: Async for Production + +Build high-performance applications using async patterns with OCI Generative AI. + +## What You'll Build + +By the end of this tutorial, you'll be able to: +- Use `ainvoke()` for async single requests +- Use `astream()` for async streaming +- Use `abatch()` for parallel batch processing +- Build FastAPI endpoints with OCI GenAI +- Handle errors in async code + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- Basic understanding of Python async/await +- Install: `pip install fastapi uvicorn` + +## Concepts Covered + +| Concept | Description | +|---------|-------------| +| `ainvoke()` | Async single request | +| `astream()` | Async streaming | +| `abatch()` | Async batch processing | +| `asyncio.gather()` | Run multiple requests concurrently | +| FastAPI integration | Production web service | + +--- + +## Part 1: Why Async? + +### The Problem with Sync + +```python +# Synchronous - blocks while waiting +response1 = llm.invoke("Question 1") # Wait ~1s +response2 = llm.invoke("Question 2") # Wait ~1s +response3 = llm.invoke("Question 3") # Wait ~1s +# Total: ~3 seconds +``` + +### The Async Solution + +```python +# Asynchronous - runs concurrently +responses = await asyncio.gather( + llm.ainvoke("Question 1"), + llm.ainvoke("Question 2"), + llm.ainvoke("Question 3"), +) +# Total: ~1 second (3x faster!) 
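+# (Note: "await" is only valid inside an async function; Parts 2 and 3 below show
+# the complete pattern with asyncio.run().)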
+``` + +--- + +## Part 2: Basic Async Operations + +### Single Async Request (`ainvoke`) + +```python +import asyncio +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +async def main(): + response = await llm.ainvoke("What is the capital of France?") + print(response.content) + +# Run the async function +asyncio.run(main()) +``` + +### Async Streaming (`astream`) + +```python +async def stream_response(): + async for chunk in llm.astream("Tell me a story"): + print(chunk.content, end="", flush=True) + +asyncio.run(stream_response()) +``` + +### Async Batch (`abatch`) + +```python +async def batch_process(): + questions = [ + "What is Python?", + "What is JavaScript?", + "What is Rust?", + ] + + responses = await llm.abatch(questions) + + for q, r in zip(questions, responses): + print(f"Q: {q}") + print(f"A: {r.content[:100]}...\n") + +asyncio.run(batch_process()) +``` + +--- + +## Part 3: Concurrent Requests + +### Using `asyncio.gather()` + +Process multiple independent requests concurrently: + +```python +import asyncio +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +async def process_concurrent(): + # All three run at the same time + results = await asyncio.gather( + llm.ainvoke("Explain machine learning in one sentence"), + llm.ainvoke("Explain deep learning in one sentence"), + llm.ainvoke("Explain neural networks in one sentence"), + ) + + for i, result in enumerate(results, 1): + print(f"{i}. 
{result.content}") + +asyncio.run(process_concurrent()) +``` + +### Handling Errors in Concurrent Requests + +```python +async def safe_invoke(llm, prompt: str): + """Wrapper that catches errors for individual requests.""" + try: + return await llm.ainvoke(prompt) + except Exception as e: + return f"Error: {e}" + +async def process_with_error_handling(): + prompts = ["Good prompt", "Another good prompt", ""] # Empty will fail + + results = await asyncio.gather( + *[safe_invoke(llm, p) for p in prompts], + return_exceptions=True # Don't fail all on one error + ) + + for prompt, result in zip(prompts, results): + if isinstance(result, Exception): + print(f"Failed: {prompt} - {result}") + else: + print(f"Success: {prompt}") +``` + +--- + +## Part 4: Rate Limiting and Throttling + +### Semaphore for Concurrency Control + +```python +import asyncio + +async def process_with_limit(llm, prompts: list, max_concurrent: int = 5): + """Process prompts with limited concurrency.""" + semaphore = asyncio.Semaphore(max_concurrent) + + async def limited_invoke(prompt: str): + async with semaphore: + return await llm.ainvoke(prompt) + + return await asyncio.gather(*[limited_invoke(p) for p in prompts]) + +# Process 100 prompts, but only 5 at a time +prompts = [f"Question {i}" for i in range(100)] +results = await process_with_limit(llm, prompts, max_concurrent=5) +``` + +--- + +## Part 5: Async Streaming Patterns + +### Collect Full Response from Stream + +```python +async def stream_to_string(llm, prompt: str) -> str: + """Stream response and collect full text.""" + chunks = [] + async for chunk in llm.astream(prompt): + chunks.append(chunk.content) + return "".join(chunks) +``` + +### Stream with Progress + +```python +async def stream_with_progress(llm, prompt: str): + """Stream with token counting.""" + token_count = 0 + async for chunk in llm.astream(prompt): + token_count += 1 + print(chunk.content, end="", flush=True) + print(f"\n[Received {token_count} chunks]") +``` + +--- + +## Part 6: ChatOCIModelDeployment Async + +For custom model deployments, async methods are fully supported: + +```python +from langchain_oci import ChatOCIModelDeployment + +# vLLM or TGI deployment +chat = ChatOCIModelDeployment( + endpoint="https://your-deployment.oci.oraclecloud.com/predict", + model="meta-llama/Meta-Llama-3-8B-Instruct", +) + +async def use_deployment(): + # Async invoke + response = await chat.ainvoke("Hello!") + + # Async stream + async for chunk in chat.astream("Tell me a story"): + print(chunk.content, end="") +``` + +--- + +## Part 7: FastAPI Integration + +### Basic FastAPI Endpoint + +```python +from fastapi import FastAPI +from fastapi.responses import StreamingResponse +from pydantic import BaseModel +from langchain_oci import ChatOCIGenAI + +app = FastAPI() + +# Initialize model once (reuse across requests) +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + + +class ChatRequest(BaseModel): + message: str + + +class ChatResponse(BaseModel): + response: str + + +@app.post("/chat", response_model=ChatResponse) +async def chat(request: ChatRequest): + """Non-streaming chat endpoint.""" + response = await llm.ainvoke(request.message) + return ChatResponse(response=response.content) + + +@app.post("/chat/stream") +async def chat_stream(request: ChatRequest): + """Streaming chat endpoint.""" + async def generate(): + async for chunk in 
llm.astream(request.message): + yield chunk.content + + return StreamingResponse(generate(), media_type="text/plain") + + +# Run with: uvicorn main:app --reload +``` + +### Concurrent Batch Endpoint + +```python +class BatchRequest(BaseModel): + messages: list[str] + + +@app.post("/chat/batch") +async def chat_batch(request: BatchRequest): + """Process multiple messages concurrently.""" + responses = await llm.abatch(request.messages) + return {"responses": [r.content for r in responses]} +``` + +--- + +## Part 8: Best Practices + +### 1. Reuse Client Instances + +```python +# Good: Create once, reuse +llm = ChatOCIGenAI(...) # Module level + +async def handler(): + return await llm.ainvoke(...) + +# Bad: Create new client per request +async def handler(): + llm = ChatOCIGenAI(...) # Expensive! + return await llm.ainvoke(...) +``` + +### 2. Use Timeouts + +```python +import asyncio + +async def with_timeout(llm, prompt: str, timeout: float = 30.0): + try: + return await asyncio.wait_for( + llm.ainvoke(prompt), + timeout=timeout + ) + except asyncio.TimeoutError: + return "Request timed out" +``` + +### 3. Graceful Shutdown + +```python +from contextlib import asynccontextmanager +from fastapi import FastAPI + +@asynccontextmanager +async def lifespan(app: FastAPI): + # Startup + print("Starting up...") + yield + # Shutdown + print("Shutting down...") + # Cleanup resources if needed + +app = FastAPI(lifespan=lifespan) +``` + +### 4. Error Handling + +```python +from fastapi import HTTPException + +@app.post("/chat") +async def chat(request: ChatRequest): + try: + response = await llm.ainvoke(request.message) + return {"response": response.content} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) +``` + +--- + +## Summary + +In this tutorial, you learned: + +1. **Why async** - Concurrent requests for better performance +2. **`ainvoke()`** - Async single requests +3. **`astream()`** - Async streaming +4. **`abatch()`** - Async batch processing +5. **Concurrency control** - Semaphores and rate limiting +6. **FastAPI integration** - Production web services +7. 
**Best practices** - Reuse, timeouts, error handling + +## Next Steps + +- **[Tutorial 06: Model Deployments](../06-model-deployments/)** - Custom model endpoints +- **[Tutorial 10: Embeddings](../10-embeddings/)** - Async embedding operations + +## API Reference + +| Method | Description | +|--------|-------------| +| `ainvoke(input)` | Async single request | +| `astream(input)` | Async streaming response | +| `abatch(inputs)` | Async batch processing | +| `asyncio.gather()` | Run multiple coroutines concurrently | + +## Troubleshooting + +### "RuntimeError: Event loop is already running" +- Use `await` instead of `asyncio.run()` inside async contexts +- In Jupyter: `await llm.ainvoke(...)` directly + +### "Too many concurrent requests" +- Implement rate limiting with semaphores +- Use `abatch()` instead of many `ainvoke()` calls + +### "Connection timeout" +- Increase timeout: `asyncio.wait_for(..., timeout=60.0)` +- Check network connectivity diff --git a/libs/oci/tutorials/07-async-for-production/code/async_basics.py b/libs/oci/tutorials/07-async-for-production/code/async_basics.py new file mode 100644 index 00000000..9b2a18a5 --- /dev/null +++ b/libs/oci/tutorials/07-async-for-production/code/async_basics.py @@ -0,0 +1,59 @@ +# Tutorial 07: Async Basics Example +# Demonstrates ainvoke, astream, and abatch + +import asyncio + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +async def main(): + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # 1. Single async request + print("1. Single Async Request (ainvoke)") + print("-" * 40) + response = await llm.ainvoke("What is the capital of Japan?") + print(f"Response: {response.content}\n") + + # 2. Async streaming + print("2. Async Streaming (astream)") + print("-" * 40) + print("Response: ", end="") + async for chunk in llm.astream("Count from 1 to 5"): + print(chunk.content, end="", flush=True) + print("\n") + + # 3. Async batch + print("3. Async Batch (abatch)") + print("-" * 40) + questions = [ + "What is Python?", + "What is Java?", + "What is Go?", + ] + responses = await llm.abatch(questions) + for q, r in zip(questions, responses): + print(f"Q: {q}") + print(f"A: {r.content[:100]}...\n") + + # 4. Concurrent requests with gather + print("4. 
Concurrent Requests (asyncio.gather)") + print("-" * 40) + results = await asyncio.gather( + llm.ainvoke("What is 2+2?"), + llm.ainvoke("What is 3+3?"), + llm.ainvoke("What is 4+4?"), + ) + for i, result in enumerate(results, 1): + print(f"Result {i}: {result.content}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/libs/oci/tutorials/07-async-for-production/code/batch_processing.py b/libs/oci/tutorials/07-async-for-production/code/batch_processing.py new file mode 100644 index 00000000..4177f4d5 --- /dev/null +++ b/libs/oci/tutorials/07-async-for-production/code/batch_processing.py @@ -0,0 +1,83 @@ +# Tutorial 07: Batch Processing Example +# Demonstrates efficient batch processing with rate limiting + +import asyncio +import time +from typing import List + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +async def process_with_rate_limit( + llm: ChatOCIGenAI, + prompts: List[str], + max_concurrent: int = 5, +) -> List[str]: + """Process prompts with concurrency limit.""" + semaphore = asyncio.Semaphore(max_concurrent) + results = [] + + async def limited_invoke(prompt: str, index: int): + async with semaphore: + print(f"Processing {index + 1}/{len(prompts)}: {prompt[:30]}...") + try: + response = await llm.ainvoke(prompt) + return (index, response.content) + except Exception as e: + return (index, f"Error: {e}") + + # Create tasks for all prompts + tasks = [limited_invoke(p, i) for i, p in enumerate(prompts)] + + # Process all with limited concurrency + completed = await asyncio.gather(*tasks) + + # Sort by original index + completed.sort(key=lambda x: x[0]) + return [content for _, content in completed] + + +async def main(): + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + model_kwargs={"max_tokens": 100}, + ) + + # Generate test prompts + prompts = [ + f"In one sentence, what is {topic}?" 
+ for topic in [ + "Python", "JavaScript", "Rust", "Go", "TypeScript", + "Java", "C++", "Swift", "Kotlin", "Ruby" + ] + ] + + print(f"Processing {len(prompts)} prompts with max 3 concurrent requests") + print("=" * 60) + + start_time = time.perf_counter() + + # Process with rate limiting + results = await process_with_rate_limit(llm, prompts, max_concurrent=3) + + elapsed = time.perf_counter() - start_time + + print("\n" + "=" * 60) + print("Results:") + print("-" * 60) + + for prompt, result in zip(prompts, results): + topic = prompt.split("what is ")[1].rstrip("?") + print(f"{topic}: {result[:80]}...") + + print(f"\nCompleted in {elapsed:.2f} seconds") + print(f"Average time per request: {elapsed / len(prompts):.2f} seconds") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/libs/oci/tutorials/07-async-for-production/code/fastapi_app.py b/libs/oci/tutorials/07-async-for-production/code/fastapi_app.py new file mode 100644 index 00000000..7e7a3379 --- /dev/null +++ b/libs/oci/tutorials/07-async-for-production/code/fastapi_app.py @@ -0,0 +1,112 @@ +# Tutorial 07: FastAPI Integration Example +# Demonstrates building a production chat API with OCI GenAI +# +# Run with: uvicorn fastapi_app:app --reload + +from contextlib import asynccontextmanager +from typing import AsyncGenerator + +from fastapi import FastAPI, HTTPException +from fastapi.responses import StreamingResponse +from pydantic import BaseModel + +from langchain_oci import ChatOCIGenAI + +# Configuration +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + +# Global LLM instance (reused across requests) +llm: ChatOCIGenAI = None # type: ignore + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Manage application lifecycle.""" + global llm + # Startup: Initialize the LLM + print("Initializing OCI GenAI client...") + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + print("Client initialized!") + yield + # Shutdown: Cleanup if needed + print("Shutting down...") + + +app = FastAPI( + title="OCI GenAI Chat API", + description="Chat API powered by OCI Generative AI", + lifespan=lifespan, +) + + +# Request/Response models +class ChatRequest(BaseModel): + message: str + max_tokens: int = 500 + + +class ChatResponse(BaseModel): + response: str + + +class BatchRequest(BaseModel): + messages: list[str] + + +class BatchResponse(BaseModel): + responses: list[str] + + +# Endpoints +@app.get("/health") +async def health_check(): + """Health check endpoint.""" + return {"status": "healthy"} + + +@app.post("/chat", response_model=ChatResponse) +async def chat(request: ChatRequest): + """Non-streaming chat endpoint.""" + try: + response = await llm.ainvoke(request.message) + return ChatResponse(response=response.content) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/chat/stream") +async def chat_stream(request: ChatRequest): + """Streaming chat endpoint.""" + + async def generate() -> AsyncGenerator[str, None]: + try: + async for chunk in llm.astream(request.message): + yield chunk.content + except Exception as e: + yield f"\n\nError: {e}" + + return StreamingResponse( + generate(), + media_type="text/plain", + ) + + +@app.post("/chat/batch", response_model=BatchResponse) +async def chat_batch(request: BatchRequest): + """Process multiple messages concurrently.""" + try: + responses = 
await llm.abatch(request.messages) + return BatchResponse(responses=[r.content for r in responses]) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# Example usage when running directly +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/libs/oci/tutorials/10-embeddings/README.md b/libs/oci/tutorials/10-embeddings/README.md new file mode 100644 index 00000000..6a9042cb --- /dev/null +++ b/libs/oci/tutorials/10-embeddings/README.md @@ -0,0 +1,352 @@ +# Tutorial 10: Text and Image Embeddings + +Create vector embeddings for semantic search, RAG, and similarity applications. + +## What You'll Build + +By the end of this tutorial, you'll be able to: +- Create text embeddings with `OCIGenAIEmbeddings` +- Create image embeddings for cross-modal search +- Use embeddings for semantic similarity +- Build a basic RAG pipeline + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- Basic understanding of vector embeddings + +## Concepts Covered + +| Concept | Description | +|---------|-------------| +| `OCIGenAIEmbeddings` | Main embeddings class | +| `embed_documents()` | Embed multiple texts | +| `embed_query()` | Embed a search query | +| `embed_image()` | Embed a single image | +| `embed_image_batch()` | Embed multiple images | + +--- + +## Part 1: What Are Embeddings? + +Embeddings convert text (or images) into numerical vectors that capture semantic meaning: + +``` +"The cat sat on the mat" → [0.12, -0.34, 0.56, ...] (1536 dimensions) +"A feline rested on a rug" → [0.11, -0.33, 0.55, ...] (similar vector!) +``` + +Similar meanings = similar vectors. This enables: +- **Semantic search** - Find relevant content by meaning, not keywords +- **RAG** - Retrieve context for LLMs +- **Clustering** - Group similar items +- **Recommendations** - Find similar products/content + +--- + +## Part 2: Creating Text Embeddings + +### Basic Usage + +```python +from langchain_oci import OCIGenAIEmbeddings + +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Embed a single query +query_vector = embeddings.embed_query("What is machine learning?") +print(f"Vector dimension: {len(query_vector)}") # 1024 + +# Embed multiple documents +doc_vectors = embeddings.embed_documents([ + "Machine learning is a branch of AI.", + "Deep learning uses neural networks.", + "Natural language processing handles text.", +]) +print(f"Number of vectors: {len(doc_vectors)}") # 3 +``` + +### Available Models + +| Model | Dimensions | Best For | +|-------|------------|----------| +| `cohere.embed-english-v3.0` | 1024 | English text | +| `cohere.embed-multilingual-v3.0` | 1024 | Multiple languages | +| `cohere.embed-v4.0` | 256-1536 | Text + Images (multimodal) | + +--- + +## Part 3: Input Types + +Different input types optimize embeddings for specific use cases: + +```python +# For documents being indexed +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + input_type="SEARCH_DOCUMENT", # Default +) + +# For search queries +embeddings_query = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + 
compartment_id="ocid1.compartment.oc1..xxx", + input_type="SEARCH_QUERY", +) +``` + +### Input Type Reference + +| Input Type | Use Case | +|------------|----------| +| `SEARCH_DOCUMENT` | Documents being indexed | +| `SEARCH_QUERY` | User search queries | +| `CLASSIFICATION` | Text classification | +| `CLUSTERING` | Text clustering | +| `IMAGE` | Image inputs (use embed_image instead) | + +--- + +## Part 4: Image Embeddings + +Multimodal models like `cohere.embed-v4.0` can embed both text and images into the same vector space: + +```python +from langchain_oci import OCIGenAIEmbeddings + +# Use multimodal model +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-v4.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Embed an image +image_vector = embeddings.embed_image("./photo.jpg") +print(f"Image vector dimensions: {len(image_vector)}") + +# Embed multiple images +image_vectors = embeddings.embed_image_batch([ + "./photo1.jpg", + "./photo2.jpg", + "./photo3.jpg", +]) +``` + +### Image Input Formats + +```python +# From file path +vector = embeddings.embed_image("./photo.jpg") + +# From bytes +with open("photo.png", "rb") as f: + vector = embeddings.embed_image(f.read(), mime_type="image/png") + +# From data URI +vector = embeddings.embed_image("data:image/png;base64,iVBORw0...") +``` + +--- + +## Part 5: Cross-Modal Search + +With multimodal embeddings, you can: +- Search images using text queries +- Search text using image queries + +```python +from langchain_oci import OCIGenAIEmbeddings +import numpy as np + +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-v4.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Index images +image_paths = ["sunset.jpg", "beach.jpg", "mountain.jpg", "city.jpg"] +image_vectors = embeddings.embed_image_batch(image_paths) + +# Search with text query +query = "A beautiful sunset over the ocean" +query_vector = embeddings.embed_query(query) + +# Find most similar image +def cosine_similarity(a, b): + return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) + +similarities = [cosine_similarity(query_vector, img_vec) for img_vec in image_vectors] +best_match_idx = np.argmax(similarities) +print(f"Best match: {image_paths[best_match_idx]}") # sunset.jpg +``` + +--- + +## Part 6: Output Dimensions + +For `cohere.embed-v4.0`, you can control the output dimensions: + +```python +# Smaller vectors (faster, less accurate) +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-v4.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + output_dimensions=256, # Options: 256, 512, 1024, 1536 +) +``` + +| Dimensions | Trade-off | +|------------|-----------| +| 256 | Fastest, lowest storage, less accurate | +| 512 | Balanced | +| 1024 | Good accuracy | +| 1536 | Most accurate (default for embed-v4.0) | + +--- + +## Part 7: RAG Pattern + +Use embeddings for Retrieval Augmented Generation: + +```python +from langchain_oci import ChatOCIGenAI, OCIGenAIEmbeddings +from langchain_core.messages import HumanMessage +import numpy as np + +# Setup +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +llm = ChatOCIGenAI( + 
model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Knowledge base +documents = [ + "Oracle Cloud Infrastructure provides AI services.", + "OCI Generative AI supports multiple LLM providers.", + "LangChain is a framework for LLM applications.", +] +doc_vectors = embeddings.embed_documents(documents) + +# Query and retrieve +query = "What AI services does Oracle offer?" +query_vector = embeddings.embed_query(query) + +# Find most relevant documents +def cosine_similarity(a, b): + return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) + +similarities = [cosine_similarity(query_vector, dv) for dv in doc_vectors] +top_indices = np.argsort(similarities)[-2:][::-1] # Top 2 +context = "\n".join([documents[i] for i in top_indices]) + +# Generate answer with context +prompt = f"""Use the following context to answer the question. + +Context: +{context} + +Question: {query} + +Answer:""" + +response = llm.invoke(prompt) +print(response.content) +``` + +--- + +## Part 8: Vector Store Integration + +Use with LangChain vector stores: + +```python +from langchain_community.vectorstores import FAISS +from langchain_oci import OCIGenAIEmbeddings + +embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +# Create vector store from documents +documents = [ + "Document 1 content...", + "Document 2 content...", + "Document 3 content...", +] + +vectorstore = FAISS.from_texts(documents, embeddings) + +# Search +results = vectorstore.similarity_search("search query", k=2) +for doc in results: + print(doc.page_content) +``` + +--- + +## Summary + +In this tutorial, you learned: + +1. **What embeddings are** - Vector representations of meaning +2. **Text embeddings** - `embed_documents()`, `embed_query()` +3. **Image embeddings** - `embed_image()`, `embed_image_batch()` +4. **Input types** - SEARCH_DOCUMENT, SEARCH_QUERY, etc. +5. **Cross-modal search** - Text-to-image and image-to-text +6. **RAG pattern** - Retrieval Augmented Generation +7. 
**Vector stores** - FAISS integration + +## Next Steps + +- **[Tutorial 02: Vision & Multimodal](../02-vision-and-multimodal/)** - Image analysis +- **[Tutorial 07: Async for Production](../07-async-for-production/)** - Async embedding operations + +## API Reference + +| Method | Description | +|--------|-------------| +| `embed_query(text)` | Embed a single query | +| `embed_documents(texts)` | Embed multiple documents | +| `embed_image(image)` | Embed a single image | +| `embed_image_batch(images)` | Embed multiple images | + +### Models + +| Model | Type | Dimensions | +|-------|------|------------| +| `cohere.embed-english-v3.0` | Text only | 1024 | +| `cohere.embed-multilingual-v3.0` | Text only | 1024 | +| `cohere.embed-v4.0` | Text + Image | 256-1536 | + +## Troubleshooting + +### "Model does not support images" +- Use `cohere.embed-v4.0` for image embeddings +- Check `IMAGE_EMBEDDING_MODELS` registry + +### "Embedding dimension mismatch" +- Ensure same model and `output_dimensions` for indexing and querying +- Store dimension metadata with your vectors + +### "Batch too large" +- Reduce batch size (default is 96) +- Use `batch_size` parameter diff --git a/libs/oci/tutorials/10-embeddings/code/image_embeddings.py b/libs/oci/tutorials/10-embeddings/code/image_embeddings.py new file mode 100644 index 00000000..b381af69 --- /dev/null +++ b/libs/oci/tutorials/10-embeddings/code/image_embeddings.py @@ -0,0 +1,94 @@ +# Tutorial 10: Image Embeddings Example +# Demonstrates multimodal embeddings for cross-modal search + +import numpy as np + +from langchain_oci import OCIGenAIEmbeddings + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def cosine_similarity(a: list, b: list) -> float: + """Calculate cosine similarity between two vectors.""" + a_arr = np.array(a) + b_arr = np.array(b) + return float(np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr))) + + +def main(): + # Create multimodal embeddings client + embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-v4.0", # Multimodal model + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + print("Image Embeddings with cohere.embed-v4.0") + print("=" * 50) + + # Note: Replace these with real image paths + image_paths = [ + "sunset.jpg", + "beach.jpg", + "mountain.jpg", + ] + + # Check if images exist (demo mode) + import os + images_exist = all(os.path.exists(p) for p in image_paths) + + if not images_exist: + print("\nDemo mode: Image files not found.") + print("To test image embeddings:") + print("1. Place image files in the current directory") + print("2. Update image_paths list") + print("3. 
Run this script again") + + # Demo with text embeddings instead + print("\nShowing cross-modal capability concept:") + print("-" * 50) + + # Text descriptions that could match images + descriptions = [ + "A beautiful sunset over the ocean", + "A sandy beach with palm trees", + "Snow-capped mountain peaks", + ] + + print("If you had these images:") + for i, (img, desc) in enumerate(zip(image_paths, descriptions)): + print(f" {img}: '{desc}'") + + print("\nYou could search with text queries like:") + print(" 'Show me sunset photos' -> Would match sunset.jpg") + print(" 'Beach vacation' -> Would match beach.jpg") + print(" 'Mountain scenery' -> Would match mountain.jpg") + + else: + # Real image embedding + print("\nEmbedding images...") + + # Embed all images + image_vectors = embeddings.embed_image_batch(image_paths) + print(f"Embedded {len(image_vectors)} images") + print(f"Vector dimensions: {len(image_vectors[0])}") + + # Text query for cross-modal search + query = "A beautiful sunset over the ocean" + query_vector = embeddings.embed_query(query) + print(f"\nSearch query: '{query}'") + + # Find most similar image + print("\nSimilarity scores:") + for path, vec in zip(image_paths, image_vectors): + sim = cosine_similarity(query_vector, vec) + print(f" {sim:.4f} - {path}") + + similarities = [cosine_similarity(query_vector, v) for v in image_vectors] + best_idx = np.argmax(similarities) + print(f"\nBest match: {image_paths[best_idx]}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/10-embeddings/code/rag_example.py b/libs/oci/tutorials/10-embeddings/code/rag_example.py new file mode 100644 index 00000000..2338822f --- /dev/null +++ b/libs/oci/tutorials/10-embeddings/code/rag_example.py @@ -0,0 +1,106 @@ +# Tutorial 10: RAG (Retrieval Augmented Generation) Example +# Demonstrates using embeddings for context retrieval + +import numpy as np + +from langchain_oci import ChatOCIGenAI, OCIGenAIEmbeddings + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def cosine_similarity(a: list, b: list) -> float: + """Calculate cosine similarity between two vectors.""" + a_arr = np.array(a) + b_arr = np.array(b) + return float(np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr))) + + +def retrieve_context(query: str, documents: list, doc_vectors: list, embeddings, top_k: int = 2) -> list: + """Retrieve most relevant documents for a query.""" + query_vector = embeddings.embed_query(query) + + # Calculate similarities + similarities = [(i, cosine_similarity(query_vector, dv)) for i, dv in enumerate(doc_vectors)] + + # Sort by similarity (descending) + similarities.sort(key=lambda x: x[1], reverse=True) + + # Return top-k documents + return [documents[i] for i, _ in similarities[:top_k]] + + +def main(): + # Create embeddings client + embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Create LLM + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Knowledge base (in production, this would be much larger) + knowledge_base = [ + "Oracle Cloud Infrastructure (OCI) provides enterprise cloud services including compute, storage, and networking.", + "OCI Generative AI service offers access to large language models from multiple providers including Meta, Cohere, and Google.", + 
"LangChain is an open-source framework for building applications with large language models.", + "The langchain-oci package provides LangChain integrations for OCI Generative AI services.", + "RAG (Retrieval Augmented Generation) combines retrieval systems with LLMs to provide accurate, grounded responses.", + "Vector embeddings represent text as numerical vectors, enabling semantic similarity search.", + "OCI offers dedicated AI clusters (DAC) for running custom model endpoints.", + "The ChatOCIGenAI class is the main interface for chat models in langchain-oci.", + ] + + print("RAG Example: Retrieval Augmented Generation") + print("=" * 50) + + # Index documents + print("\nIndexing knowledge base...") + doc_vectors = embeddings.embed_documents(knowledge_base) + print(f"Indexed {len(doc_vectors)} documents") + + # Example questions + questions = [ + "What is OCI Generative AI?", + "How do I use LangChain with Oracle Cloud?", + "What is RAG and how does it work?", + ] + + for question in questions: + print(f"\n{'=' * 50}") + print(f"Question: {question}") + print("-" * 50) + + # Retrieve relevant context + context_docs = retrieve_context( + question, knowledge_base, doc_vectors, embeddings, top_k=2 + ) + + print("Retrieved context:") + for i, doc in enumerate(context_docs, 1): + print(f" {i}. {doc[:80]}...") + + # Generate answer with context + context = "\n".join(context_docs) + prompt = f"""Use the following context to answer the question. +Be concise and only use information from the context. + +Context: +{context} + +Question: {question} + +Answer:""" + + response = llm.invoke(prompt) + print(f"\nAnswer: {response.content}") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/10-embeddings/code/text_embeddings.py b/libs/oci/tutorials/10-embeddings/code/text_embeddings.py new file mode 100644 index 00000000..06334c28 --- /dev/null +++ b/libs/oci/tutorials/10-embeddings/code/text_embeddings.py @@ -0,0 +1,65 @@ +# Tutorial 10: Text Embeddings Example +# Demonstrates creating and using text embeddings + +import numpy as np + +from langchain_oci import OCIGenAIEmbeddings + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def cosine_similarity(a: list, b: list) -> float: + """Calculate cosine similarity between two vectors.""" + a_arr = np.array(a) + b_arr = np.array(b) + return float(np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr))) + + +def main(): + # Create embeddings client + embeddings = OCIGenAIEmbeddings( + model_id="cohere.embed-english-v3.0", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Test 1: Single query embedding + print("Test 1: Single Query Embedding") + print("-" * 40) + query = "What is machine learning?" 
+ query_vector = embeddings.embed_query(query) + print(f"Query: {query}") + print(f"Vector dimensions: {len(query_vector)}") + print(f"First 5 values: {query_vector[:5]}") + + # Test 2: Document embeddings + print("\nTest 2: Document Embeddings") + print("-" * 40) + documents = [ + "Machine learning is a type of artificial intelligence.", + "Python is a popular programming language.", + "The weather is nice today.", + "Neural networks are used in deep learning.", + ] + doc_vectors = embeddings.embed_documents(documents) + print(f"Number of documents: {len(documents)}") + print(f"Number of vectors: {len(doc_vectors)}") + + # Test 3: Semantic similarity + print("\nTest 3: Semantic Similarity") + print("-" * 40) + print(f"Query: '{query}'") + print("\nSimilarity scores:") + for doc, vec in zip(documents, doc_vectors): + sim = cosine_similarity(query_vector, vec) + print(f" {sim:.4f} - {doc[:50]}...") + + # Find most similar document + similarities = [cosine_similarity(query_vector, v) for v in doc_vectors] + best_idx = np.argmax(similarities) + print(f"\nMost similar: '{documents[best_idx]}'") + + +if __name__ == "__main__": + main() diff --git a/libs/oci/tutorials/README.md b/libs/oci/tutorials/README.md new file mode 100644 index 00000000..d9f6d319 --- /dev/null +++ b/libs/oci/tutorials/README.md @@ -0,0 +1,122 @@ +# langchain-oci Tutorials + +Welcome to the langchain-oci tutorials! These tutorials will take you from beginner to expert, progressively building your skills with OCI Generative AI and LangChain. + +## Learning Path + +``` + BEGINNER + │ + ▼ + ┌─────────────────┐ + │ 01. Getting │ Authentication, ChatOCIGenAI + │ Started │ First chat, provider intro + └────────┬────────┘ + │ + ┌──────────────┴──────────────┐ + ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ 02. Vision & │ │ 03. Building │ create_oci_agent() + │ Multimodal │ │ AI Agents │ checkpointing + └────────┬────────┘ └────────┬────────┘ + │ │ + └──────────────┬───────────────┘ + │ + INTERMEDIATE + │ + ┌─────────────┴─────────────┐ + ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ 04. Tool │ │ 05. Structured │ + │ Calling │ │ Output │ + └────────┬────────┘ └────────┬────────┘ + │ │ + └─────────────┬──────────────┘ + │ + ADVANCED + │ + ┌─────────────────┼─────────────────┐ + ▼ ▼ ▼ +┌──────────────┐ ┌──────────────┐ ┌──────────────┐ +│ 06. Model │ │ 07. Async & │ │ 08. OpenAI │ +│ Deployments │ │ Production │ │ Responses │ +└──────────────┘ └──────────────┘ └──────────────┘ + │ + SPECIALIZED + │ + ┌────────────┴────────────┐ + ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ 09. Provider │ │ 10. 
Embeddings │ + │ Deep Dive │ │ Text & Image │ + └─────────────────┘ └─────────────────┘ +``` + +## Tutorial Index + +| # | Tutorial | Level | Description | +|---|----------|-------|-------------| +| 01 | [Getting Started](./01-getting-started/) | Beginner | Authentication, ChatOCIGenAI, providers | +| 02 | [Vision & Multimodal](./02-vision-and-multimodal/) | Beginner | Image analysis, PDF, video, audio | +| 03 | [Building AI Agents](./03-building-ai-agents/) | Beginner | ReAct agents, tools, memory | +| 04 | [Tool Calling Mastery](./04-tool-calling-mastery/) | Intermediate | bind_tools, parallel calls, workflows | +| 05 | [Structured Output](./05-structured-output/) | Intermediate | Pydantic schemas, JSON modes | +| 06 | [Model Deployments](./06-model-deployments/) | Advanced | vLLM, TGI, custom endpoints | +| 07 | [Async for Production](./07-async-for-production/) | Advanced | ainvoke, astream, FastAPI | +| 08 | [OpenAI Responses API](./08-openai-responses-api/) | Advanced | ChatOCIOpenAI, conversation store | +| 09 | [Provider Deep Dive](./09-provider-deep-dive/) | Specialized | Meta, Gemini, Cohere, xAI | +| 10 | [Embeddings](./10-embeddings/) | Specialized | Text & image embeddings, RAG | + +## Quick Start + +If you're new to langchain-oci, start here: + +```bash +pip install langchain-oci oci +``` + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="your-compartment-id", +) + +response = llm.invoke("Hello!") +print(response.content) +``` + +Then continue with [Tutorial 01: Getting Started](./01-getting-started/). + +## Feature Coverage + +| Feature | Tutorial(s) | +|---------|-------------| +| `ChatOCIGenAI` | 01, 02, 03, 04, 05, 07 | +| `ChatOCIOpenAI` | 08 | +| `ChatOCIModelDeployment` | 06, 07 | +| `OCIGenAIEmbeddings` | 10 | +| `create_oci_agent()` | 03 | +| Vision (13 models) | 02 | +| Gemini PDF/video/audio | 02 | +| Tool calling | 03, 04 | +| Parallel tool calls | 04 | +| Structured output | 05 | +| Async (ainvoke/astream/abatch) | 07 | +| Image embeddings | 10 | + +## Prerequisites + +All tutorials assume: +- Python 3.9+ +- OCI CLI configured (`~/.oci/config`) +- Access to OCI Generative AI service +- A valid compartment ID + +## Getting Help + +- [API Reference](../docs/API_REFERENCE.md) +- [Main Documentation](../README.md) +- [GitHub Issues](https://github.com/oracle/langchain-oracle/issues) From 420a6dd5ea89865e677c37d8bf509b375f237337 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 12:01:12 -0500 Subject: [PATCH 02/16] Update root README with feature matrix and badges --- README.md | 126 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 97 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 3e8dc113..98732a0f 100644 --- a/README.md +++ b/README.md @@ -1,59 +1,127 @@ -# 🦜️🔗 LangChain 🤝 Oracle +# LangChain Oracle -Welcome to the official repository for LangChain integration with [Oracle Cloud Infrastructure (OCI)](https://cloud.oracle.com/) and [Oracle AI Vector Search](https://www.oracle.com/database/ai-vector-search/). This project provides native LangChain components for interacting with Oracle's AI services—providing support for **OCI Generative AI**, **OCI Data Science** and **Oracle AI Vector Search**. 
+[![PyPI - langchain-oci](https://img.shields.io/pypi/v/langchain-oci?label=langchain-oci)](https://pypi.org/project/langchain-oci/) +[![PyPI - langchain-oracledb](https://img.shields.io/pypi/v/langchain-oracledb?label=langchain-oracledb)](https://pypi.org/project/langchain-oracledb/) +[![Python versions](https://img.shields.io/pypi/pyversions/langchain-oci)](https://pypi.org/project/langchain-oci/) +[![License](https://img.shields.io/badge/License-UPL%201.0-green)](https://opensource.org/licenses/UPL) -## Features +Official LangChain integrations for [Oracle Cloud Infrastructure (OCI)](https://cloud.oracle.com/) and [Oracle AI Vector Search](https://www.oracle.com/database/ai-vector-search/). -- **LLMs**: Includes LLM classes for OCI services like [Generative AI](https://cloud.oracle.com/ai-services/generative-ai) and [ModelDeployment Endpoints](https://cloud.oracle.com/ai-services/model-deployment), allowing you to leverage their language models within LangChain. -- **Agents**: Includes Runnables to support [Oracle Generative AI Agents](https://www.oracle.com/artificial-intelligence/generative-ai/agents/), allowing you to leverage Generative AI Agents within LangChain and LangGraph. -- **Vector Search**: Offers native integration with [Oracle AI Vector Search](https://www.oracle.com/database/ai-vector-search/) through a LangChain-compatible components. This enables pipelines that can: - - Load the documents from various sources using `OracleDocLoader` - - Summarize content within/outside the database using `OracleSummary` - - Generate embeddings within/outside the database using `OracleEmbeddings` - - Chunk according to different requirements using Advanced Oracle Capabilities from `OracleTextSplitter` - - Store, index, and query vectors using `OracleVS` -- **More to come**: This repository will continue to expand and offer additional components for various OCI and Oracle AI services as development progresses. +## Packages -> This project merges and replaces earlier OCI and Oracle AI Vector Search integrations from the `langchain-community` repository and unifies contributions from Oracle teams. -> All integrations in this package assume that you have the credentials setup to connect with oci and database services. +| Package | Description | Install | +|---------|-------------|---------| +| [**langchain-oci**](./libs/oci/) | OCI Generative AI & Data Science | `pip install langchain-oci` | +| [**langchain-oracledb**](./libs/oracledb/) | Oracle AI Vector Search | `pip install langchain-oracledb` | --- -## Installation +## langchain-oci Features -For OCI services: +Full-featured LangChain integration for OCI Generative AI services. 
-```bash -python -m pip install -U langchain-oci +### Chat Models & Providers + +| Provider | Models | Vision | Tool Calling | +|----------|--------|--------|--------------| +| **Meta** | Llama 3.2, 3.3, 4 | ✅ | ✅ (parallel in Llama 4) | +| **Google** | Gemini 2.0/2.5 Flash | ✅ | ✅ | +| **xAI** | Grok 4 | ✅ | ✅ | +| **Cohere** | Command R+, Command A | ✅ (V2) | ✅ | +| **OpenAI** | GPT-4, o1 | - | ✅ | + +### Key Features + +| Feature | Description | +|---------|-------------| +| **Vision & Multimodal** | 13+ vision models, Gemini PDF/video/audio support | +| **AI Agents** | `create_oci_agent()` with checkpointing & human-in-the-loop | +| **Tool Calling** | Parallel tools, infinite loop detection, `tool_result_guidance` | +| **Structured Output** | Pydantic schemas, `json_mode`, `json_schema` | +| **Async Support** | `ainvoke()`, `astream()`, `abatch()` | +| **Embeddings** | Text & image embeddings in same vector space | + +### Quick Example + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +response = llm.invoke("Hello!") +print(response.content) ``` -For Oracle AI Vector Search services: +**[See full documentation →](./libs/oci/)** + +**[Explore tutorials →](./libs/oci/tutorials/)** + +--- + +## langchain-oracledb Features + +Native integration with Oracle AI Vector Search. + +| Component | Description | +|-----------|-------------| +| `OracleVS` | Vector store with similarity search | +| `OracleDocLoader` | Document loading from various sources | +| `OracleEmbeddings` | In-database embedding generation | +| `OracleTextSplitter` | Advanced text chunking | +| `OracleSummary` | In-database summarization | + +### Quick Example + +```python +from langchain_oracledb import OracleVS + +vectorstore = OracleVS( + client=connection, + embedding_function=embeddings, + table_name="my_vectors", +) + +results = vectorstore.similarity_search("search query", k=5) +``` + +**[See full documentation →](./libs/oracledb/)** + +--- + +## Installation ```bash -python -m pip install -U langchain-oracledb +# OCI Generative AI +pip install langchain-oci oci + +# Oracle AI Vector Search +pip install langchain-oracledb oracledb ``` --- -## Quick Start +## Documentation -This repository includes three main integration categories. For detailed information, please refer to the respective libraries: +- **[langchain-oci Documentation](./libs/oci/README.md)** - Chat models, embeddings, agents +- **[langchain-oci Tutorials](./libs/oci/tutorials/)** - Step-by-step learning path +- **[langchain-oracledb Documentation](./libs/oracledb/README.md)** - Vector search integration -- [OCI Generative AI](https://github.com/oracle/langchain-oracle/tree/main/libs/oci) -- [OCI Data Science (Model Deployment)](https://github.com/oracle/langchain-oracle/tree/main/libs/oci) -- [Oracle AI Vector Search](https://github.com/oracle/langchain-oracle/tree/main/libs/oracledb) +--- ## Contributing -This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md) +This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md). 
## Security -Please consult the [security guide](./SECURITY.md) for our responsible security vulnerability disclosure process +Please consult the [security guide](./SECURITY.md) for our responsible security vulnerability disclosure process. ## License Copyright (c) 2025 Oracle and/or its affiliates. -Released under the Universal Permissive License v1.0 as shown at - +Released under the Universal Permissive License v1.0 as shown at From 91ffa5b2f99138e5cf27750c4d7a16628485cee6 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 12:10:25 -0500 Subject: [PATCH 03/16] Add P1 tutorials: Model Deployments, OpenAI API, Provider Deep Dive Tutorial 06 (Model Deployments): - ChatOCIModelDeployment, ChatOCIModelDeploymentVLLM, ChatOCIModelDeploymentTGI - vLLM and TGI specific parameters - Custom endpoint extension patterns - Streaming and async operations Tutorial 08 (OpenAI Responses API): - ChatOCIOpenAI for OpenAI Responses API compatibility - OCI authentication (session, resource principal, instance principal) - Conversation stores for persistent memory - Web search and hosted MCP integration - Migration guide from OpenAI Tutorial 09 (Provider Deep Dive): - Provider architecture and auto-detection - Meta Llama: vision, parallel tools, tool_result_guidance - Google Gemini: PDF, video, audio multimodal - Cohere: RAG with citations, V2 vision API - xAI Grok: reasoning content access - Provider selection guide and feature matrix --- .../tutorials/06-model-deployments/README.md | 479 ++++++++++++++ .../code/custom_endpoint.py | 174 +++++ .../code/tgi_deployment.py | 115 ++++ .../code/vllm_deployment.py | 114 ++++ .../08-openai-responses-api/README.md | 457 +++++++++++++ .../code/conversation_store.py | 127 ++++ .../code/openai_compat.py | 122 ++++ .../code/tools_and_search.py | 146 +++++ .../tutorials/09-provider-deep-dive/README.md | 617 ++++++++++++++++++ .../code/cohere_examples.py | 182 ++++++ .../code/gemini_examples.py | 198 ++++++ .../code/meta_examples.py | 175 +++++ .../code/provider_comparison.py | 202 ++++++ 13 files changed, 3108 insertions(+) create mode 100644 libs/oci/tutorials/06-model-deployments/README.md create mode 100644 libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py create mode 100644 libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py create mode 100644 libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py create mode 100644 libs/oci/tutorials/08-openai-responses-api/README.md create mode 100644 libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py create mode 100644 libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py create mode 100644 libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py create mode 100644 libs/oci/tutorials/09-provider-deep-dive/README.md create mode 100644 libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py create mode 100644 libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py create mode 100644 libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py create mode 100644 libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py diff --git a/libs/oci/tutorials/06-model-deployments/README.md b/libs/oci/tutorials/06-model-deployments/README.md new file mode 100644 index 00000000..c3a09ec2 --- /dev/null +++ b/libs/oci/tutorials/06-model-deployments/README.md @@ -0,0 +1,479 @@ +# Tutorial 06: Custom Model Deployments + +Deploy and use custom models on OCI Data Science Model Deployments with LangChain. 
+ +## What You'll Learn + +- Deploy custom models using OCI Data Science +- Use `ChatOCIModelDeployment` for chat interfaces +- Configure vLLM and TGI deployments +- Handle authentication with `oracle-ads` +- Enable streaming and async operations + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- OCI Data Science Model Deployment endpoint +- `oracle-ads` and `langchain-openai` installed + +```bash +pip install oracle-ads langchain-openai langchain-oci +``` + +## Concepts Covered + +| Class | Description | +|-------|-------------| +| `ChatOCIModelDeployment` | Base chat model for OCI deployments | +| `ChatOCIModelDeploymentVLLM` | vLLM-specific deployment | +| `ChatOCIModelDeploymentTGI` | Text Generation Inference deployment | +| `OCIModelDeploymentLLM` | Legacy LLM interface | + +--- + +## Part 1: Understanding OCI Model Deployments + +OCI Data Science Model Deployments allow you to deploy custom models (fine-tuned LLMs, open-source models) on dedicated infrastructure. Unlike the managed GenAI service, you have full control over: + +- Model selection (any Hugging Face model) +- Infrastructure sizing (GPU types, memory) +- Inference framework (vLLM, TGI, custom) + +### Deployment Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ OCI Data Science │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ vLLM │ │ TGI │ │ +│ │ Deployment │ │ Deployment │ │ +│ └──────┬───────┘ └──────┬───────┘ │ +│ │ │ │ +│ └─────────┬─────────┘ │ +│ │ │ +│ ┌──────────┴──────────┐ │ +│ │ /v1/chat/completions│ │ +│ │ (OpenAI-compatible)│ │ +│ └──────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` + +--- + +## Part 2: Authentication Setup + +OCI Model Deployments use `oracle-ads` for authentication. + +### Configure ADS Authentication + +```python +import ads + +# Option 1: API Key (default, uses ~/.oci/config) +ads.set_auth("api_key") + +# Option 2: Resource Principal (for OCI Functions, Jobs) +ads.set_auth("resource_principal") + +# Option 3: Instance Principal (for Compute instances) +ads.set_auth("instance_principal") + +# Option 4: Security Token (session-based) +ads.set_auth("security_token") +``` + +### Verify Authentication + +```python +# Check current auth method +print(ads.common.auth.default_signer()) +``` + +--- + +## Part 3: Basic ChatOCIModelDeployment + +The base class works with any OpenAI-compatible endpoint. 
+ +```python +from langchain_oci import ChatOCIModelDeployment + +# Create chat model pointing to your deployment +chat = ChatOCIModelDeployment( + endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", + model="odsc-llm", # Default model name for AQUA deployments + streaming=True, + model_kwargs={ + "max_tokens": 512, + "temperature": 0.2, + }, +) + +# Simple invocation +response = chat.invoke("What is machine learning?") +print(response.content) +``` + +### With Custom Headers + +```python +chat = ChatOCIModelDeployment( + endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", + model="odsc-llm", + default_headers={ + "route": "/v1/chat/completions", + "X-Custom-Header": "value", + }, +) +``` + +### Message Formats + +```python +from langchain_core.messages import HumanMessage, SystemMessage + +messages = [ + SystemMessage(content="You are a helpful assistant."), + HumanMessage(content="Explain quantum computing in simple terms."), +] + +response = chat.invoke(messages) +``` + +--- + +## Part 4: vLLM Deployments + +vLLM is optimized for high-throughput LLM inference with PagedAttention. + +```python +from langchain_oci import ChatOCIModelDeploymentVLLM + +chat = ChatOCIModelDeploymentVLLM( + endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", + model="odsc-llm", + + # vLLM-specific parameters + temperature=0.2, + max_tokens=512, + top_p=0.95, + top_k=40, + + # Sampling parameters + frequency_penalty=0.1, + presence_penalty=0.1, + repetition_penalty=1.1, + + # Beam search (optional) + use_beam_search=False, + best_of=1, + + # Token control + min_tokens=10, + ignore_eos=False, + skip_special_tokens=True, +) + +response = chat.invoke("Write a haiku about coding.") +print(response.content) +``` + +### vLLM Tool Calling + +If your vLLM deployment is configured with `--tool-call-parser`: + +```python +from langchain_core.tools import tool + +@tool +def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"Weather in {city}: 72F, sunny" + +chat = ChatOCIModelDeploymentVLLM( + endpoint="...", + tool_choice="auto", # Enable tool calling +) + +chat_with_tools = chat.bind_tools([get_weather]) +response = chat_with_tools.invoke("What's the weather in Chicago?") +``` + +### Custom Chat Templates + +```python +chat = ChatOCIModelDeploymentVLLM( + endpoint="...", + chat_template="{% for message in messages %}...", # Jinja2 template +) +``` + +--- + +## Part 5: TGI Deployments + +Text Generation Inference (TGI) from Hugging Face offers production-ready serving. + +```python +from langchain_oci import ChatOCIModelDeploymentTGI + +chat = ChatOCIModelDeploymentTGI( + endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", + model="odsc-llm", + + # TGI parameters + temperature=0.2, + max_tokens=512, + top_p=0.9, + + # Reproducibility + seed=42, + + # Penalties + frequency_penalty=0.0, + presence_penalty=0.0, + + # Log probabilities + logprobs=True, + top_logprobs=5, +) + +response = chat.invoke("Explain the theory of relativity.") +print(response.content) +``` + +--- + +## Part 6: Streaming Responses + +All deployment classes support streaming. 
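+
+The subsections below stream directly from the chat model. Streaming also composes with LCEL chains; here is a minimal, hedged sketch (the endpoint is a placeholder and the prompt is illustrative):
+
+```python
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+
+from langchain_oci import ChatOCIModelDeployment
+
+prompt = ChatPromptTemplate.from_template("Write a limerick about {topic}.")
+chat = ChatOCIModelDeployment(endpoint="...", model="odsc-llm", streaming=True)
+
+# StrOutputParser makes the chain yield plain strings instead of message chunks.
+chain = prompt | chat | StrOutputParser()
+
+for token in chain.stream({"topic": "distributed systems"}):
+    print(token, end="", flush=True)
+```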
+ +### Synchronous Streaming + +```python +chat = ChatOCIModelDeployment( + endpoint="...", + streaming=True, +) + +for chunk in chat.stream("Tell me a story about a robot."): + print(chunk.content, end="", flush=True) +``` + +### Async Streaming + +```python +import asyncio + +async def stream_response(): + chat = ChatOCIModelDeployment( + endpoint="...", + streaming=True, + ) + + async for chunk in chat.astream("Tell me a story about a robot."): + print(chunk.content, end="", flush=True) + +asyncio.run(stream_response()) +``` + +--- + +## Part 7: Async Operations + +Full async support for high-concurrency applications. + +```python +import asyncio +from langchain_oci import ChatOCIModelDeployment + +async def main(): + chat = ChatOCIModelDeployment(endpoint="...", model="odsc-llm") + + # Single async request + response = await chat.ainvoke("Hello!") + print(response.content) + + # Concurrent requests + questions = [ + "What is Python?", + "What is JavaScript?", + "What is Rust?", + ] + + tasks = [chat.ainvoke(q) for q in questions] + responses = await asyncio.gather(*tasks) + + for q, r in zip(questions, responses): + print(f"Q: {q}") + print(f"A: {r.content[:100]}...") + +asyncio.run(main()) +``` + +--- + +## Part 8: Structured Output + +Use JSON mode for structured responses. + +```python +from pydantic import BaseModel, Field +from langchain_oci import ChatOCIModelDeployment + +class MovieReview(BaseModel): + title: str = Field(description="Movie title") + rating: int = Field(description="Rating from 1-10") + summary: str = Field(description="Brief review summary") + +chat = ChatOCIModelDeployment(endpoint="...", model="odsc-llm") + +# Use JSON mode +structured_chat = chat.with_structured_output(MovieReview, method="json_mode") + +response = structured_chat.invoke( + "Review the movie 'Inception'. Respond in JSON with " + "title, rating (1-10), and summary fields." +) + +print(f"Title: {response.title}") +print(f"Rating: {response.rating}/10") +print(f"Summary: {response.summary}") +``` + +--- + +## Part 9: Custom Endpoint Handling + +Extend the base class for custom inference endpoints. + +```python +from langchain_oci import ChatOCIModelDeployment +from langchain_core.outputs import ChatResult, ChatGeneration +from langchain_core.messages import AIMessage + +class MyCustomDeployment(ChatOCIModelDeployment): + """Custom deployment with non-standard response format.""" + + def _construct_json_body(self, messages: list, params: dict) -> dict: + """Custom request payload.""" + return { + "inputs": messages, + "parameters": params, + "custom_field": "value", + } + + def _process_response(self, response_json: dict) -> ChatResult: + """Custom response parsing.""" + # Extract text from custom response format + text = response_json.get("output", {}).get("generated_text", "") + + return ChatResult( + generations=[ + ChatGeneration( + message=AIMessage(content=text), + generation_info={"custom": True}, + ) + ] + ) + +# Use custom deployment +chat = MyCustomDeployment(endpoint="...", model="my-model") +response = chat.invoke("Hello!") +``` + +--- + +## Part 10: Legacy LLM Interface + +For text completion (non-chat) workloads. 
+ +```python +from langchain_oci import OCIModelDeploymentLLM + +llm = OCIModelDeploymentLLM( + endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", + model="odsc-llm", + streaming=True, + model_kwargs={ + "max_tokens": 256, + "temperature": 0.7, + }, +) + +# Text completion +response = llm.invoke("Complete this sentence: The future of AI is") +print(response) + +# Streaming +for chunk in llm.stream("Write a poem about:"): + print(chunk, end="", flush=True) +``` + +--- + +## Summary + +You learned how to: + +- Configure authentication with `oracle-ads` +- Use `ChatOCIModelDeployment` for any OpenAI-compatible endpoint +- Configure vLLM deployments with `ChatOCIModelDeploymentVLLM` +- Configure TGI deployments with `ChatOCIModelDeploymentTGI` +- Stream responses synchronously and asynchronously +- Get structured output with JSON mode +- Extend base classes for custom endpoints + +## Next Steps + +- [Tutorial 07: Async for Production](../07-async-for-production/) - Scale with async patterns +- [Tutorial 08: OpenAI Responses API](../08-openai-responses-api/) - Use ChatOCIOpenAI + +## API Reference + +| Class | Description | +|-------|-------------| +| `ChatOCIModelDeployment` | Base class for OCI model deployments | +| `ChatOCIModelDeploymentVLLM` | vLLM-specific parameters | +| `ChatOCIModelDeploymentTGI` | TGI-specific parameters | +| `OCIModelDeploymentLLM` | Text completion interface | +| `OCIModelDeploymentVLLM` | vLLM text completion | +| `OCIModelDeploymentTGI` | TGI text completion | + +## Troubleshooting + +### Authentication Errors + +``` +Could not authenticate +``` +- Run `ads.set_auth("api_key")` with correct profile +- Verify `~/.oci/config` is properly configured +- Check IAM policies for model deployment access + +### Connection Timeout + +``` +ConnectTimeout +``` +- Verify endpoint URL is correct +- Check network connectivity to OCI +- Ensure deployment is in ACTIVE state + +### 401 Unauthorized + +``` +TokenExpiredError +``` +- Refresh security token: `oci session authenticate` +- Check resource principal permissions +- Verify compartment access policies + +### Model Not Found + +``` +Model 'xxx' not found +``` +- Use `"odsc-llm"` for AQUA deployments +- Check model name matches deployment configuration diff --git a/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py b/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py new file mode 100644 index 00000000..eb39d0fc --- /dev/null +++ b/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py @@ -0,0 +1,174 @@ +# Tutorial 06: Custom Endpoint Example +# Demonstrates extending ChatOCIModelDeployment for custom inference formats + +import ads +from langchain_core.messages import AIMessage +from langchain_core.outputs import ChatGeneration, ChatResult + +from langchain_oci import ChatOCIModelDeployment + +# Configure authentication +ads.set_auth("api_key") + + +class CustomFormatDeployment(ChatOCIModelDeployment): + """Custom deployment handler for non-standard response formats. + + This example shows how to extend the base class to handle + custom request/response formats from your model deployment. + """ + + custom_param: str = "default_value" # Add custom parameters + + def _construct_json_body(self, messages: list, params: dict) -> dict: + """Construct custom request payload. + + Override this method if your endpoint expects a different + request format than the standard OpenAI chat completions format. 
+ """ + # Convert LangChain messages to custom format + formatted_messages = [] + for msg in messages: + if hasattr(msg, "content"): + formatted_messages.append( + { + "role": msg.type, # "human", "ai", "system" + "text": msg.content, + } + ) + + return { + "conversation": formatted_messages, + "config": { + "max_tokens": params.get("max_tokens", 512), + "temperature": params.get("temperature", 0.7), + "custom_param": self.custom_param, + }, + } + + def _process_response(self, response_json: dict) -> ChatResult: + """Process custom response format. + + Override this method if your endpoint returns a different + response format than the standard OpenAI chat completions format. + """ + # Example custom response format: + # { + # "output": { + # "generated_text": "...", + # "tokens_used": 100 + # }, + # "status": "success" + # } + + output = response_json.get("output", {}) + text = output.get("generated_text", "") + tokens = output.get("tokens_used", 0) + + return ChatResult( + generations=[ + ChatGeneration( + message=AIMessage(content=text), + generation_info={ + "tokens_used": tokens, + "custom_format": True, + }, + ) + ], + llm_output={ + "model_name": self.model, + "token_usage": {"total_tokens": tokens}, + }, + ) + + +class BatchInferenceDeployment(ChatOCIModelDeployment): + """Custom deployment for batch inference endpoints. + + Some deployments support batch processing of multiple prompts + in a single request for efficiency. + """ + + batch_size: int = 5 + + def _construct_json_body(self, messages: list, params: dict) -> dict: + """Construct batch request payload.""" + # Extract user messages for batch processing + prompts = [] + for msg in messages: + if hasattr(msg, "content"): + prompts.append(msg.content) + + return { + "prompts": prompts, + "batch_config": { + "max_batch_size": self.batch_size, + "return_all": True, + }, + **params, + } + + +def demo_custom_deployment(): + """Demonstrate custom deployment usage.""" + print("Custom Deployment Example") + print("=" * 50) + + # Replace with your endpoint + endpoint = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" + + chat = CustomFormatDeployment( + endpoint=endpoint, + model="my-custom-model", + custom_param="special_value", + model_kwargs={ + "max_tokens": 256, + "temperature": 0.5, + }, + ) + + print(f"Custom deployment configured:") + print(f" - Endpoint: {endpoint}") + print(f" - Model: {chat.model}") + print(f" - Custom param: {chat.custom_param}") + + # Uncomment to actually invoke (requires real endpoint): + # response = chat.invoke("Hello, custom model!") + # print(f"Response: {response.content}") + + +def show_extension_patterns(): + """Show common extension patterns.""" + print("\nCommon Extension Patterns") + print("=" * 50) + + patterns = """ + 1. Custom Request Format: + Override _construct_json_body() to change how messages + are formatted in the HTTP request body. + + 2. Custom Response Parsing: + Override _process_response() to parse non-standard + response formats from your model. + + 3. Custom Streaming: + Override _process_stream_response() for custom + streaming response formats. + + 4. Custom Headers: + Use default_headers parameter or override _headers() + for custom HTTP headers. + + 5. Custom Parameters: + Add Pydantic fields to your subclass for + deployment-specific configuration. 
+ """ + print(patterns) + + +if __name__ == "__main__": + print("Custom Endpoint Examples") + print("Demonstrates extending ChatOCIModelDeployment\n") + + demo_custom_deployment() + show_extension_patterns() diff --git a/libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py b/libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py new file mode 100644 index 00000000..8c8c5e62 --- /dev/null +++ b/libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py @@ -0,0 +1,115 @@ +# Tutorial 06: TGI Deployment Example +# Demonstrates ChatOCIModelDeploymentTGI for Hugging Face TGI endpoints + +import ads +from langchain_core.messages import HumanMessage, SystemMessage + +from langchain_oci import ChatOCIModelDeploymentTGI + +# Configure authentication +ads.set_auth("api_key") + +# Replace with your deployment endpoint +ENDPOINT = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" + + +def basic_tgi_chat(): + """Basic chat with TGI deployment.""" + print("Basic TGI Chat") + print("=" * 50) + + chat = ChatOCIModelDeploymentTGI( + endpoint=ENDPOINT, + model="odsc-llm", + temperature=0.2, + max_tokens=512, + top_p=0.9, + ) + + messages = [ + SystemMessage(content="You are a helpful assistant."), + HumanMessage(content="Explain the difference between REST and GraphQL."), + ] + + response = chat.invoke(messages) + print(response.content) + + +def reproducible_generation(): + """Using seed for reproducible outputs.""" + print("\nReproducible Generation with Seed") + print("=" * 50) + + chat = ChatOCIModelDeploymentTGI( + endpoint=ENDPOINT, + model="odsc-llm", + temperature=0.5, + max_tokens=100, + seed=42, # Set seed for reproducibility + ) + + # Generate twice with same seed + prompt = "Generate a random 6-digit code:" + + response1 = chat.invoke(prompt) + print(f"First generation: {response1.content.strip()}") + + response2 = chat.invoke(prompt) + print(f"Second generation: {response2.content.strip()}") + + print("(With same seed, outputs should be identical)") + + +def logprobs_analysis(): + """Getting log probabilities for token analysis.""" + print("\nLog Probabilities Analysis") + print("=" * 50) + + chat = ChatOCIModelDeploymentTGI( + endpoint=ENDPOINT, + model="odsc-llm", + temperature=0.0, + max_tokens=50, + logprobs=True, + top_logprobs=3, + ) + + response = chat.invoke("The capital of France is") + print(f"Response: {response.content}") + + # Access log probabilities from response metadata + if response.response_metadata.get("logprobs"): + print("\nTop token probabilities available in response_metadata") + + +def streaming_tgi(): + """Streaming with TGI deployment.""" + print("\nStreaming TGI Response") + print("=" * 50) + + chat = ChatOCIModelDeploymentTGI( + endpoint=ENDPOINT, + model="odsc-llm", + streaming=True, + temperature=0.7, + max_tokens=200, + ) + + print("Response: ", end="") + for chunk in chat.stream("Write a haiku about machine learning."): + print(chunk.content, end="", flush=True) + print() + + +if __name__ == "__main__": + print("TGI Deployment Examples") + print("Note: Replace ENDPOINT with your actual deployment URL") + print() + + # Uncomment to run (requires actual deployment): + # basic_tgi_chat() + # reproducible_generation() + # logprobs_analysis() + # streaming_tgi() + + print("Examples are commented out - configure ENDPOINT and uncomment to run.") diff --git a/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py b/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py new file mode 100644 index 
00000000..0927ba89 --- /dev/null +++ b/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py @@ -0,0 +1,114 @@ +# Tutorial 06: vLLM Deployment Example +# Demonstrates ChatOCIModelDeploymentVLLM for high-throughput inference + +import ads +from langchain_core.messages import HumanMessage, SystemMessage + +from langchain_oci import ChatOCIModelDeploymentVLLM + +# Configure authentication (uses ~/.oci/config by default) +ads.set_auth("api_key") + +# Replace with your deployment endpoint +ENDPOINT = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" + + +def basic_chat(): + """Basic chat with vLLM deployment.""" + print("Basic vLLM Chat") + print("=" * 50) + + chat = ChatOCIModelDeploymentVLLM( + endpoint=ENDPOINT, + model="odsc-llm", + temperature=0.2, + max_tokens=512, + top_p=0.95, + ) + + messages = [ + SystemMessage(content="You are a helpful coding assistant."), + HumanMessage(content="Write a Python function to check if a number is prime."), + ] + + response = chat.invoke(messages) + print(response.content) + + +def streaming_chat(): + """Streaming response from vLLM deployment.""" + print("\nStreaming vLLM Chat") + print("=" * 50) + + chat = ChatOCIModelDeploymentVLLM( + endpoint=ENDPOINT, + model="odsc-llm", + streaming=True, + temperature=0.7, + max_tokens=256, + ) + + print("Response: ", end="") + for chunk in chat.stream("Tell me a short story about a robot learning to paint."): + print(chunk.content, end="", flush=True) + print() + + +def advanced_parameters(): + """Using advanced vLLM sampling parameters.""" + print("\nAdvanced vLLM Parameters") + print("=" * 50) + + chat = ChatOCIModelDeploymentVLLM( + endpoint=ENDPOINT, + model="odsc-llm", + # Sampling parameters + temperature=0.8, + top_p=0.9, + top_k=40, + # Penalties + frequency_penalty=0.2, + presence_penalty=0.1, + repetition_penalty=1.1, + # Token control + max_tokens=200, + min_tokens=50, + # Output control + skip_special_tokens=True, + spaces_between_special_tokens=True, + ) + + response = chat.invoke("Generate a creative product name for a smart water bottle.") + print(f"Generated name: {response.content}") + + +def beam_search_generation(): + """Using beam search for more deterministic output.""" + print("\nBeam Search Generation") + print("=" * 50) + + chat = ChatOCIModelDeploymentVLLM( + endpoint=ENDPOINT, + model="odsc-llm", + use_beam_search=True, + best_of=3, + temperature=0.0, # Usually 0 for beam search + max_tokens=100, + ) + + response = chat.invoke("Translate to French: The quick brown fox jumps over the lazy dog.") + print(f"Translation: {response.content}") + + +if __name__ == "__main__": + print("vLLM Deployment Examples") + print("Note: Replace ENDPOINT with your actual deployment URL") + print() + + # Uncomment to run (requires actual deployment): + # basic_chat() + # streaming_chat() + # advanced_parameters() + # beam_search_generation() + + print("Examples are commented out - configure ENDPOINT and uncomment to run.") diff --git a/libs/oci/tutorials/08-openai-responses-api/README.md b/libs/oci/tutorials/08-openai-responses-api/README.md new file mode 100644 index 00000000..47584e8c --- /dev/null +++ b/libs/oci/tutorials/08-openai-responses-api/README.md @@ -0,0 +1,457 @@ +# Tutorial 08: OpenAI Responses API + +Use OpenAI-compatible models through OCI with conversation persistence and advanced features. 
+ +## What You'll Learn + +- Configure `ChatOCIOpenAI` for OpenAI Responses API +- Use conversation stores for persistent memory +- Authenticate with various OCI methods +- Access web search and MCP tools +- Migrate from OpenAI to OCI + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- Access to OCI Generative AI with OpenAI-compatible models +- Additional packages installed + +```bash +pip install oci-openai langchain-openai langchain-oci +``` + +## Concepts Covered + +| Class/Feature | Description | +|---------------|-------------| +| `ChatOCIOpenAI` | OCI client for OpenAI Responses API | +| Conversation Store | Persistent conversation memory | +| Web Search | Built-in web search tool | +| Hosted MCP | Model Context Protocol integration | + +--- + +## Part 1: What is ChatOCIOpenAI? + +`ChatOCIOpenAI` provides access to OpenAI-compatible models through OCI's Generative AI service. It extends LangChain's `ChatOpenAI` with: + +- OCI authentication (API keys, session tokens, principals) +- Conversation stores for persistent memory +- Regional endpoints across OCI +- Access to web search and hosted MCP tools + +### Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ Your Application │ +│ │ +│ ┌──────────────────────────────────────────────────┐ │ +│ │ ChatOCIOpenAI │ │ +│ │ ┌──────────┐ ┌──────────┐ ┌──────────────┐ │ │ +│ │ │ OCI Auth │ │ Conv. │ │ OpenAI │ │ │ +│ │ │ Handler │ │ Store │ │ Responses API│ │ │ +│ │ └──────────┘ └──────────┘ └──────────────┘ │ │ +│ └──────────────────────────────────────────────────┘ │ +│ │ │ +└──────────────────────────┼───────────────────────────────┘ + │ + ▼ + ┌────────────────────────┐ + │ OCI Generative AI │ + │ OpenAI-Compatible │ + │ Endpoint │ + └────────────────────────┘ +``` + +--- + +## Part 2: Authentication Setup + +`ChatOCIOpenAI` uses the `oci-openai` package for authentication. 
+ +### Session Token Authentication + +```python +from oci_openai import OciSessionAuth +from langchain_oci import ChatOCIOpenAI + +# Authenticate with OCI CLI session +# First run: oci session authenticate --profile-name MY_PROFILE +auth = OciSessionAuth(profile_name="MY_PROFILE") + +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", +) +``` + +### Resource Principal Authentication + +For OCI Functions, Jobs, and other OCI resources: + +```python +from oci_openai import OciResourcePrincipalAuth +from langchain_oci import ChatOCIOpenAI + +auth = OciResourcePrincipalAuth() + +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", +) +``` + +### Instance Principal Authentication + +For OCI Compute instances: + +```python +from oci_openai import OciInstancePrincipalAuth +from langchain_oci import ChatOCIOpenAI + +auth = OciInstancePrincipalAuth() + +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", +) +``` + +--- + +## Part 3: Basic Usage + +### Simple Invocation + +```python +from oci_openai import OciSessionAuth +from langchain_oci import ChatOCIOpenAI + +client = ChatOCIOpenAI( + auth=OciSessionAuth(profile_name="DEFAULT"), + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", +) + +# Simple message +response = client.invoke("What is the capital of France?") +print(response.content) +``` + +### With System Message + +```python +messages = [ + ("system", "You are a helpful translator. Translate user messages to French."), + ("human", "Hello, how are you?"), +] + +response = client.invoke(messages) +print(response.content) # "Bonjour, comment allez-vous?" +``` + +### Prompt Chaining + +```python +from langchain_core.prompts import ChatPromptTemplate + +prompt = ChatPromptTemplate.from_messages([ + ("system", "You are a helpful assistant that translates {input_language} to {output_language}."), + ("human", "{input}"), +]) + +chain = prompt | client + +response = chain.invoke({ + "input_language": "English", + "output_language": "German", + "input": "I love programming.", +}) +print(response.content) # "Ich liebe Programmieren." +``` + +--- + +## Part 4: Conversation Stores + +Persist conversations across sessions using OCI Conversation Stores. + +### Creating a Conversation Store + +First, create a conversation store in OCI Console or via CLI: + +```bash +oci generative-ai conversation-store create \ + --compartment-id ocid1.compartment.oc1..xxx \ + --display-name "My Conversation Store" \ + --region us-chicago-1 +``` + +### Using Conversation Store + +```python +from oci_openai import OciSessionAuth +from langchain_oci import ChatOCIOpenAI + +# With conversation store for persistent memory +client = ChatOCIOpenAI( + auth=OciSessionAuth(profile_name="DEFAULT"), + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", + conversation_store_id="ocid1.conversationstore.oc1..xxx", +) + +# First conversation +response1 = client.invoke("My name is Alice.") +print(response1.content) + +# Later conversation - model remembers +response2 = client.invoke("What is my name?") +print(response2.content) # "Your name is Alice." +``` + +--- + +## Part 5: Function Calling + +Bind tools to the model for function calling. 
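+
+The snippet below shows how to bind a tool schema and read the `tool_calls` the model returns. Getting a final natural-language answer is a second step: execute each requested call yourself and send the results back as `ToolMessage`s. A hedged sketch of that second step, assuming the `llm_with_tools` object created below (the weather string is an illustrative stub):
+
+```python
+from langchain_core.messages import HumanMessage, ToolMessage
+
+messages = [HumanMessage(content="What is the weather like in San Francisco?")]
+ai_msg = llm_with_tools.invoke(messages)
+messages.append(ai_msg)
+
+# Run each requested call with your own implementation and report the result back.
+for call in ai_msg.tool_calls:
+    weather = f"Weather in {call['args']['location']}: 18C, partly cloudy"  # stub result
+    messages.append(ToolMessage(content=weather, tool_call_id=call["id"]))
+
+final = llm_with_tools.invoke(messages)
+print(final.content)
+```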
+ +```python +from pydantic import BaseModel, Field +from langchain_oci import ChatOCIOpenAI + +class GetWeather(BaseModel): + """Get weather for a location.""" + location: str = Field(..., description="The city and state, e.g. San Francisco, CA") + +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", +) + +# Bind tools +llm_with_tools = client.bind_tools([GetWeather]) + +# Model will call the function +response = llm_with_tools.invoke("What is the weather like in San Francisco?") + +# Access tool calls +for tool_call in response.tool_calls: + print(f"Function: {tool_call['name']}") + print(f"Arguments: {tool_call['args']}") +``` + +--- + +## Part 6: Web Search + +Use the built-in web search capability. + +```python +from langchain_oci import ChatOCIOpenAI + +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", +) + +# Enable web search +web_search_tool = {"type": "web_search_preview"} +llm_with_search = client.bind_tools([web_search_tool]) + +# Ask about current events +response = llm_with_search.invoke("What was a positive news story from today?") +print(response.content) +``` + +--- + +## Part 7: Hosted MCP (Model Context Protocol) + +Access external knowledge sources via hosted MCP servers. + +```python +from langchain_oci import ChatOCIOpenAI + +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", +) + +# Bind MCP tool +mcp_tool = { + "type": "mcp", + "server_label": "deepwiki", + "server_url": "https://mcp.deepwiki.com/mcp", + "require_approval": "never", +} + +llm_with_mcp = client.bind_tools([mcp_tool]) + +# Query external knowledge +response = llm_with_mcp.invoke( + "What transport protocols does the 2025-03-26 version of the MCP " + "spec (modelcontextprotocol/modelcontextprotocol) support?" +) +print(response.content) +``` + +--- + +## Part 8: Endpoint Configuration + +Configure custom endpoints for different regions or setups. + +### Using Region + +```python +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", # Automatically constructs endpoint + model="openai.gpt-4.1", +) +``` + +### Using Service Endpoint + +```python +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + model="openai.gpt-4.1", +) +``` + +### Using Base URL + +```python +client = ChatOCIOpenAI( + auth=auth, + compartment_id="ocid1.compartment.oc1..xxx", + base_url="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/v1", + model="openai.gpt-4.1", +) +``` + +--- + +## Part 9: Migration from OpenAI + +Migrating from OpenAI to OCI is straightforward. 
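+
+The before/after snippets below show the one-for-one substitution. If you need to run both backends side by side during a cutover, a small factory keyed on an environment variable keeps the rest of your code unchanged; a hedged sketch (the variable names and model choices are illustrative):
+
+```python
+import os
+
+
+def make_chat_model():
+    """Return an OpenAI- or OCI-backed chat model based on LLM_BACKEND (sketch)."""
+    if os.getenv("LLM_BACKEND", "oci") == "openai":
+        from langchain_openai import ChatOpenAI
+
+        return ChatOpenAI(model="gpt-4", api_key=os.environ["OPENAI_API_KEY"])
+
+    from oci_openai import OciSessionAuth
+
+    from langchain_oci import ChatOCIOpenAI
+
+    return ChatOCIOpenAI(
+        auth=OciSessionAuth(profile_name="DEFAULT"),
+        compartment_id=os.environ["OCI_COMPARTMENT_ID"],
+        region="us-chicago-1",
+        model="openai.gpt-4.1",
+    )
+
+
+llm = make_chat_model()
+print(llm.invoke("Hello!").content)
+```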
+ +### Before (OpenAI) + +```python +from langchain_openai import ChatOpenAI + +client = ChatOpenAI( + api_key="sk-...", + model="gpt-4", +) + +response = client.invoke("Hello!") +``` + +### After (OCI) + +```python +from oci_openai import OciSessionAuth +from langchain_oci import ChatOCIOpenAI + +client = ChatOCIOpenAI( + auth=OciSessionAuth(profile_name="DEFAULT"), + compartment_id="ocid1.compartment.oc1..xxx", + region="us-chicago-1", + model="openai.gpt-4.1", # OCI model name +) + +response = client.invoke("Hello!") +``` + +### Key Differences + +| Aspect | OpenAI | OCI | +|--------|--------|-----| +| Authentication | API key | OCI auth (session, principal) | +| Model names | `gpt-4` | `openai.gpt-4.1` | +| Endpoint | OpenAI servers | OCI regional endpoints | +| Conversation store | N/A | Built-in support | + +--- + +## Summary + +You learned how to: + +- Configure `ChatOCIOpenAI` with various authentication methods +- Use conversation stores for persistent memory +- Perform function calling with custom tools +- Access web search capabilities +- Integrate hosted MCP servers +- Migrate from OpenAI to OCI + +## Next Steps + +- [Tutorial 09: Provider Deep Dive](../09-provider-deep-dive/) - Explore provider-specific features +- [Tutorial 10: Embeddings](../10-embeddings/) - Text and image embeddings + +## API Reference + +| Class/Function | Description | +|----------------|-------------| +| `ChatOCIOpenAI` | OpenAI Responses API client for OCI | +| `OciSessionAuth` | Session token authentication | +| `OciResourcePrincipalAuth` | Resource principal auth | +| `OciInstancePrincipalAuth` | Instance principal auth | + +## Troubleshooting + +### Import Error + +``` +ImportError: oci-openai not found +``` +- Install: `pip install oci-openai` + +### Authentication Failed + +``` +401 Unauthorized +``` +- Refresh session: `oci session authenticate --profile-name MY_PROFILE` +- Check profile name matches config + +### Conversation Store Not Found + +``` +NotAuthorizedOrNotFound: conversation_store_id +``` +- Verify conversation store OCID is correct +- Ensure compartment has access to the store +- Check IAM policies for conversation store operations + +### Model Not Available + +``` +Model 'openai.gpt-4.1' not found +``` +- Check model is available in your region +- Verify compartment has GenAI access +- Use `oci generative-ai model list` to see available models diff --git a/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py b/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py new file mode 100644 index 00000000..df7bcd1b --- /dev/null +++ b/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py @@ -0,0 +1,127 @@ +# Tutorial 08: Conversation Store Example +# Demonstrates persistent conversation memory with ChatOCIOpenAI + +# Note: Requires oci-openai package +# pip install oci-openai langchain-openai langchain-oci + +# Configuration - replace with your values +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +REGION = "us-chicago-1" +MODEL = "openai.gpt-4.1" +CONVERSATION_STORE_ID = "ocid1.conversationstore.oc1..your-store-id" + + +def setup_client_with_store(): + """Set up ChatOCIOpenAI with conversation store.""" + from oci_openai import OciSessionAuth + + from langchain_oci import ChatOCIOpenAI + + auth = OciSessionAuth(profile_name="DEFAULT") + + client = ChatOCIOpenAI( + auth=auth, + compartment_id=COMPARTMENT_ID, + region=REGION, + model=MODEL, + conversation_store_id=CONVERSATION_STORE_ID, + ) + + return client + + +def 
persistent_memory_demo(): + """Demonstrate persistent conversation memory.""" + print("Persistent Memory Demo") + print("=" * 50) + + client = setup_client_with_store() + + # First message - introduce yourself + print("User: My name is Alice and I work as a data scientist.") + response1 = client.invoke("My name is Alice and I work as a data scientist.") + print(f"Assistant: {response1.content}") + + # Second message - model should remember + print("\nUser: What is my name and profession?") + response2 = client.invoke("What is my name and profession?") + print(f"Assistant: {response2.content}") + + # Third message - continue context + print("\nUser: What programming languages should I learn for my job?") + response3 = client.invoke("What programming languages should I learn for my job?") + print(f"Assistant: {response3.content}") + + +def multi_session_demo(): + """Demonstrate memory persistence across sessions.""" + print("\nMulti-Session Demo") + print("=" * 50) + + # Session 1 + print("--- Session 1 ---") + client1 = setup_client_with_store() + + print("User: Remember this number: 42") + response1 = client1.invoke("Remember this number: 42") + print(f"Assistant: {response1.content}") + + # Simulate new session (create new client) + print("\n--- Session 2 (new client instance) ---") + client2 = setup_client_with_store() + + print("User: What number did I ask you to remember?") + response2 = client2.invoke("What number did I ask you to remember?") + print(f"Assistant: {response2.content}") + + +def conversation_store_info(): + """Information about conversation stores.""" + print("\nConversation Store Information") + print("=" * 50) + + info = """ + Conversation Stores in OCI: + + 1. Creating a Store: + oci generative-ai conversation-store create \\ + --compartment-id \\ + --display-name "My Store" \\ + --region us-chicago-1 + + 2. Listing Stores: + oci generative-ai conversation-store list \\ + --compartment-id \\ + --region us-chicago-1 + + 3. Getting Store Details: + oci generative-ai conversation-store get \\ + --conversation-store-id \\ + --region us-chicago-1 + + 4. 
Deleting a Store: + oci generative-ai conversation-store delete \\ + --conversation-store-id \\ + --region us-chicago-1 + + Benefits: + - Persistent memory across sessions + - Managed by OCI (no external database needed) + - Secure and compliant with OCI policies + - Automatic conversation management + """ + print(info) + + +if __name__ == "__main__": + print("Conversation Store Examples") + print("Note: Requires oci-openai package and a conversation store\n") + + # Show information + conversation_store_info() + + # Uncomment to run (requires actual OCI setup): + # persistent_memory_demo() + # multi_session_demo() + + print("\nExamples are commented out - configure credentials and uncomment to run.") diff --git a/libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py b/libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py new file mode 100644 index 00000000..06ca29c2 --- /dev/null +++ b/libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py @@ -0,0 +1,122 @@ +# Tutorial 08: OpenAI Compatibility Example +# Demonstrates ChatOCIOpenAI for OpenAI Responses API + +from langchain_core.prompts import ChatPromptTemplate + +# Note: Requires oci-openai package +# pip install oci-openai langchain-openai langchain-oci + +# Configuration - replace with your values +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +REGION = "us-chicago-1" +MODEL = "openai.gpt-4.1" + + +def setup_client(): + """Set up ChatOCIOpenAI client with session auth.""" + from oci_openai import OciSessionAuth + + from langchain_oci import ChatOCIOpenAI + + # Session auth requires: oci session authenticate --profile-name DEFAULT + auth = OciSessionAuth(profile_name="DEFAULT") + + client = ChatOCIOpenAI( + auth=auth, + compartment_id=COMPARTMENT_ID, + region=REGION, + model=MODEL, + ) + + return client + + +def basic_usage(): + """Basic invocation example.""" + print("Basic Usage") + print("=" * 50) + + client = setup_client() + + # Simple string message + response = client.invoke("What is the capital of Japan?") + print(f"Response: {response.content}") + + +def message_formats(): + """Different message format examples.""" + print("\nMessage Formats") + print("=" * 50) + + client = setup_client() + + # Tuple format (role, content) + messages = [ + ("system", "You are a helpful coding assistant."), + ("human", "Write a Python function to reverse a string."), + ] + + response = client.invoke(messages) + print(f"Response:\n{response.content}") + + +def prompt_chaining(): + """Using prompt templates with ChatOCIOpenAI.""" + print("\nPrompt Chaining") + print("=" * 50) + + client = setup_client() + + # Create a prompt template + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + "You are a helpful assistant that translates " + "{input_language} to {output_language}. 
" + "Only output the translation, nothing else.", + ), + ("human", "{input}"), + ] + ) + + # Create chain + chain = prompt | client + + # Invoke with variables + response = chain.invoke( + { + "input_language": "English", + "output_language": "Spanish", + "input": "Hello, how are you today?", + } + ) + + print(f"Translation: {response.content}") + + +def streaming_response(): + """Streaming example.""" + print("\nStreaming Response") + print("=" * 50) + + client = setup_client() + + print("Response: ", end="") + for chunk in client.stream("Tell me a short joke about programming."): + print(chunk.content, end="", flush=True) + print() + + +if __name__ == "__main__": + print("ChatOCIOpenAI Examples") + print("Note: Requires oci-openai package and valid OCI session\n") + + # Uncomment to run (requires actual OCI setup): + # basic_usage() + # message_formats() + # prompt_chaining() + # streaming_response() + + print("Examples are commented out - configure credentials and uncomment to run.") + print("First authenticate: oci session authenticate --profile-name DEFAULT") diff --git a/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py b/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py new file mode 100644 index 00000000..5537b92d --- /dev/null +++ b/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py @@ -0,0 +1,146 @@ +# Tutorial 08: Tools and Web Search Example +# Demonstrates function calling, web search, and MCP with ChatOCIOpenAI + +from pydantic import BaseModel, Field + +# Note: Requires oci-openai package +# pip install oci-openai langchain-openai langchain-oci + +# Configuration - replace with your values +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +REGION = "us-chicago-1" +MODEL = "openai.gpt-4.1" + + +def setup_client(): + """Set up ChatOCIOpenAI client.""" + from oci_openai import OciSessionAuth + + from langchain_oci import ChatOCIOpenAI + + auth = OciSessionAuth(profile_name="DEFAULT") + + client = ChatOCIOpenAI( + auth=auth, + compartment_id=COMPARTMENT_ID, + region=REGION, + model=MODEL, + ) + + return client + + +# Define tools as Pydantic models +class GetWeather(BaseModel): + """Get the current weather for a location.""" + + location: str = Field(..., description="The city and state, e.g. 
San Francisco, CA") + unit: str = Field(default="fahrenheit", description="Temperature unit: celsius or fahrenheit") + + +class SearchDatabase(BaseModel): + """Search a database for information.""" + + query: str = Field(..., description="The search query") + table: str = Field(..., description="The database table to search") + limit: int = Field(default=10, description="Maximum number of results") + + +def function_calling_demo(): + """Demonstrate function calling with custom tools.""" + print("Function Calling Demo") + print("=" * 50) + + client = setup_client() + + # Bind tools to the model + llm_with_tools = client.bind_tools([GetWeather, SearchDatabase]) + + # Ask a question that requires tool use + response = llm_with_tools.invoke("What is the weather like in San Francisco?") + + print(f"Response content: {response.content}") + print(f"\nTool calls:") + for tool_call in response.tool_calls: + print(f" - Function: {tool_call['name']}") + print(f" Arguments: {tool_call['args']}") + + +def web_search_demo(): + """Demonstrate web search capability.""" + print("\nWeb Search Demo") + print("=" * 50) + + client = setup_client() + + # Enable web search + web_search_tool = {"type": "web_search_preview"} + llm_with_search = client.bind_tools([web_search_tool]) + + # Ask about current events + response = llm_with_search.invoke("What are the latest developments in AI today?") + + print(f"Response: {response.content}") + + +def hosted_mcp_demo(): + """Demonstrate hosted MCP integration.""" + print("\nHosted MCP Demo") + print("=" * 50) + + client = setup_client() + + # Configure MCP tool + mcp_tool = { + "type": "mcp", + "server_label": "deepwiki", + "server_url": "https://mcp.deepwiki.com/mcp", + "require_approval": "never", + } + + llm_with_mcp = client.bind_tools([mcp_tool]) + + # Query using MCP + response = llm_with_mcp.invoke( + "What is the Model Context Protocol and what are its main features?" + ) + + print(f"Response: {response.content}") + + +def combined_tools_demo(): + """Demonstrate combining multiple tools.""" + print("\nCombined Tools Demo") + print("=" * 50) + + client = setup_client() + + # Combine custom tools with web search + tools = [ + GetWeather, + {"type": "web_search_preview"}, + ] + + llm_with_tools = client.bind_tools(tools) + + # The model can choose which tool to use + print("Query 1: Weather question") + response1 = llm_with_tools.invoke("What's the weather in Tokyo?") + print(f"Tool calls: {[tc['name'] for tc in response1.tool_calls]}") + + print("\nQuery 2: Current events question") + response2 = llm_with_tools.invoke("What happened in tech news today?") + print(f"Response: {response2.content[:200]}...") + + +if __name__ == "__main__": + print("Tools and Web Search Examples") + print("Note: Requires oci-openai package and valid OCI session\n") + + # Uncomment to run (requires actual OCI setup): + # function_calling_demo() + # web_search_demo() + # hosted_mcp_demo() + # combined_tools_demo() + + print("Examples are commented out - configure credentials and uncomment to run.") diff --git a/libs/oci/tutorials/09-provider-deep-dive/README.md b/libs/oci/tutorials/09-provider-deep-dive/README.md new file mode 100644 index 00000000..a0837c99 --- /dev/null +++ b/libs/oci/tutorials/09-provider-deep-dive/README.md @@ -0,0 +1,617 @@ +# Tutorial 09: Provider Deep Dive + +Understand the provider architecture and master provider-specific features. 
+ +## What You'll Learn + +- Understand the provider abstraction pattern +- Master Meta Llama features (vision, parallel tools) +- Use Google Gemini multimodal capabilities +- Work with Cohere Command models (RAG, V2 API) +- Leverage xAI Grok reasoning features +- Handle provider-specific quirks + +## Prerequisites + +- Completed [Tutorial 01: Getting Started](../01-getting-started/) +- Completed [Tutorial 02: Vision & Multimodal](../02-vision-and-multimodal/) +- Completed [Tutorial 04: Tool Calling Mastery](../04-tool-calling-mastery/) + +## Concepts Covered + +| Provider | Key Features | +|----------|--------------| +| `GenericProvider` | Base for Meta, xAI, OpenAI, Mistral | +| `MetaProvider` | Llama 3.2/3.3/4, vision, parallel tools | +| `GeminiProvider` | Multimodal (PDF, video, audio) | +| `CohereProvider` | RAG, citations, V2 vision API | + +--- + +## Part 1: Provider Architecture + +The provider system abstracts model-specific behaviors behind a common interface. + +### Architecture Overview + +``` +┌─────────────────────────────────────────────────────────┐ +│ ChatOCIGenAI │ +│ │ +│ ┌──────────────────────────────────────────────────┐ │ +│ │ Provider Selection │ │ +│ │ │ │ +│ │ model_id="meta.llama-3.3-70b-instruct" │ │ +│ │ ↓ │ │ +│ │ provider="meta" → MetaProvider() │ │ +│ │ │ │ +│ │ model_id="cohere.command-r-plus" │ │ +│ │ ↓ │ │ +│ │ provider="cohere" → CohereProvider() │ │ +│ └──────────────────────────────────────────────────┘ │ +│ │ +│ Each provider handles: │ +│ • Message format conversion │ +│ • Tool calling format │ +│ • Response parsing │ +│ • Streaming events │ +│ • Provider-specific features │ +└─────────────────────────────────────────────────────────┘ +``` + +### Provider Hierarchy + +``` +Provider (base) +├── GenericProvider (Meta, xAI, OpenAI, Mistral) +│ ├── MetaProvider (Llama-specific) +│ └── GeminiProvider (Gemini-specific) +└── CohereProvider (Cohere-specific) +``` + +### Auto-Detection + +Providers are auto-detected from model IDs: + +```python +from langchain_oci import ChatOCIGenAI + +# Auto-detects MetaProvider +llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", ...) + +# Auto-detects CohereProvider +llm = ChatOCIGenAI(model_id="cohere.command-r-plus", ...) + +# Auto-detects GeminiProvider +llm = ChatOCIGenAI(model_id="google.gemini-2.0-flash", ...) +``` + +### Manual Override + +For DAC/imported models, specify the provider: + +```python +llm = ChatOCIGenAI( + model_id="ocid1.generativeaiendpoint.oc1...", # Endpoint OCID + provider="meta", # "meta", "cohere", "google", "generic" + ... +) +``` + +--- + +## Part 2: Meta Llama Models + +Meta provides Llama models with vision and advanced tool calling. 
+ +### Available Models + +| Model | Features | +|-------|----------| +| `meta.llama-3.2-11b-vision-instruct` | Vision | +| `meta.llama-3.2-90b-vision-instruct` | Vision | +| `meta.llama-3.3-70b-instruct` | Text, tools | +| `meta.llama-4-scout-17b-16e-instruct` | Parallel tools | +| `meta.llama-4-maverick-17b-128e-instruct-fp8` | Parallel tools | + +### Basic Usage + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +response = llm.invoke("Explain quantum computing.") +print(response.content) +``` + +### Vision with Llama 3.2 + +```python +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI, load_image + +llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +message = HumanMessage( + content=[ + {"type": "text", "text": "Describe this image in detail."}, + load_image("./photo.jpg"), + ] +) + +response = llm.invoke([message]) +print(response.content) +``` + +### Parallel Tool Calls (Llama 4+) + +Llama 4 models support calling multiple tools simultaneously: + +```python +from langchain_core.tools import tool +from langchain_oci import ChatOCIGenAI + +@tool +def get_weather(city: str) -> str: + """Get weather for a city.""" + return f"Weather in {city}: 72F" + +@tool +def get_time(city: str) -> str: + """Get current time in a city.""" + return f"Time in {city}: 3:00 PM" + +llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + ... +) + +# Enable parallel tool calls +llm_with_tools = llm.bind_tools( + [get_weather, get_time], + parallel_tool_calls=True, +) + +# Both tools called in single response +response = llm_with_tools.invoke( + "What's the weather and time in New York and London?" +) + +for tc in response.tool_calls: + print(f"Tool: {tc['name']}, Args: {tc['args']}") +``` + +### Tool Result Guidance + +Help Meta models use tool results naturally: + +```python +llm_with_tools = llm.bind_tools( + [get_weather], + tool_result_guidance=True, # Helps model synthesize results + max_sequential_tool_calls=5, # Prevents infinite loops +) +``` + +--- + +## Part 3: Google Gemini Models + +Gemini offers advanced multimodal capabilities. + +### Available Models + +| Model | Features | +|-------|----------| +| `google.gemini-2.0-flash` | Fast, multimodal | +| `google.gemini-2.5-flash` | Latest, multimodal | +| `google.gemini-2.5-pro` | Most capable | + +### Basic Usage + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +response = llm.invoke("What are the key features of Python 3.12?") +print(response.content) +``` + +### PDF Processing + +```python +import base64 +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI(model_id="google.gemini-2.0-flash", ...) 
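+# The "..." above stands for the service_endpoint and compartment_id arguments
+# shown in the Basic Usage example; they are omitted here for brevity.
+# Inline PDF input via a "media" content part is a Gemini-specific capability.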
+ +# Load PDF +with open("document.pdf", "rb") as f: + pdf_data = base64.b64encode(f.read()).decode() + +message = HumanMessage( + content=[ + {"type": "text", "text": "Summarize the key points from this document."}, + {"type": "media", "data": pdf_data, "mime_type": "application/pdf"}, + ] +) + +response = llm.invoke([message]) +print(response.content) +``` + +### Video Analysis + +```python +import base64 +from langchain_core.messages import HumanMessage + +# Load video +with open("video.mp4", "rb") as f: + video_data = base64.b64encode(f.read()).decode() + +message = HumanMessage( + content=[ + {"type": "text", "text": "Describe what happens in this video."}, + {"type": "media", "data": video_data, "mime_type": "video/mp4"}, + ] +) + +response = llm.invoke([message]) +``` + +### Audio Analysis + +```python +import base64 +from langchain_core.messages import HumanMessage + +# Load audio +with open("audio.mp3", "rb") as f: + audio_data = base64.b64encode(f.read()).decode() + +message = HumanMessage( + content=[ + {"type": "text", "text": "Transcribe and summarize this audio."}, + {"type": "media", "data": audio_data, "mime_type": "audio/mp3"}, + ] +) + +response = llm.invoke([message]) +``` + +### Gemini-Specific Parameters + +```python +llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + model_kwargs={ + "max_tokens": 1024, # Note: max_tokens, not max_output_tokens + "temperature": 0.7, + }, + ... +) +``` + +**Note:** The OCI API uses `max_tokens` for Gemini, not `max_output_tokens`. The provider automatically maps `max_output_tokens` to `max_tokens` with a warning. + +--- + +## Part 4: Cohere Command Models + +Cohere excels at RAG and provides citations. + +### Available Models + +| Model | Features | +|-------|----------| +| `cohere.command-r-plus` | Powerful reasoning | +| `cohere.command-a-03-2025` | Latest | +| `cohere.command-a-vision` | Vision (V2 API, DAC only) | + +### Basic Usage + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +response = llm.invoke("Explain the theory of relativity.") +print(response.content) +``` + +### RAG with Citations + +Cohere returns citations when documents are provided: + +```python +from langchain_core.messages import HumanMessage, SystemMessage + +llm = ChatOCIGenAI(model_id="cohere.command-r-plus", ...) + +# RAG-style prompt with context +messages = [ + SystemMessage(content="""Use the following documents to answer: + +Document 1: Python was created by Guido van Rossum in 1991. +Document 2: Python 3.0 was released on December 3, 2008. +Document 3: Python is known for its clear syntax and readability."""), + HumanMessage(content="When was Python created and by whom?"), +] + +response = llm.invoke(messages) +print(response.content) + +# Access citations in generation info +if response.response_metadata.get("citations"): + print("\nCitations:", response.response_metadata["citations"]) +``` + +### Tool Calling with Cohere + +```python +from langchain_core.tools import tool + +@tool +def search_docs(query: str) -> str: + """Search documents for information.""" + return f"Results for: {query}" + +llm = ChatOCIGenAI(model_id="cohere.command-r-plus", ...) 
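+# As above, "..." stands for service_endpoint and compartment_id.
+# Cohere does not support tool_choice, so bind tools without it (see Troubleshooting).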
+llm_with_tools = llm.bind_tools([search_docs]) + +# Note: Cohere doesn't support parallel_tool_calls +response = llm_with_tools.invoke("Search for Python tutorials") +``` + +### Cohere V2 Vision (DAC Only) + +Vision support requires dedicated AI cluster: + +```python +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI, load_image + +# V2 API for vision - requires DAC +llm = ChatOCIGenAI( + model_id="ocid1.generativeaiendpoint.oc1..xxx", # DAC endpoint + provider="cohere", + ... +) + +message = HumanMessage( + content=[ + {"type": "text", "text": "What's in this image?"}, + load_image("./image.jpg"), + ] +) + +response = llm.invoke([message]) +``` + +--- + +## Part 5: xAI Grok Models + +Grok offers reasoning capabilities and vision. + +### Available Models + +| Model | Features | +|-------|----------| +| `xai.grok-4` | Vision, reasoning | +| `xai.grok-4-fast-reasoning` | Optimized reasoning | + +### Basic Usage + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id="xai.grok-4", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", +) + +response = llm.invoke("Solve this logic puzzle: ...") +print(response.content) +``` + +### Accessing Reasoning Content + +Grok reasoning models expose their thinking: + +```python +llm = ChatOCIGenAI(model_id="xai.grok-4-fast-reasoning", ...) + +response = llm.invoke("What is 23 * 47? Show your reasoning.") + +# Access reasoning from response metadata +if response.response_metadata.get("reasoning_content"): + print("Reasoning:", response.response_metadata["reasoning_content"]) + +print("Answer:", response.content) +``` + +### Vision with Grok + +```python +from langchain_core.messages import HumanMessage +from langchain_oci import ChatOCIGenAI, load_image + +llm = ChatOCIGenAI(model_id="xai.grok-4", ...) 
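+# As in the other vision examples, "..." stands for service_endpoint and
+# compartment_id, and load_image() packages the file as an image content part.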
+ +message = HumanMessage( + content=[ + {"type": "text", "text": "Analyze this chart and explain the trends."}, + load_image("./chart.png"), + ] +) + +response = llm.invoke([message]) +print(response.content) +``` + +--- + +## Part 6: Provider Comparison + +### Feature Matrix + +| Feature | Meta | Gemini | Cohere | xAI | +|---------|------|--------|--------|-----| +| Vision | ✅ Llama 3.2 | ✅ All | ✅ V2/DAC | ✅ | +| PDF | ❌ | ✅ | ❌ | ❌ | +| Video | ❌ | ✅ | ❌ | ❌ | +| Audio | ❌ | ✅ | ❌ | ❌ | +| Parallel Tools | ✅ Llama 4+ | ❌ | ❌ | ❌ | +| Citations | ❌ | ❌ | ✅ | ❌ | +| Reasoning | ❌ | ❌ | ❌ | ✅ | +| tool_choice | ✅ | ✅ | ❌ | ✅ | + +### Performance Characteristics + +| Provider | Latency | Throughput | Best For | +|----------|---------|------------|----------| +| Meta Llama 4 | Low | High | Production, tools | +| Gemini Flash | Very Low | Very High | Multimodal, speed | +| Cohere Command | Medium | Medium | RAG, search | +| xAI Grok | Medium | Medium | Reasoning tasks | + +--- + +## Part 7: Best Practices + +### Choosing a Provider + +```python +# For vision tasks → Llama 3.2, Gemini, or Grok +if need_vision and not need_pdf: + model = "meta.llama-3.2-90b-vision-instruct" +elif need_multimodal: # PDF, video, audio + model = "google.gemini-2.0-flash" + +# For tool-heavy workflows → Llama 4 (parallel tools) +if many_tools and need_parallel: + model = "meta.llama-4-scout-17b-16e-instruct" + +# For RAG with citations → Cohere +if need_citations: + model = "cohere.command-r-plus" + +# For reasoning tasks → Grok +if need_reasoning: + model = "xai.grok-4-fast-reasoning" +``` + +### Handling Provider Differences + +```python +from langchain_oci import ChatOCIGenAI + +def get_llm_for_task(task_type: str) -> ChatOCIGenAI: + """Get appropriate LLM for task type.""" + configs = { + "vision": { + "model_id": "meta.llama-3.2-90b-vision-instruct", + }, + "multimodal": { + "model_id": "google.gemini-2.0-flash", + }, + "rag": { + "model_id": "cohere.command-r-plus", + }, + "reasoning": { + "model_id": "xai.grok-4-fast-reasoning", + }, + "tools": { + "model_id": "meta.llama-4-scout-17b-16e-instruct", + }, + } + + config = configs.get(task_type, configs["tools"]) + + return ChatOCIGenAI( + **config, + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="ocid1.compartment.oc1..xxx", + ) +``` + +--- + +## Summary + +You learned: + +- How the provider architecture abstracts model differences +- Meta Llama features: vision, parallel tools, guidance +- Google Gemini multimodal: PDF, video, audio +- Cohere features: RAG, citations, V2 vision +- xAI Grok: reasoning content access +- How to choose providers for different tasks + +## Next Steps + +- [Tutorial 10: Embeddings](../10-embeddings/) - Text and image embeddings + +## API Reference + +| Provider Class | Models | +|----------------|--------| +| `GenericProvider` | Meta, xAI, OpenAI, Mistral | +| `MetaProvider` | Meta Llama (extends Generic) | +| `GeminiProvider` | Google Gemini (extends Generic) | +| `CohereProvider` | Cohere Command | + +## Troubleshooting + +### Wrong Provider Selected + +``` +Unexpected response format +``` +- For DAC endpoints, explicitly set `provider="meta"` etc. 
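+
+For example, mirroring the manual override from Part 1 (both OCIDs below are placeholders):
+
+```python
+from langchain_oci import ChatOCIGenAI
+
+# DAC/imported endpoints are addressed by OCID, so the provider cannot be
+# auto-detected from the model_id and must be set explicitly.
+llm = ChatOCIGenAI(
+    model_id="ocid1.generativeaiendpoint.oc1..xxx",
+    provider="meta",  # or "cohere", "google", "generic"
+    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
+    compartment_id="ocid1.compartment.oc1..xxx",
+)
+```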
+ +### Tool Choice Not Supported + +``` +ValueError: Tool choice is not supported for Cohere +``` +- Cohere doesn't support `tool_choice` parameter +- Remove it or switch to Meta/Gemini + +### Parallel Tools Error + +``` +Parallel tool calls not supported +``` +- Only Llama 4+ supports `parallel_tool_calls=True` +- Use sequential calls for other models + +### Vision Not Working + +``` +Content type not supported +``` +- Check model supports vision (`is_vision_model()`) +- Cohere vision requires V2 API on DAC diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py b/libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py new file mode 100644 index 00000000..9929323b --- /dev/null +++ b/libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py @@ -0,0 +1,182 @@ +# Tutorial 09: Cohere Provider Examples +# Demonstrates Cohere-specific features: RAG, citations, V2 vision + +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.tools import tool + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def basic_cohere_chat(): + """Basic chat with Cohere Command.""" + print("Basic Cohere Command Chat") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + response = llm.invoke("Explain the difference between machine learning and deep learning.") + print(response.content) + + +def rag_with_citations(): + """RAG with citation support.""" + print("\nRAG with Citations") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Provide context documents in system message + messages = [ + SystemMessage( + content="""Use the following documents to answer the user's question. +Cite the specific document numbers when making claims. + +Document 1: Oracle Cloud Infrastructure (OCI) was launched in October 2016. +It provides enterprise-grade cloud services including compute, storage, and networking. + +Document 2: OCI Generative AI service provides access to large language models +from multiple providers including Meta, Cohere, and Google. 
+ +Document 3: The langchain-oci package enables Python developers to use OCI +Generative AI services with the LangChain framework.""" + ), + HumanMessage(content="When was OCI launched and what AI capabilities does it offer?"), + ] + + response = llm.invoke(messages) + print("Response:", response.content) + + # Check for citations in response metadata + if "citations" in response.response_metadata: + print("\nCitations:") + for citation in response.response_metadata["citations"]: + print(f" - {citation}") + + +def cohere_tool_calling(): + """Tool calling with Cohere (limitations apply).""" + print("\nCohere Tool Calling") + print("=" * 50) + + @tool + def search_database(query: str, table: str = "documents") -> str: + """Search the database for information.""" + return f"Found 5 results for '{query}' in table '{table}'" + + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Note: Cohere doesn't support tool_choice or parallel_tool_calls + llm_with_tools = llm.bind_tools([search_database]) + + print("Cohere tool calling limitations:") + print("- No tool_choice parameter support") + print("- No parallel_tool_calls support") + print("- Sequential tool execution only") + print("\nPattern:") + print(""" + llm_with_tools = llm.bind_tools([search_database]) + # Don't pass tool_choice or parallel_tool_calls + response = llm_with_tools.invoke("Search for Python tutorials") + """) + + +def cohere_v2_vision(): + """Vision with Cohere V2 API (DAC only).""" + print("\nCohere V2 Vision (DAC Only)") + print("=" * 50) + + print("Cohere Command A Vision requires:") + print("1. Dedicated AI Cluster (DAC) deployment") + print("2. V2 API format (automatically selected for vision models)") + print() + print("Model: cohere.command-a-vision-07-2025") + print() + print("Usage pattern:") + print(""" + # For DAC-deployed vision model + llm = ChatOCIGenAI( + model_id="ocid1.generativeaiendpoint.oc1..xxx", # DAC endpoint + provider="cohere", # Explicitly set provider + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + message = HumanMessage(content=[ + {"type": "text", "text": "Describe this image."}, + load_image("image.jpg"), + ]) + + response = llm.invoke([message]) + """) + + +def cohere_response_metadata(): + """Accessing Cohere-specific response metadata.""" + print("\nCohere Response Metadata") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="cohere.command-r-plus", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + print("Available in response.response_metadata:") + print("- citations: Document citations (when RAG context provided)") + print("- finish_reason: Why generation stopped") + print("- documents: Referenced documents") + print("- search_queries: Generated search queries") + print("- is_search_required: Whether search was needed") + print("- total_tokens: Token usage") + + +def cohere_model_comparison(): + """Compare Cohere models.""" + print("\nCohere Model Comparison") + print("=" * 50) + + comparison = """ + | Model | Best For | Key Features | + |-------|----------|--------------| + | cohere.command-r-plus | Complex reasoning | High capability | + | cohere.command-a-03-2025 | General use | Latest release | + | cohere.command-a-vision | Vision tasks | V2 API, DAC only | + + Embedding Models: + | Model | Type | Dimensions | + |-------|------|------------| + | cohere.embed-english-v3.0 | Text | 1024 | + | cohere.embed-multilingual-v3.0 | Text | 1024 | + | 
cohere.embed-v4.0 | Text + Image | 256-1536 | + """ + print(comparison) + + +if __name__ == "__main__": + print("Cohere Provider Examples") + print("=" * 60) + + # Uncomment to run (requires valid credentials): + # basic_cohere_chat() + # rag_with_citations() + # cohere_tool_calling() + cohere_v2_vision() + cohere_response_metadata() + cohere_model_comparison() + + print("\nExamples are commented out - configure credentials and uncomment to run.") diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py b/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py new file mode 100644 index 00000000..b2fa556b --- /dev/null +++ b/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py @@ -0,0 +1,198 @@ +# Tutorial 09: Google Gemini Provider Examples +# Demonstrates Gemini-specific features: multimodal (PDF, video, audio) + +import base64 +import os + +from langchain_core.messages import HumanMessage + +from langchain_oci import ChatOCIGenAI + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def basic_gemini_chat(): + """Basic chat with Gemini.""" + print("Basic Gemini Chat") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + response = llm.invoke("Explain the concept of machine learning in simple terms.") + print(response.content) + + +def pdf_processing(): + """Process PDF documents with Gemini.""" + print("\nPDF Processing with Gemini") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + pdf_path = "document.pdf" + + if os.path.exists(pdf_path): + # Load and encode PDF + with open(pdf_path, "rb") as f: + pdf_data = base64.b64encode(f.read()).decode() + + message = HumanMessage( + content=[ + {"type": "text", "text": "Summarize the key points from this document."}, + {"type": "media", "data": pdf_data, "mime_type": "application/pdf"}, + ] + ) + + response = llm.invoke([message]) + print(response.content) + else: + print("To test PDF processing:") + print(f"1. Place a PDF file at: {pdf_path}") + print("2. Run this example again") + print("\nCode pattern:") + print(""" + with open("document.pdf", "rb") as f: + pdf_data = base64.b64encode(f.read()).decode() + + message = HumanMessage(content=[ + {"type": "text", "text": "Summarize this document."}, + {"type": "media", "data": pdf_data, "mime_type": "application/pdf"}, + ]) + response = llm.invoke([message]) + """) + + +def video_analysis(): + """Analyze video content with Gemini.""" + print("\nVideo Analysis with Gemini") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + video_path = "video.mp4" + + if os.path.exists(video_path): + with open(video_path, "rb") as f: + video_data = base64.b64encode(f.read()).decode() + + message = HumanMessage( + content=[ + {"type": "text", "text": "Describe what happens in this video."}, + {"type": "media", "data": video_data, "mime_type": "video/mp4"}, + ] + ) + + response = llm.invoke([message]) + print(response.content) + else: + print("To test video analysis:") + print(f"1. Place a video file at: {video_path}") + print("2. 
Run this example again") + print("\nSupported formats: MP4, MPEG, MOV, AVI, FLV, MPG, WEBM, WMV, 3GPP") + + +def audio_transcription(): + """Transcribe and analyze audio with Gemini.""" + print("\nAudio Transcription with Gemini") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + audio_path = "audio.mp3" + + if os.path.exists(audio_path): + with open(audio_path, "rb") as f: + audio_data = base64.b64encode(f.read()).decode() + + message = HumanMessage( + content=[ + {"type": "text", "text": "Transcribe this audio and summarize the content."}, + {"type": "media", "data": audio_data, "mime_type": "audio/mp3"}, + ] + ) + + response = llm.invoke([message]) + print(response.content) + else: + print("To test audio transcription:") + print(f"1. Place an audio file at: {audio_path}") + print("2. Run this example again") + print("\nSupported formats: WAV, MP3, AIFF, AAC, OGG, FLAC") + + +def gemini_parameters(): + """Gemini-specific parameter handling.""" + print("\nGemini Parameter Handling") + print("=" * 50) + + # Note: OCI uses max_tokens, not max_output_tokens + # The provider will automatically map with a warning + llm = ChatOCIGenAI( + model_id="google.gemini-2.0-flash", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + model_kwargs={ + "max_tokens": 1024, # Use max_tokens for OCI + "temperature": 0.7, + }, + ) + + print("Key differences from native Gemini SDK:") + print("- Use 'max_tokens' instead of 'max_output_tokens'") + print("- Provider auto-maps max_output_tokens -> max_tokens with warning") + print("- Both parameters provided? max_tokens takes precedence") + + +def multimodal_comparison(): + """Compare different media types.""" + print("\nMultimodal Capability Summary") + print("=" * 50) + + summary = """ + Gemini Multimodal Support: + + | Media Type | MIME Types | Use Case | + |------------|------------|----------| + | PDF | application/pdf | Document analysis | + | Video | video/mp4, video/webm | Video understanding | + | Audio | audio/mp3, audio/wav | Transcription, analysis | + | Image | image/jpeg, image/png | Vision (like other models) | + + Unique to Gemini: + - Only provider supporting PDF, video, audio natively + - Can combine multiple media types in one request + - Supports long-form content (e.g., hour-long videos) + """ + print(summary) + + +if __name__ == "__main__": + print("Google Gemini Provider Examples") + print("=" * 60) + + # Uncomment to run (requires valid credentials): + # basic_gemini_chat() + # pdf_processing() + # video_analysis() + # audio_transcription() + # gemini_parameters() + multimodal_comparison() + + print("\nExamples are commented out - configure credentials and uncomment to run.") diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py b/libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py new file mode 100644 index 00000000..63bdbbd9 --- /dev/null +++ b/libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py @@ -0,0 +1,175 @@ +# Tutorial 09: Meta Llama Provider Examples +# Demonstrates Meta-specific features: vision, parallel tools, guidance + +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.tools import tool + +from langchain_oci import ChatOCIGenAI, load_image + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def basic_meta_chat(): + 
"""Basic chat with Meta Llama.""" + print("Basic Meta Llama Chat") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + response = llm.invoke("What are the key features of Python programming?") + print(response.content) + + +def vision_with_llama(): + """Vision analysis with Llama 3.2.""" + print("\nVision with Llama 3.2") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Check if image exists, otherwise show how it would work + import os + + if os.path.exists("sample.jpg"): + message = HumanMessage( + content=[ + {"type": "text", "text": "Describe this image in detail."}, + load_image("sample.jpg"), + ] + ) + response = llm.invoke([message]) + print(response.content) + else: + print("To test vision:") + print("1. Place an image file named 'sample.jpg' in this directory") + print("2. Run this example again") + print("\nCode pattern:") + print(""" + message = HumanMessage(content=[ + {"type": "text", "text": "Describe this image."}, + load_image("sample.jpg"), + ]) + response = llm.invoke([message]) + """) + + +@tool +def get_weather(city: str) -> str: + """Get current weather for a city.""" + # Simulated weather data + weather_data = { + "new york": "72F, sunny", + "london": "58F, cloudy", + "tokyo": "75F, clear", + } + return weather_data.get(city.lower(), f"Weather data unavailable for {city}") + + +@tool +def get_time(city: str) -> str: + """Get current time in a city.""" + # Simulated time data + time_data = { + "new york": "10:00 AM EST", + "london": "3:00 PM GMT", + "tokyo": "12:00 AM JST", + } + return time_data.get(city.lower(), f"Time data unavailable for {city}") + + +def parallel_tool_calls(): + """Parallel tool calling with Llama 4.""" + print("\nParallel Tool Calls (Llama 4)") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="meta.llama-4-scout-17b-16e-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Enable parallel tool calls + llm_with_tools = llm.bind_tools( + [get_weather, get_time], + parallel_tool_calls=True, + ) + + print("Query: What's the weather and time in New York and London?") + response = llm_with_tools.invoke("What's the weather and time in New York and London?") + + print(f"\nTool calls made: {len(response.tool_calls)}") + for tc in response.tool_calls: + print(f" - {tc['name']}({tc['args']})") + + # Execute tools and get final response + # (In production, you'd use an agent loop) + + +def tool_result_guidance(): + """Using tool_result_guidance to improve responses.""" + print("\nTool Result Guidance") + print("=" * 50) + + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + # Without guidance, model might output raw JSON + # With guidance, model synthesizes natural response + llm_with_tools = llm.bind_tools( + [get_weather], + tool_result_guidance=True, # Helps model use results naturally + max_sequential_tool_calls=5, # Prevents infinite loops + ) + + print("With tool_result_guidance=True:") + print("- Model receives instruction to synthesize tool results") + print("- Prevents raw JSON output") + print("- max_sequential_tool_calls prevents infinite loops") + + +def multi_image_comparison(): + """Compare multiple images with Llama 3.2 Vision.""" + print("\nMulti-Image Comparison") + print("=" * 50) + + 
llm = ChatOCIGenAI( + model_id="meta.llama-3.2-90b-vision-instruct", + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + print("Pattern for comparing multiple images:") + print(""" + message = HumanMessage(content=[ + {"type": "text", "text": "Compare these two images."}, + load_image("image1.jpg"), + load_image("image2.jpg"), + ]) + response = llm.invoke([message]) + """) + + +if __name__ == "__main__": + print("Meta Llama Provider Examples") + print("=" * 60) + + # Uncomment to run (requires valid credentials): + # basic_meta_chat() + # vision_with_llama() + # parallel_tool_calls() + # tool_result_guidance() + # multi_image_comparison() + + print("\nExamples are commented out - configure credentials and uncomment to run.") diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py b/libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py new file mode 100644 index 00000000..8bb3d179 --- /dev/null +++ b/libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py @@ -0,0 +1,202 @@ +# Tutorial 09: Provider Comparison +# Helps choose the right provider for different use cases + +from langchain_oci import ChatOCIGenAI, is_vision_model + +COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" +SERVICE_ENDPOINT = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" + + +def list_vision_models(): + """List all vision-capable models.""" + print("Vision-Capable Models") + print("=" * 50) + + from langchain_oci import VISION_MODELS + + print("Models that support image input:") + for model in sorted(VISION_MODELS): + print(f" - {model}") + + print(f"\nTotal: {len(VISION_MODELS)} vision models") + + +def check_model_capabilities(model_id: str): + """Check capabilities for a specific model.""" + print(f"\nModel: {model_id}") + print("-" * 40) + + # Vision check + has_vision = is_vision_model(model_id) + print(f" Vision: {'Yes' if has_vision else 'No'}") + + # Provider detection + if model_id.startswith("meta."): + provider = "meta" + features = ["Vision (3.2)", "Parallel tools (4+)", "tool_choice"] + elif model_id.startswith("google."): + provider = "google" + features = ["Vision", "PDF", "Video", "Audio"] + elif model_id.startswith("cohere."): + provider = "cohere" + features = ["Citations", "RAG-optimized", "V2 vision (DAC)"] + elif model_id.startswith("xai."): + provider = "xai" + features = ["Vision", "Reasoning content"] + else: + provider = "generic" + features = ["Basic chat"] + + print(f" Provider: {provider}") + print(f" Features: {', '.join(features)}") + + +def provider_selection_guide(): + """Guide for selecting the right provider.""" + print("\nProvider Selection Guide") + print("=" * 50) + + guide = """ + Use this decision tree to pick the right model: + + 1. Do you need to process PDFs, videos, or audio? + YES → google.gemini-2.0-flash or google.gemini-2.5-flash + + 2. Do you need parallel tool calling (multiple tools at once)? + YES → meta.llama-4-scout-17b-16e-instruct + + 3. Do you need vision (image understanding)? + - Fast inference → google.gemini-2.0-flash + - High quality → meta.llama-3.2-90b-vision-instruct + - Reasoning → xai.grok-4 + + 4. Do you need RAG with citations? + YES → cohere.command-r-plus + + 5. Do you need step-by-step reasoning exposed? + YES → xai.grok-4-fast-reasoning + + 6. General-purpose, high-quality text generation? + → meta.llama-3.3-70b-instruct (good balance) + → cohere.command-r-plus (excellent reasoning) + + 7. Need speed and low latency? 
+ → google.gemini-2.0-flash (fastest) + → meta.llama-4-scout-17b-16e-instruct (fast + tools) + """ + print(guide) + + +def create_task_specific_llm(task: str) -> ChatOCIGenAI: + """Factory function to create task-appropriate LLM.""" + print(f"\nCreating LLM for task: {task}") + + task_configs = { + "vision": { + "model_id": "meta.llama-3.2-90b-vision-instruct", + "description": "Image analysis and understanding", + }, + "multimodal": { + "model_id": "google.gemini-2.0-flash", + "description": "PDF, video, audio, and image processing", + }, + "rag": { + "model_id": "cohere.command-r-plus", + "description": "RAG with citations", + }, + "reasoning": { + "model_id": "xai.grok-4-fast-reasoning", + "description": "Complex reasoning with chain-of-thought", + }, + "tools": { + "model_id": "meta.llama-4-scout-17b-16e-instruct", + "description": "Parallel tool calling workflows", + }, + "general": { + "model_id": "meta.llama-3.3-70b-instruct", + "description": "General-purpose assistant", + }, + "fast": { + "model_id": "google.gemini-2.0-flash", + "description": "Low-latency responses", + }, + } + + config = task_configs.get(task, task_configs["general"]) + print(f" Model: {config['model_id']}") + print(f" Purpose: {config['description']}") + + return ChatOCIGenAI( + model_id=config["model_id"], + service_endpoint=SERVICE_ENDPOINT, + compartment_id=COMPARTMENT_ID, + ) + + +def feature_matrix(): + """Display complete feature matrix.""" + print("\nComplete Feature Matrix") + print("=" * 50) + + matrix = """ + | Feature | Meta | Gemini | Cohere | xAI | + |---------------------|-------|--------|--------|------| + | Text Generation | ✓ | ✓ | ✓ | ✓ | + | Vision (Images) | ✓ | ✓ | ✓* | ✓ | + | PDF Processing | ✗ | ✓ | ✗ | ✗ | + | Video Analysis | ✗ | ✓ | ✗ | ✗ | + | Audio Transcription | ✗ | ✓ | ✗ | ✗ | + | Tool Calling | ✓ | ✓ | ✓ | ✓ | + | Parallel Tools | ✓** | ✗ | ✗ | ✗ | + | tool_choice | ✓ | ✓ | ✗ | ✓ | + | Citations/RAG | ✗ | ✗ | ✓ | ✗ | + | Reasoning Content | ✗ | ✗ | ✗ | ✓ | + | Streaming | ✓ | ✓ | ✓ | ✓ | + | Async | ✓ | ✓ | ✓ | ✓ | + + * Cohere vision requires DAC (V2 API) + ** Parallel tools: Llama 4+ only + """ + print(matrix) + + +if __name__ == "__main__": + print("Provider Comparison Utility") + print("=" * 60) + + # List vision models + list_vision_models() + + # Check specific models + models_to_check = [ + "meta.llama-3.3-70b-instruct", + "google.gemini-2.0-flash", + "cohere.command-r-plus", + "xai.grok-4", + ] + + print("\n\nModel Capability Check") + print("=" * 50) + for model in models_to_check: + check_model_capabilities(model) + + # Selection guide + provider_selection_guide() + + # Feature matrix + feature_matrix() + + # Example: Create task-specific LLM + print("\n\nTask-Specific LLM Factory") + print("=" * 50) + print("Example usage:") + print(""" + # For RAG workflows + llm = create_task_specific_llm("rag") + + # For multimodal (PDF, video) + llm = create_task_specific_llm("multimodal") + + # For tool-heavy workflows + llm = create_task_specific_llm("tools") + """) From 9309ce3790e0f0ad213d163383443865ff524729 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 12:16:05 -0500 Subject: [PATCH 04/16] Add reference documentation and enhance CONTRIBUTING.md Reference Docs (libs/oci/docs/): - API_REFERENCE.md: Complete class/method signatures - MODELS.md: Model IDs, features, selection guide - CHANGELOG.md: Version history, migration guide CONTRIBUTING.md Enhancements: - Development setup with Poetry - Architecture overview diagram - Provider pattern explanation 
- Adding a new provider guide - Testing guidelines --- CONTRIBUTING.md | 219 +++++++++++++++++++++ libs/oci/docs/API_REFERENCE.md | 341 +++++++++++++++++++++++++++++++++ libs/oci/docs/CHANGELOG.md | 147 ++++++++++++++ libs/oci/docs/MODELS.md | 179 +++++++++++++++++ 4 files changed, 886 insertions(+) create mode 100644 libs/oci/docs/API_REFERENCE.md create mode 100644 libs/oci/docs/CHANGELOG.md create mode 100644 libs/oci/docs/MODELS.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 637430b5..4b0f46d6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,6 +2,17 @@ We welcome your contributions! There are multiple ways to contribute. +## Table of Contents + +- [Opening Issues](#opening-issues) +- [Contributing Code](#contributing-code) +- [Development Setup](#development-setup) +- [Architecture Overview](#architecture-overview) +- [Adding a New Provider](#adding-a-new-provider) +- [Testing](#testing) +- [Pull Request Process](#pull-request-process) +- [Code of Conduct](#code-of-conduct) + ## Opening issues For bugs or enhancement requests, please file a GitHub issue unless it's @@ -31,6 +42,213 @@ git commit --signoff Only pull requests from committers that can be verified as having signed the OCA can be accepted. +## Development Setup + +### Prerequisites + +- Python 3.9+ +- [Poetry](https://python-poetry.org/) for dependency management +- OCI CLI configured (`~/.oci/config`) +- Access to OCI Generative AI service (for integration tests) + +### Clone and Install + +```bash +# Clone the repository +git clone https://github.com/oracle/langchain-oracle.git +cd langchain-oracle + +# Install langchain-oci with development dependencies +cd libs/oci +poetry install --with dev,test + +# Or install langchain-oracledb +cd libs/oracledb +poetry install --with dev,test +``` + +### Running Tests + +```bash +# Unit tests only (no OCI credentials needed) +poetry run pytest tests/unit + +# Integration tests (requires OCI credentials) +export COMPARTMENT_ID="ocid1.compartment.oc1..xxx" +export SERVICE_ENDPOINT="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" +poetry run pytest tests/integration + +# All tests +poetry run pytest + +# With coverage +poetry run pytest --cov=langchain_oci --cov-report=html +``` + +### Code Quality + +```bash +# Format code +poetry run black . +poetry run isort . + +# Lint +poetry run ruff check . 
+ +# Type check +poetry run mypy langchain_oci + +# All checks +make lint # If Makefile is available +``` + +--- + +## Architecture Overview + +### langchain-oci Structure + +``` +libs/oci/langchain_oci/ +├── __init__.py # Public exports +├── chat_models/ +│ ├── oci_generative_ai.py # ChatOCIGenAI, ChatOCIOpenAI +│ ├── oci_data_science.py # ChatOCIModelDeployment variants +│ └── providers/ +│ ├── base.py # Provider base class +│ ├── cohere.py # CohereProvider +│ └── generic.py # GenericProvider, MetaProvider, GeminiProvider +├── embeddings/ +│ ├── oci_generative_ai.py # OCIGenAIEmbeddings +│ └── image.py # Image embedding utilities +├── agents/ +│ └── react.py # create_oci_agent() +├── llms/ +│ └── oci_generative_ai.py # Legacy OCIGenAI +├── utils/ +│ └── vision.py # Vision utilities +└── common/ + ├── auth.py # Authentication + └── utils.py # Shared utilities +``` + +### Provider Pattern + +Providers abstract model-specific behaviors: + +``` +ChatOCIGenAI + │ + ├── model_id="meta.llama-*" → MetaProvider + ├── model_id="cohere.*" → CohereProvider + ├── model_id="google.*" → GeminiProvider + └── model_id="xai.*" → GenericProvider +``` + +Each provider handles: +- Message format conversion +- Tool calling format +- Response parsing +- Streaming events + +--- + +## Adding a New Provider + +### Step 1: Create Provider Class + +```python +# libs/oci/langchain_oci/chat_models/providers/my_provider.py + +from langchain_oci.chat_models.providers.base import Provider + +class MyProvider(Provider): + """Provider for MyModel.""" + + stop_sequence_key: str = "stop" + + def __init__(self) -> None: + from oci.generative_ai_inference import models + # Initialize OCI model classes + self.oci_chat_request = models.GenericChatRequest + # ... other initializations + + def messages_to_oci_params(self, messages, **kwargs): + """Convert LangChain messages to OCI format.""" + # Implementation + + def chat_response_to_text(self, response): + """Extract text from response.""" + # Implementation + + def convert_to_oci_tool(self, tool): + """Convert tool to OCI format.""" + # Implementation +``` + +### Step 2: Register Provider + +In `oci_generative_ai.py`, add detection: + +```python +def _get_provider(self) -> Provider: + if self.model_id.startswith("mymodel."): + return MyProvider() + # ... 
existing logic +``` + +### Step 3: Add Tests + +```python +# tests/unit/test_my_provider.py + +def test_my_provider_message_conversion(): + provider = MyProvider() + messages = [HumanMessage(content="Hello")] + params = provider.messages_to_oci_params(messages) + assert "messages" in params +``` + +### Step 4: Update Documentation + +- Add to `docs/MODELS.md` +- Add to feature matrix in README +- Create tutorial examples if significant + +--- + +## Testing + +### Test Categories + +| Type | Location | Requires OCI | +|------|----------|--------------| +| Unit | `tests/unit/` | No | +| Integration | `tests/integration/` | Yes | + +### Writing Tests + +```python +# Unit test example +def test_vision_model_detection(): + from langchain_oci import is_vision_model + assert is_vision_model("meta.llama-3.2-90b-vision-instruct") + assert not is_vision_model("meta.llama-3.3-70b-instruct") + +# Integration test example (requires OCI) +@pytest.mark.integration +def test_chat_invoke(): + llm = ChatOCIGenAI( + model_id="meta.llama-3.3-70b-instruct", + service_endpoint=os.environ["SERVICE_ENDPOINT"], + compartment_id=os.environ["COMPARTMENT_ID"], + ) + response = llm.invoke("Hello") + assert response.content +``` + +--- + ## Pull request process 1. Ensure there is an issue created to track and discuss the fix or enhancement @@ -41,6 +259,7 @@ can be accepted. 1. Ensure that any documentation is updated with the changes that are required by your change. 1. Ensure that any samples are updated if the base image has been changed. +1. **Run tests and linting** before submitting. 1. Submit the pull request. *Do not leave the pull request blank*. Explain exactly what your changes are meant to do and provide simple steps on how to validate. your changes. Ensure that you reference the issue you created as well. diff --git a/libs/oci/docs/API_REFERENCE.md b/libs/oci/docs/API_REFERENCE.md new file mode 100644 index 00000000..8f7e2b41 --- /dev/null +++ b/libs/oci/docs/API_REFERENCE.md @@ -0,0 +1,341 @@ +# API Reference + +Complete API reference for langchain-oci. + +## Chat Models + +### ChatOCIGenAI + +Main chat model for OCI Generative AI service. + +```python +from langchain_oci import ChatOCIGenAI + +llm = ChatOCIGenAI( + model_id: str, # Model ID or endpoint OCID + service_endpoint: str, # OCI service endpoint URL + compartment_id: str, # OCI compartment OCID + provider: Optional[str] = None, # "meta", "cohere", "google", "generic" + auth_type: str = "API_KEY", # "API_KEY", "INSTANCE_PRINCIPAL", etc. + auth_profile: str = "DEFAULT", # Profile name in ~/.oci/config + model_kwargs: Optional[Dict] = None, # Model parameters (temperature, etc.) 
+ streaming: bool = False, # Enable streaming +) +``` + +**Key Methods:** + +| Method | Description | +|--------|-------------| +| `invoke(input)` | Synchronous generation | +| `ainvoke(input)` | Async generation | +| `stream(input)` | Synchronous streaming | +| `astream(input)` | Async streaming | +| `batch(inputs)` | Batch generation | +| `abatch(inputs)` | Async batch generation | +| `bind_tools(tools, **kwargs)` | Bind tools for function calling | +| `with_structured_output(schema)` | Get structured output | + +**bind_tools Parameters:** + +```python +llm.bind_tools( + tools: List[BaseTool], # Tools to bind + tool_choice: Optional[str] = None, # "auto", "required", "none", or tool name + parallel_tool_calls: bool = False, # Enable parallel execution (Llama 4+) + tool_result_guidance: bool = False, # Guide model to use results naturally + max_sequential_tool_calls: int = 8, # Limit consecutive calls +) +``` + +**with_structured_output Parameters:** + +```python +llm.with_structured_output( + schema: Type[BaseModel], # Pydantic model or dict + method: str = "function_calling", # "function_calling", "json_mode", "json_schema" + include_raw: bool = False, # Include raw response +) +``` + +--- + +### ChatOCIOpenAI + +OpenAI Responses API compatibility for OCI. + +```python +from langchain_oci import ChatOCIOpenAI + +llm = ChatOCIOpenAI( + auth: httpx.Auth, # OCI auth handler + compartment_id: str, # OCI compartment OCID + model: str, # Model name (e.g., "openai.gpt-4.1") + conversation_store_id: Optional[str], # For persistent memory + region: Optional[str] = None, # OCI region + service_endpoint: Optional[str] = None, # Custom endpoint + base_url: Optional[str] = None, # Full URL override +) +``` + +--- + +### ChatOCIModelDeployment + +Chat model for OCI Data Science Model Deployments. + +```python +from langchain_oci import ChatOCIModelDeployment + +llm = ChatOCIModelDeployment( + endpoint: str, # Deployment predict URL + model: str = "odsc-llm", # Model name + streaming: bool = False, # Enable streaming + max_retries: int = 3, # Retry count + model_kwargs: Optional[Dict] = None, # Model parameters + default_headers: Optional[Dict] = None, # Custom headers +) +``` + +### ChatOCIModelDeploymentVLLM + +vLLM-specific deployment parameters. + +```python +from langchain_oci import ChatOCIModelDeploymentVLLM + +llm = ChatOCIModelDeploymentVLLM( + endpoint: str, + model: str = "odsc-llm", + # vLLM-specific + temperature: float = 0.2, + max_tokens: int = 256, + top_p: float = 1.0, + top_k: int = -1, + frequency_penalty: float = 0.0, + presence_penalty: float = 0.0, + repetition_penalty: float = 1.0, + use_beam_search: bool = False, + best_of: int = 1, + min_tokens: int = 0, + tool_choice: Optional[str] = None, +) +``` + +### ChatOCIModelDeploymentTGI + +TGI-specific deployment parameters. + +```python +from langchain_oci import ChatOCIModelDeploymentTGI + +llm = ChatOCIModelDeploymentTGI( + endpoint: str, + model: str = "odsc-llm", + # TGI-specific + temperature: float = 0.2, + max_tokens: int = 256, + top_p: float = 0.9, + seed: Optional[int] = None, + logprobs: bool = False, + top_logprobs: int = 5, +) +``` + +--- + +## Embeddings + +### OCIGenAIEmbeddings + +Text and image embeddings. 
+ +```python +from langchain_oci import OCIGenAIEmbeddings + +embeddings = OCIGenAIEmbeddings( + model_id: str, # Embedding model ID + service_endpoint: str, # OCI service endpoint + compartment_id: str, # OCI compartment OCID + auth_type: str = "API_KEY", # Authentication type + auth_profile: str = "DEFAULT", # Profile name + truncate: str = "END", # Truncation strategy +) +``` + +**Methods:** + +| Method | Description | +|--------|-------------| +| `embed_query(text)` | Embed single query | +| `embed_documents(texts)` | Embed multiple documents | +| `embed_image(path)` | Embed single image | +| `embed_image_batch(paths)` | Embed multiple images | + +--- + +## Agents + +### create_oci_agent + +Factory function to create a ReAct agent. + +```python +from langchain_oci import create_oci_agent + +agent = create_oci_agent( + model_id: str, # Model ID + tools: List[BaseTool], # Tools for the agent + compartment_id: str, # OCI compartment OCID + service_endpoint: str, # OCI service endpoint + system_prompt: Optional[str] = None, # System instructions + checkpointer: Optional[BaseCheckpointSaver] = None, # Memory + interrupt_before: Optional[List[str]] = None, # Human-in-loop + interrupt_after: Optional[List[str]] = None, + auth_type: str = "API_KEY", + auth_profile: str = "DEFAULT", + model_kwargs: Optional[Dict] = None, +) +``` + +**Returns:** `CompiledStateGraph` (LangGraph agent) + +**Usage:** + +```python +from langchain_core.messages import HumanMessage + +result = agent.invoke({ + "messages": [HumanMessage(content="Search for Python tutorials")] +}) +``` + +--- + +## Vision Utilities + +### load_image + +Load image file for vision models. + +```python +from langchain_oci import load_image + +content_block = load_image( + path: str, # Path to image file + detail: str = "auto", # "auto", "low", "high" +) +# Returns: Dict with type="image_url" and base64 data +``` + +### encode_image + +Encode bytes as image content. + +```python +from langchain_oci import encode_image + +content_block = encode_image( + data: bytes, # Raw image bytes + mime_type: str, # "image/jpeg", "image/png", etc. + detail: str = "auto", +) +``` + +### to_data_uri + +Convert image to data URI string. + +```python +from langchain_oci import to_data_uri + +uri = to_data_uri( + path: str, # Path to image file +) +# Returns: "data:image/jpeg;base64,..." +``` + +### is_vision_model + +Check if model supports vision. + +```python +from langchain_oci import is_vision_model + +supports_vision = is_vision_model(model_id: str) +# Returns: bool +``` + +--- + +## Constants + +### VISION_MODELS + +List of vision-capable model IDs. + +```python +from langchain_oci import VISION_MODELS + +print(VISION_MODELS) +# ['meta.llama-3.2-90b-vision-instruct', 'google.gemini-2.0-flash', ...] +``` + +### IMAGE_EMBEDDING_MODELS + +List of models supporting image embeddings. + +```python +from langchain_oci import IMAGE_EMBEDDING_MODELS + +print(IMAGE_EMBEDDING_MODELS) +# ['cohere.embed-v4.0'] +``` + +--- + +## Authentication Types + +```python +from langchain_oci import OCIAuthType + +OCIAuthType.API_KEY # Default, uses ~/.oci/config +OCIAuthType.INSTANCE_PRINCIPAL # For OCI Compute instances +OCIAuthType.RESOURCE_PRINCIPAL # For OCI Functions, Jobs +OCIAuthType.SECURITY_TOKEN # Session-based authentication +``` + +--- + +## Legacy LLM Classes + +### OCIGenAI + +Legacy LLM interface (text completion). 
+ +```python +from langchain_oci import OCIGenAI + +llm = OCIGenAI( + model_id: str, + service_endpoint: str, + compartment_id: str, + model_kwargs: Optional[Dict] = None, +) + +response = llm.invoke("Complete this: ") +``` + +### OCIModelDeploymentLLM + +Legacy LLM for model deployments. + +```python +from langchain_oci import OCIModelDeploymentLLM + +llm = OCIModelDeploymentLLM( + endpoint: str, + model: str = "odsc-llm", + model_kwargs: Optional[Dict] = None, +) +``` diff --git a/libs/oci/docs/CHANGELOG.md b/libs/oci/docs/CHANGELOG.md new file mode 100644 index 00000000..5f477bed --- /dev/null +++ b/libs/oci/docs/CHANGELOG.md @@ -0,0 +1,147 @@ +# Changelog + +All notable changes to langchain-oci are documented here. + +## [Unreleased] + +### Added +- Comprehensive tutorial suite (10 tutorials) +- API reference documentation +- Model reference guide + +--- + +## [0.2.0] - 2025 + +### Added + +#### Vision & Multimodal +- Vision support for 13 models via `load_image()`, `encode_image()`, `to_data_uri()` +- `VISION_MODELS` registry for vision-capable model discovery +- `is_vision_model()` utility function +- Gemini multimodal support (PDF, video, audio) +- Cohere V2 API for vision models (DAC deployments) + +#### Agents +- `create_oci_agent()` factory function for ReAct agents +- LangGraph integration with checkpointing +- Human-in-the-loop support via `interrupt_before`/`interrupt_after` + +#### Tool Calling +- Parallel tool calls for Llama 4+ models (`parallel_tool_calls=True`) +- `tool_result_guidance` to help models synthesize tool results +- `max_sequential_tool_calls` for infinite loop prevention +- Intelligent `tool_choice` management + +#### Embeddings +- Image embeddings via `embed_image()` and `embed_image_batch()` +- `IMAGE_EMBEDDING_MODELS` registry +- Support for `cohere.embed-v4.0` multimodal embeddings + +#### Async Support +- Full async support via LangChain base classes +- `ainvoke()`, `astream()`, `abatch()` methods +- Async support for `ChatOCIModelDeployment` + +#### Providers +- `GeminiProvider` with `max_output_tokens` → `max_tokens` mapping +- Enhanced `CohereProvider` with V2 API support +- `XAIProvider` with reasoning content extraction + +### Changed +- Improved error handling for tool calling +- Better message format validation +- Enhanced streaming reliability + +### Fixed +- Issue #28: Meta models outputting raw JSON instead of natural responses +- Issue #78: NullPointerException with empty AI message content +- Infinite loop detection for repeated tool calls + +--- + +## [0.1.0] - 2024 + +### Added +- Initial release +- `ChatOCIGenAI` for OCI Generative AI chat models +- `ChatOCIOpenAI` for OpenAI Responses API compatibility +- `ChatOCIModelDeployment` for OCI Data Science deployments +- `ChatOCIModelDeploymentVLLM` for vLLM deployments +- `ChatOCIModelDeploymentTGI` for TGI deployments +- `OCIGenAIEmbeddings` for text embeddings +- `OCIModelDeploymentEndpointEmbeddings` for deployment embeddings +- Support for Meta, Cohere, Google, xAI, OpenAI, Mistral providers +- Four authentication methods (API Key, Instance Principal, Resource Principal, Security Token) +- Structured output via `with_structured_output()` +- Basic tool calling via `bind_tools()` + +--- + +## Version History + +| Version | Date | Highlights | +|---------|------|------------| +| 0.2.0 | 2025 | Vision, agents, parallel tools, async | +| 0.1.0 | 2024 | Initial release | + +--- + +## Migration Guide + +### From 0.1.x to 0.2.x + +#### Vision Support + +```python +# Old: No vision support +# New: 
Use load_image() +from langchain_oci import ChatOCIGenAI, load_image +from langchain_core.messages import HumanMessage + +llm = ChatOCIGenAI(model_id="meta.llama-3.2-90b-vision-instruct", ...) + +message = HumanMessage(content=[ + {"type": "text", "text": "Describe this image."}, + load_image("photo.jpg"), +]) +response = llm.invoke([message]) +``` + +#### Agents + +```python +# Old: Manual agent setup +# New: Use create_oci_agent() +from langchain_oci import create_oci_agent + +agent = create_oci_agent( + model_id="meta.llama-4-scout-17b-16e-instruct", + tools=[my_tool], + compartment_id="...", + service_endpoint="...", +) +``` + +#### Parallel Tools + +```python +# Old: Sequential tool calls only +# New: Enable parallel calls (Llama 4+) +llm_with_tools = llm.bind_tools( + [tool1, tool2], + parallel_tool_calls=True, +) +``` + +--- + +## Deprecations + +None currently planned. + +--- + +## Security + +See [SECURITY.md](../../../SECURITY.md) for vulnerability reporting. diff --git a/libs/oci/docs/MODELS.md b/libs/oci/docs/MODELS.md new file mode 100644 index 00000000..fdca575d --- /dev/null +++ b/libs/oci/docs/MODELS.md @@ -0,0 +1,179 @@ +# Model Reference + +Complete reference of models available in OCI Generative AI. + +## Chat Models + +### Meta Llama + +| Model ID | Type | Features | Context | +|----------|------|----------|---------| +| `meta.llama-3.2-11b-vision-instruct` | Vision | Image analysis | 128K | +| `meta.llama-3.2-90b-vision-instruct` | Vision | Image analysis | 128K | +| `meta.llama-3.3-70b-instruct` | Text | Tools, reasoning | 128K | +| `meta.llama-4-scout-17b-16e-instruct` | Text | Parallel tools | 128K | +| `meta.llama-4-maverick-17b-128e-instruct-fp8` | Text | Parallel tools | 128K | + +**Key Features:** +- Vision: Llama 3.2 11B/90B +- Parallel tool calls: Llama 4 models only +- `tool_choice` support: All models +- `tool_result_guidance`: Recommended for tool workflows + +--- + +### Google Gemini + +| Model ID | Type | Features | Context | +|----------|------|----------|---------| +| `google.gemini-2.0-flash` | Multimodal | PDF, video, audio, image | 1M | +| `google.gemini-2.5-flash` | Multimodal | PDF, video, audio, image | 1M | +| `google.gemini-2.5-pro` | Multimodal | Most capable | 1M | + +**Key Features:** +- PDF processing via `application/pdf` mime type +- Video analysis via `video/mp4`, `video/webm`, etc. +- Audio transcription via `audio/mp3`, `audio/wav`, etc. 
+- Use `max_tokens` (not `max_output_tokens`) + +**Supported Media Types:** + +| Type | MIME Types | +|------|------------| +| PDF | `application/pdf` | +| Video | `video/mp4`, `video/mpeg`, `video/mov`, `video/avi`, `video/webm` | +| Audio | `audio/mp3`, `audio/wav`, `audio/aac`, `audio/ogg`, `audio/flac` | +| Image | `image/jpeg`, `image/png`, `image/gif`, `image/webp` | + +--- + +### xAI Grok + +| Model ID | Type | Features | Context | +|----------|------|----------|---------| +| `xai.grok-4` | Vision | Image, reasoning | 128K | +| `xai.grok-4-fast-reasoning` | Text | Optimized reasoning | 128K | + +**Key Features:** +- `reasoning_content` in response metadata +- Vision support +- `tool_choice` support + +--- + +### Cohere Command + +| Model ID | Type | Features | Context | +|----------|------|----------|---------| +| `cohere.command-r-plus` | Text | RAG, citations | 128K | +| `cohere.command-a-03-2025` | Text | Latest release | 128K | +| `cohere.command-a-vision` | Vision | V2 API, DAC only | 128K | + +**Key Features:** +- Citations in response metadata +- RAG-optimized +- V2 API for vision (requires Dedicated AI Cluster) +- No `tool_choice` support +- No parallel tool calls + +--- + +### OpenAI (via ChatOCIOpenAI) + +| Model ID | Type | Features | +|----------|------|----------| +| `openai.gpt-4.1` | Text | Tools, reasoning | +| `openai.o1` | Text | Advanced reasoning | + +--- + +## Embedding Models + +### Text Embeddings + +| Model ID | Languages | Dimensions | +|----------|-----------|------------| +| `cohere.embed-english-v3.0` | English | 1024 | +| `cohere.embed-multilingual-v3.0` | 100+ languages | 1024 | + +### Multimodal Embeddings + +| Model ID | Types | Dimensions | +|----------|-------|------------| +| `cohere.embed-v4.0` | Text + Image | 256-1536 (configurable) | + +**Usage:** + +```python +from langchain_oci import OCIGenAIEmbeddings + +# Text embeddings +embeddings = OCIGenAIEmbeddings(model_id="cohere.embed-english-v3.0", ...) +vector = embeddings.embed_query("Hello world") + +# Image embeddings (cohere.embed-v4.0 only) +embeddings = OCIGenAIEmbeddings(model_id="cohere.embed-v4.0", ...) +vector = embeddings.embed_image("photo.jpg") +``` + +--- + +## Feature Matrix + +| Feature | Meta | Gemini | Cohere | xAI | +|---------|------|--------|--------|-----| +| Text Generation | ✅ | ✅ | ✅ | ✅ | +| Vision (Images) | ✅ 3.2 | ✅ | ✅ DAC | ✅ | +| PDF Processing | ❌ | ✅ | ❌ | ❌ | +| Video Analysis | ❌ | ✅ | ❌ | ❌ | +| Audio Analysis | ❌ | ✅ | ❌ | ❌ | +| Tool Calling | ✅ | ✅ | ✅ | ✅ | +| Parallel Tools | ✅ 4+ | ❌ | ❌ | ❌ | +| `tool_choice` | ✅ | ✅ | ❌ | ✅ | +| Citations | ❌ | ❌ | ✅ | ❌ | +| Reasoning Content | ❌ | ❌ | ❌ | ✅ | +| Streaming | ✅ | ✅ | ✅ | ✅ | +| Async | ✅ | ✅ | ✅ | ✅ | + +--- + +## Regions + +OCI Generative AI is available in these regions: + +| Region | Endpoint | +|--------|----------| +| us-chicago-1 | `https://inference.generativeai.us-chicago-1.oci.oraclecloud.com` | +| eu-frankfurt-1 | `https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com` | +| ap-tokyo-1 | `https://inference.generativeai.ap-tokyo-1.oci.oraclecloud.com` | + +Check [OCI documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/overview.htm) for the latest region availability. 
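+
+To target a specific region, pass its endpoint from the table above as `service_endpoint` (a minimal sketch; the compartment OCID is a placeholder):
+
+```python
+from langchain_oci import ChatOCIGenAI
+
+# Same model, served from the Frankfurt region endpoint
+llm = ChatOCIGenAI(
+    model_id="meta.llama-3.3-70b-instruct",
+    service_endpoint="https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com",
+    compartment_id="ocid1.compartment.oc1..xxx",
+)
+```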
+ +--- + +## Model Selection Guide + +### By Use Case + +| Use Case | Recommended Model | +|----------|-------------------| +| General chat | `meta.llama-3.3-70b-instruct` | +| Image analysis | `meta.llama-3.2-90b-vision-instruct` | +| PDF/document processing | `google.gemini-2.0-flash` | +| Video understanding | `google.gemini-2.0-flash` | +| Audio transcription | `google.gemini-2.0-flash` | +| Tool-heavy workflows | `meta.llama-4-scout-17b-16e-instruct` | +| RAG with citations | `cohere.command-r-plus` | +| Complex reasoning | `xai.grok-4-fast-reasoning` | +| Fast responses | `google.gemini-2.0-flash` | +| Embeddings (text) | `cohere.embed-english-v3.0` | +| Embeddings (multimodal) | `cohere.embed-v4.0` | + +### By Performance + +| Priority | Model | +|----------|-------| +| Lowest latency | `google.gemini-2.0-flash` | +| Highest throughput | `google.gemini-2.0-flash` | +| Best quality | `meta.llama-3.3-70b-instruct`, `cohere.command-r-plus` | +| Best for tools | `meta.llama-4-scout-17b-16e-instruct` | From 66f0b15ef05ba07b51aa3d29b23e69a6e6958934 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 12:27:34 -0500 Subject: [PATCH 05/16] Fix linting issues in tutorial code examples - Add per-file-ignores for T201 (print) in tutorials - Fix E501 line length issues across all tutorial examples - Fix F841 unused variable warnings in demo functions --- libs/oci/pyproject.toml | 3 ++ .../code/image_analysis.py | 5 ++- .../code/agent_with_memory.py | 11 +++-- .../code/human_in_loop.py | 2 +- .../code/basic_tools.py | 3 +- .../code/parallel_tools.py | 4 +- .../code/tool_workflows.py | 8 ++-- .../code/data_classification.py | 3 +- .../code/custom_endpoint.py | 2 +- .../code/vllm_deployment.py | 3 +- .../code/batch_processing.py | 5 ++- .../code/conversation_store.py | 8 ++-- .../code/tools_and_search.py | 6 +-- .../code/cohere_examples.py | 17 ++++---- .../code/gemini_examples.py | 28 ++++++------- .../code/meta_examples.py | 40 +++++++++---------- .../10-embeddings/code/rag_example.py | 25 +++++++----- 17 files changed, 97 insertions(+), 76 deletions(-) diff --git a/libs/oci/pyproject.toml b/libs/oci/pyproject.toml index 79cb3fb1..ffc7e875 100644 --- a/libs/oci/pyproject.toml +++ b/libs/oci/pyproject.toml @@ -89,6 +89,9 @@ ignore = [ "COM812", # Messes with the formatter ] +[tool.ruff.lint.per-file-ignores] +"tutorials/**/*.py" = ["T201"] # Allow print in tutorial examples + [tool.mypy] plugins = ["pydantic.mypy"] check_untyped_defs = true diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py b/libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py index 8a4788b2..55ed16d8 100644 --- a/libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/image_analysis.py @@ -55,7 +55,10 @@ def compare_images(image_path_1: str, image_path_2: str): message = HumanMessage( content=[ - {"type": "text", "text": "Compare these two images. What are the key differences?"}, + { + "type": "text", + "text": "Compare these two images. 
What are the key differences?", + }, load_image(image_path_1), load_image(image_path_2), ] diff --git a/libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py b/libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py index 79be3f63..b09250d6 100644 --- a/libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py +++ b/libs/oci/tutorials/03-building-ai-agents/code/agent_with_memory.py @@ -29,9 +29,12 @@ def get_recommendation(category: str) -> str: category: The category to get a recommendation for (food, music, movies) """ recommendations = { - "food": "Based on your preferences, I recommend trying the new Italian restaurant downtown.", - "music": "You might enjoy the latest album by The Weeknd based on your taste.", - "movies": "I recommend watching 'Oppenheimer' - it matches your interest in drama.", + "food": ( + "Based on your preferences, I recommend trying the new " + "Italian restaurant downtown." + ), + "music": "You might enjoy the latest album by The Weeknd.", + "movies": "I recommend watching 'Oppenheimer' - great drama.", } return recommendations.get(category, f"No recommendations available for {category}") @@ -58,7 +61,7 @@ def main(): # First message - set a preference print("Turn 1: Setting preference") result1 = agent.invoke( - {"messages": [HumanMessage(content="I love Italian food and classic rock music.")]}, + {"messages": [HumanMessage(content="I love Italian food and rock.")]}, config=config, ) print(f"Agent: {result1['messages'][-1].content}\n") diff --git a/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py b/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py index 1756a016..f7cd9e93 100644 --- a/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py +++ b/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py @@ -66,7 +66,7 @@ def main(): print("User: Send an email to john@example.com about the meeting tomorrow") result = agent.invoke( {"messages": [HumanMessage( - content="Send an email to john@example.com saying 'Meeting tomorrow at 10am'" + content="Send an email to john@example.com saying 'Meeting at 10am'" )]}, config=config, ) diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py b/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py index fd6c5bee..fe8aed8e 100644 --- a/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py @@ -30,7 +30,8 @@ def get_weather(city: str) -> str: # Tool with Pydantic schema for complex parameters class CalculatorInput(BaseModel): """Input for the calculator tool.""" - expression: str = Field(description="Mathematical expression to evaluate (e.g., '2 + 2')") + + expression: str = Field(description="Math expression to evaluate (e.g., '2 + 2')") @tool(args_schema=CalculatorInput) diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py b/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py index 0ceab9ce..2a0132c1 100644 --- a/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py @@ -72,11 +72,11 @@ def main(): ) # Query that requires multiple tools - print("Query: Tell me about the weather, time, and population of Chicago and New York") + print("Query: Tell me about weather, time, and population of both cities") print("-" * 60) messages = [HumanMessage( - content="Tell me the weather, current time, and population of Chicago and New York." 
+ content="Tell me the weather, time, and population of Chicago and New York." )] response = llm_with_tools.invoke(messages) diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py b/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py index fd4fe5c8..4a7dcfdf 100644 --- a/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py @@ -42,7 +42,9 @@ def save_research_note(note: str) -> str: return f"Note saved successfully: '{note[:50]}...'" -def run_workflow(llm_with_tools, messages: list, tools_dict: dict, max_iterations: int = 10): +def run_workflow( + llm_with_tools, messages: list, tools_dict: dict, max_iterations: int = 10 +): """Run a multi-step tool workflow until completion.""" for iteration in range(max_iterations): @@ -88,8 +90,8 @@ def main(): print("=" * 60) messages = [HumanMessage( - content="Research AI in healthcare. Get the content of the first article you find, " - "summarize it, and save a research note with the key findings." + content="Research AI in healthcare. Get the first article, " + "summarize it, and save a note with the key findings." )] final_answer = run_workflow(llm_with_tools, messages, tools_dict) diff --git a/libs/oci/tutorials/05-structured-output/code/data_classification.py b/libs/oci/tutorials/05-structured-output/code/data_classification.py index 3c83a8e3..17713c21 100644 --- a/libs/oci/tutorials/05-structured-output/code/data_classification.py +++ b/libs/oci/tutorials/05-structured-output/code/data_classification.py @@ -30,8 +30,9 @@ class UrgencyLevel(str, Enum): class DocumentClassification(BaseModel): """Classification result for a document.""" + category: DocumentCategory = Field(description="Primary document category") - subcategory: str = Field(description="Specific subcategory within the main category") + subcategory: str = Field(description="Specific subcategory within main category") urgency: UrgencyLevel = Field(description="How urgent is this document") confidence: float = Field(ge=0, le=1, description="Classification confidence 0-1") key_topics: List[str] = Field(description="Main topics covered in the document") diff --git a/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py b/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py index eb39d0fc..d70077c7 100644 --- a/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py +++ b/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py @@ -127,7 +127,7 @@ def demo_custom_deployment(): }, ) - print(f"Custom deployment configured:") + print("Custom deployment configured:") print(f" - Endpoint: {endpoint}") print(f" - Model: {chat.model}") print(f" - Custom param: {chat.custom_param}") diff --git a/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py b/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py index 0927ba89..029448a6 100644 --- a/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py +++ b/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py @@ -96,7 +96,8 @@ def beam_search_generation(): max_tokens=100, ) - response = chat.invoke("Translate to French: The quick brown fox jumps over the lazy dog.") + prompt = "Translate to French: The quick brown fox jumps over the lazy dog." 
+ response = chat.invoke(prompt) print(f"Translation: {response.content}") diff --git a/libs/oci/tutorials/07-async-for-production/code/batch_processing.py b/libs/oci/tutorials/07-async-for-production/code/batch_processing.py index 4177f4d5..650df118 100644 --- a/libs/oci/tutorials/07-async-for-production/code/batch_processing.py +++ b/libs/oci/tutorials/07-async-for-production/code/batch_processing.py @@ -18,7 +18,6 @@ async def process_with_rate_limit( ) -> List[str]: """Process prompts with concurrency limit.""" semaphore = asyncio.Semaphore(max_concurrent) - results = [] async def limited_invoke(prompt: str, index: int): async with semaphore: @@ -30,7 +29,9 @@ async def limited_invoke(prompt: str, index: int): return (index, f"Error: {e}") # Create tasks for all prompts - tasks = [limited_invoke(p, i) for i, p in enumerate(prompts)] + tasks = [ + limited_invoke(p, i) for i, p in enumerate(prompts) + ] # Process all with limited concurrency completed = await asyncio.gather(*tasks) diff --git a/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py b/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py index df7bcd1b..c6f4214b 100644 --- a/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py +++ b/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py @@ -38,8 +38,8 @@ def persistent_memory_demo(): client = setup_client_with_store() # First message - introduce yourself - print("User: My name is Alice and I work as a data scientist.") - response1 = client.invoke("My name is Alice and I work as a data scientist.") + print("User: My name is Alice and I'm a data scientist.") + response1 = client.invoke("My name is Alice and I'm a data scientist.") print(f"Assistant: {response1.content}") # Second message - model should remember @@ -48,8 +48,8 @@ def persistent_memory_demo(): print(f"Assistant: {response2.content}") # Third message - continue context - print("\nUser: What programming languages should I learn for my job?") - response3 = client.invoke("What programming languages should I learn for my job?") + print("\nUser: What programming languages should I learn?") + response3 = client.invoke("What programming languages should I learn?") print(f"Assistant: {response3.content}") diff --git a/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py b/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py index 5537b92d..2ce9cbb3 100644 --- a/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py +++ b/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py @@ -34,8 +34,8 @@ def setup_client(): class GetWeather(BaseModel): """Get the current weather for a location.""" - location: str = Field(..., description="The city and state, e.g. San Francisco, CA") - unit: str = Field(default="fahrenheit", description="Temperature unit: celsius or fahrenheit") + location: str = Field(..., description="City and state, e.g. 
San Francisco, CA") + unit: str = Field(default="fahrenheit", description="Unit: celsius or fahrenheit") class SearchDatabase(BaseModel): @@ -60,7 +60,7 @@ def function_calling_demo(): response = llm_with_tools.invoke("What is the weather like in San Francisco?") print(f"Response content: {response.content}") - print(f"\nTool calls:") + print("\nTool calls:") for tool_call in response.tool_calls: print(f" - Function: {tool_call['name']}") print(f" Arguments: {tool_call['args']}") diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py b/libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py index 9929323b..6d80532e 100644 --- a/libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py +++ b/libs/oci/tutorials/09-provider-deep-dive/code/cohere_examples.py @@ -21,7 +21,7 @@ def basic_cohere_chat(): compartment_id=COMPARTMENT_ID, ) - response = llm.invoke("Explain the difference between machine learning and deep learning.") + response = llm.invoke("Explain the difference between ML and deep learning.") print(response.content) @@ -51,7 +51,7 @@ def rag_with_citations(): Document 3: The langchain-oci package enables Python developers to use OCI Generative AI services with the LangChain framework.""" ), - HumanMessage(content="When was OCI launched and what AI capabilities does it offer?"), + HumanMessage(content="When was OCI launched and what AI does it offer?"), ] response = llm.invoke(messages) @@ -81,7 +81,7 @@ def search_database(query: str, table: str = "documents") -> str: ) # Note: Cohere doesn't support tool_choice or parallel_tool_calls - llm_with_tools = llm.bind_tools([search_database]) + _ = llm.bind_tools([search_database]) # Example binding print("Cohere tool calling limitations:") print("- No tool_choice parameter support") @@ -130,11 +130,12 @@ def cohere_response_metadata(): print("\nCohere Response Metadata") print("=" * 50) - llm = ChatOCIGenAI( - model_id="cohere.command-r-plus", - service_endpoint=SERVICE_ENDPOINT, - compartment_id=COMPARTMENT_ID, - ) + # Example configuration for accessing metadata: + # llm = ChatOCIGenAI( + # model_id="cohere.command-r-plus", + # service_endpoint=SERVICE_ENDPOINT, + # compartment_id=COMPARTMENT_ID, + # ) print("Available in response.response_metadata:") print("- citations: Document citations (when RAG context provided)") diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py b/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py index b2fa556b..1a2684ac 100644 --- a/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py +++ b/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py @@ -23,7 +23,7 @@ def basic_gemini_chat(): compartment_id=COMPARTMENT_ID, ) - response = llm.invoke("Explain the concept of machine learning in simple terms.") + response = llm.invoke("Explain machine learning in simple terms.") print(response.content) @@ -47,7 +47,7 @@ def pdf_processing(): message = HumanMessage( content=[ - {"type": "text", "text": "Summarize the key points from this document."}, + {"type": "text", "text": "Summarize key points from this document."}, {"type": "media", "data": pdf_data, "mime_type": "application/pdf"}, ] ) @@ -101,7 +101,7 @@ def video_analysis(): print("To test video analysis:") print(f"1. Place a video file at: {video_path}") print("2. 
Run this example again") - print("\nSupported formats: MP4, MPEG, MOV, AVI, FLV, MPG, WEBM, WMV, 3GPP") + print("\nFormats: MP4, MPEG, MOV, AVI, FLV, MPG, WEBM, WMV, 3GPP") def audio_transcription(): @@ -123,7 +123,7 @@ def audio_transcription(): message = HumanMessage( content=[ - {"type": "text", "text": "Transcribe this audio and summarize the content."}, + {"type": "text", "text": "Transcribe and summarize this audio."}, {"type": "media", "data": audio_data, "mime_type": "audio/mp3"}, ] ) @@ -143,16 +143,16 @@ def gemini_parameters(): print("=" * 50) # Note: OCI uses max_tokens, not max_output_tokens - # The provider will automatically map with a warning - llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", - service_endpoint=SERVICE_ENDPOINT, - compartment_id=COMPARTMENT_ID, - model_kwargs={ - "max_tokens": 1024, # Use max_tokens for OCI - "temperature": 0.7, - }, - ) + # Example configuration: + # llm = ChatOCIGenAI( + # model_id="google.gemini-2.0-flash", + # service_endpoint=SERVICE_ENDPOINT, + # compartment_id=COMPARTMENT_ID, + # model_kwargs={ + # "max_tokens": 1024, # Use max_tokens for OCI + # "temperature": 0.7, + # }, + # ) print("Key differences from native Gemini SDK:") print("- Use 'max_tokens' instead of 'max_output_tokens'") diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py b/libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py index 63bdbbd9..a0dc0cba 100644 --- a/libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py +++ b/libs/oci/tutorials/09-provider-deep-dive/code/meta_examples.py @@ -1,7 +1,7 @@ # Tutorial 09: Meta Llama Provider Examples # Demonstrates Meta-specific features: vision, parallel tools, guidance -from langchain_core.messages import HumanMessage, SystemMessage +from langchain_core.messages import HumanMessage from langchain_core.tools import tool from langchain_oci import ChatOCIGenAI, load_image @@ -104,7 +104,8 @@ def parallel_tool_calls(): ) print("Query: What's the weather and time in New York and London?") - response = llm_with_tools.invoke("What's the weather and time in New York and London?") + query = "What's the weather and time in New York and London?" 
+ response = llm_with_tools.invoke(query) print(f"\nTool calls made: {len(response.tool_calls)}") for tc in response.tool_calls: @@ -119,19 +120,17 @@ def tool_result_guidance(): print("\nTool Result Guidance") print("=" * 50) - llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", - service_endpoint=SERVICE_ENDPOINT, - compartment_id=COMPARTMENT_ID, - ) - - # Without guidance, model might output raw JSON - # With guidance, model synthesizes natural response - llm_with_tools = llm.bind_tools( - [get_weather], - tool_result_guidance=True, # Helps model use results naturally - max_sequential_tool_calls=5, # Prevents infinite loops - ) + # Example configuration with tool_result_guidance: + # llm = ChatOCIGenAI( + # model_id="meta.llama-3.3-70b-instruct", + # service_endpoint=SERVICE_ENDPOINT, + # compartment_id=COMPARTMENT_ID, + # ) + # llm_with_tools = llm.bind_tools( + # [get_weather], + # tool_result_guidance=True, + # max_sequential_tool_calls=5, + # ) print("With tool_result_guidance=True:") print("- Model receives instruction to synthesize tool results") @@ -144,11 +143,12 @@ def multi_image_comparison(): print("\nMulti-Image Comparison") print("=" * 50) - llm = ChatOCIGenAI( - model_id="meta.llama-3.2-90b-vision-instruct", - service_endpoint=SERVICE_ENDPOINT, - compartment_id=COMPARTMENT_ID, - ) + # Example configuration: + # llm = ChatOCIGenAI( + # model_id="meta.llama-3.2-90b-vision-instruct", + # service_endpoint=SERVICE_ENDPOINT, + # compartment_id=COMPARTMENT_ID, + # ) print("Pattern for comparing multiple images:") print(""" diff --git a/libs/oci/tutorials/10-embeddings/code/rag_example.py b/libs/oci/tutorials/10-embeddings/code/rag_example.py index 2338822f..f3517f86 100644 --- a/libs/oci/tutorials/10-embeddings/code/rag_example.py +++ b/libs/oci/tutorials/10-embeddings/code/rag_example.py @@ -16,12 +16,17 @@ def cosine_similarity(a: list, b: list) -> float: return float(np.dot(a_arr, b_arr) / (np.linalg.norm(a_arr) * np.linalg.norm(b_arr))) -def retrieve_context(query: str, documents: list, doc_vectors: list, embeddings, top_k: int = 2) -> list: +def retrieve_context( + query: str, documents: list, doc_vectors: list, embeddings, top_k: int = 2 +) -> list: """Retrieve most relevant documents for a query.""" query_vector = embeddings.embed_query(query) # Calculate similarities - similarities = [(i, cosine_similarity(query_vector, dv)) for i, dv in enumerate(doc_vectors)] + similarities = [ + (i, cosine_similarity(query_vector, dv)) + for i, dv in enumerate(doc_vectors) + ] # Sort by similarity (descending) similarities.sort(key=lambda x: x[1], reverse=True) @@ -47,14 +52,14 @@ def main(): # Knowledge base (in production, this would be much larger) knowledge_base = [ - "Oracle Cloud Infrastructure (OCI) provides enterprise cloud services including compute, storage, and networking.", - "OCI Generative AI service offers access to large language models from multiple providers including Meta, Cohere, and Google.", - "LangChain is an open-source framework for building applications with large language models.", - "The langchain-oci package provides LangChain integrations for OCI Generative AI services.", - "RAG (Retrieval Augmented Generation) combines retrieval systems with LLMs to provide accurate, grounded responses.", - "Vector embeddings represent text as numerical vectors, enabling semantic similarity search.", - "OCI offers dedicated AI clusters (DAC) for running custom model endpoints.", - "The ChatOCIGenAI class is the main interface for chat models in langchain-oci.", 
+ "OCI provides enterprise cloud services including compute and storage.", + "OCI GenAI offers access to LLMs from Meta, Cohere, and Google.", + "LangChain is a framework for building apps with LLMs.", + "The langchain-oci package integrates OCI GenAI with LangChain.", + "RAG combines retrieval with LLMs for accurate, grounded responses.", + "Vector embeddings enable semantic similarity search.", + "OCI offers dedicated AI clusters (DAC) for custom endpoints.", + "ChatOCIGenAI is the main interface for chat models.", ] print("RAG Example: Retrieval Augmented Generation") From 8f55c7fe0302c38eb30e221012c465d1767b748a Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 12:32:19 -0500 Subject: [PATCH 06/16] Add missing tutorials and classes to README - Add tutorials 06, 08, 09 to the tutorials table - Add ChatOCIModelDeploymentVLLM and ChatOCIModelDeploymentTGI to API Reference --- libs/oci/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/oci/README.md b/libs/oci/README.md index 7be2fb50..efe51f2c 100644 --- a/libs/oci/README.md +++ b/libs/oci/README.md @@ -522,7 +522,10 @@ Comprehensive tutorials covering all features: | [03. Building AI Agents](./tutorials/03-building-ai-agents/) | create_oci_agent, checkpointing | | [04. Tool Calling Mastery](./tutorials/04-tool-calling-mastery/) | Parallel tools, workflows | | [05. Structured Output](./tutorials/05-structured-output/) | Pydantic, JSON modes | +| [06. Model Deployments](./tutorials/06-model-deployments/) | vLLM, TGI, custom endpoints | | [07. Async for Production](./tutorials/07-async-for-production/) | ainvoke, astream, FastAPI | +| [08. OpenAI Responses API](./tutorials/08-openai-responses-api/) | ChatOCIOpenAI, conversation stores | +| [09. Provider Deep Dive](./tutorials/09-provider-deep-dive/) | Provider-specific features | | [10. Embeddings](./tutorials/10-embeddings/) | Text & image embeddings, RAG | See [tutorials/README.md](./tutorials/README.md) for the full learning path. 
@@ -578,6 +581,8 @@ Content type not supported | `ChatOCIGenAI` | Main chat model for OCI GenAI | | `ChatOCIOpenAI` | OpenAI Responses API compatibility | | `ChatOCIModelDeployment` | Custom OCI Data Science deployments | +| `ChatOCIModelDeploymentVLLM` | vLLM-specific deployment handler | +| `ChatOCIModelDeploymentTGI` | TGI-specific deployment handler | ### Embeddings From 7ee479800a7a2ff51c07f523e7cd853b72cf5cd6 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 13:13:23 -0500 Subject: [PATCH 07/16] Fix formatting and exclude tutorials from mypy - Run ruff format on all tutorial files - Add tutorials/ to mypy exclude (demo code, not production) --- libs/oci/pyproject.toml | 1 + .../01-getting-started/code/auth_examples.py | 4 ++-- .../01-getting-started/code/basic_chat.py | 2 +- .../code/pdf_processing.py | 8 ++------ .../code/video_analysis.py | 14 +++++--------- .../03-building-ai-agents/code/basic_agent.py | 6 +++--- .../code/human_in_loop.py | 10 +++++++--- .../code/basic_tools.py | 8 ++++---- .../code/parallel_tools.py | 14 ++++++++------ .../code/tool_workflows.py | 18 ++++++++++-------- .../code/data_classification.py | 2 -- .../code/pydantic_schemas.py | 4 ++++ .../code/custom_endpoint.py | 4 +++- .../code/batch_processing.py | 16 +++++++++++----- .../code/fastapi_app.py | 1 + .../10-embeddings/code/image_embeddings.py | 1 + .../10-embeddings/code/rag_example.py | 3 +-- 17 files changed, 64 insertions(+), 52 deletions(-) diff --git a/libs/oci/pyproject.toml b/libs/oci/pyproject.toml index ffc7e875..5dffa310 100644 --- a/libs/oci/pyproject.toml +++ b/libs/oci/pyproject.toml @@ -104,6 +104,7 @@ warn_redundant_casts = true warn_unreachable = true warn_unused_configs = true warn_unused_ignores = true +exclude = ["tutorials/"] # Ignore missing imports only for specific untyped packages [[tool.mypy.overrides]] diff --git a/libs/oci/tutorials/01-getting-started/code/auth_examples.py b/libs/oci/tutorials/01-getting-started/code/auth_examples.py index a80c3a9f..64a74271 100644 --- a/libs/oci/tutorials/01-getting-started/code/auth_examples.py +++ b/libs/oci/tutorials/01-getting-started/code/auth_examples.py @@ -18,8 +18,8 @@ def example_api_key(): model_id=MODEL_ID, service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, - auth_type="API_KEY", # Optional, this is the default - auth_profile="DEFAULT", # Optional, uses DEFAULT profile + auth_type="API_KEY", # Optional, this is the default + auth_profile="DEFAULT", # Optional, uses DEFAULT profile ) return llm.invoke("Hello!") diff --git a/libs/oci/tutorials/01-getting-started/code/basic_chat.py b/libs/oci/tutorials/01-getting-started/code/basic_chat.py index 5a28e749..8b943bf9 100644 --- a/libs/oci/tutorials/01-getting-started/code/basic_chat.py +++ b/libs/oci/tutorials/01-getting-started/code/basic_chat.py @@ -15,7 +15,7 @@ model_kwargs={ "temperature": 0.7, "max_tokens": 500, - } + }, ) # Simple invocation diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py index c3a34ef0..8947ee36 100644 --- a/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py @@ -29,11 +29,7 @@ def analyze_pdf(pdf_path: str, prompt: str): message = HumanMessage( content=[ {"type": "text", "text": prompt}, - { - "type": "media", - "data": pdf_data, - "mime_type": "application/pdf" - }, + {"type": "media", "data": pdf_data, "mime_type": "application/pdf"}, ] 
) @@ -55,7 +51,7 @@ def extract_key_data(pdf_path: str): "1. All dates mentioned " "2. All monetary amounts " "3. Names of parties involved " - "Format as a structured list." + "Format as a structured list.", ) diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py index ad506759..b778d92e 100644 --- a/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py @@ -20,7 +20,7 @@ def analyze_video(video_path: str, prompt: str): compartment_id=COMPARTMENT_ID, model_kwargs={ "max_tokens": 2000, # Videos may need longer responses - } + }, ) # Load and encode video @@ -41,11 +41,7 @@ def analyze_video(video_path: str, prompt: str): message = HumanMessage( content=[ {"type": "text", "text": prompt}, - { - "type": "media", - "data": video_data, - "mime_type": mime_type - }, + {"type": "media", "data": video_data, "mime_type": mime_type}, ] ) @@ -58,7 +54,7 @@ def describe_video(video_path: str): return analyze_video( video_path, "Describe what's happening in this video. " - "Include: actions, people/objects, setting, and timeline of events." + "Include: actions, people/objects, setting, and timeline of events.", ) @@ -68,7 +64,7 @@ def extract_key_moments(video_path: str): video_path, "Identify and describe the key moments in this video. " "For each moment, provide: timestamp (if visible), what happens, " - "and why it's significant." + "and why it's significant.", ) @@ -77,7 +73,7 @@ def check_for_safety_issues(video_path: str): return analyze_video( video_path, "Analyze this video for any safety hazards or compliance issues. " - "List any concerns found with descriptions." + "List any concerns found with descriptions.", ) diff --git a/libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py b/libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py index 0a134678..2ad4f39d 100644 --- a/libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py +++ b/libs/oci/tutorials/03-building-ai-agents/code/basic_agent.py @@ -62,9 +62,9 @@ def main(): ) # Run the agent - result = agent.invoke({ - "messages": [HumanMessage(content="What's the weather and time in Tokyo?")] - }) + result = agent.invoke( + {"messages": [HumanMessage(content="What's the weather and time in Tokyo?")]} + ) # Print all messages to see the agent's reasoning print("Agent Execution Trace:") diff --git a/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py b/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py index f7cd9e93..ab83cab1 100644 --- a/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py +++ b/libs/oci/tutorials/03-building-ai-agents/code/human_in_loop.py @@ -65,9 +65,13 @@ def main(): # User requests an action print("User: Send an email to john@example.com about the meeting tomorrow") result = agent.invoke( - {"messages": [HumanMessage( - content="Send an email to john@example.com saying 'Meeting at 10am'" - )]}, + { + "messages": [ + HumanMessage( + content="Send an email to john@example.com saying 'Meeting at 10am'" + ) + ] + }, config=config, ) diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py b/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py index fe8aed8e..cb4e5af6 100644 --- a/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/basic_tools.py @@ -68,12 +68,12 @@ def main(): print(f"Arguments: 
{tool_call['args']}") # Execute the tool - result = get_weather.invoke(tool_call['args']) + result = get_weather.invoke(tool_call["args"]) print(f"Tool result: {result}") # Send result back to model messages.append(response) - messages.append(ToolMessage(content=result, tool_call_id=tool_call['id'])) + messages.append(ToolMessage(content=result, tool_call_id=tool_call["id"])) final_response = llm_with_tools.invoke(messages) print(f"Final answer: {final_response.content}") @@ -89,11 +89,11 @@ def main(): print(f"Tool requested: {tool_call['name']}") print(f"Arguments: {tool_call['args']}") - result = calculate.invoke(tool_call['args']) + result = calculate.invoke(tool_call["args"]) print(f"Tool result: {result}") messages.append(response) - messages.append(ToolMessage(content=result, tool_call_id=tool_call['id'])) + messages.append(ToolMessage(content=result, tool_call_id=tool_call["id"])) final_response = llm_with_tools.invoke(messages) print(f"Final answer: {final_response.content}") diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py b/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py index 2a0132c1..03a8cec1 100644 --- a/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/parallel_tools.py @@ -47,9 +47,9 @@ def execute_tools(tool_calls: list, tools_dict: dict) -> list: """Execute multiple tool calls and return ToolMessages.""" results = [] for tc in tool_calls: - tool_func = tools_dict[tc['name']] - result = tool_func.invoke(tc['args']) - results.append(ToolMessage(content=result, tool_call_id=tc['id'])) + tool_func = tools_dict[tc["name"]] + result = tool_func.invoke(tc["args"]) + results.append(ToolMessage(content=result, tool_call_id=tc["id"])) return results @@ -75,9 +75,11 @@ def main(): print("Query: Tell me about weather, time, and population of both cities") print("-" * 60) - messages = [HumanMessage( - content="Tell me the weather, time, and population of Chicago and New York." - )] + messages = [ + HumanMessage( + content="Tell me the weather, time, and population of Chicago and New York." + ) + ] response = llm_with_tools.invoke(messages) diff --git a/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py b/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py index 4a7dcfdf..59e81e17 100644 --- a/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py +++ b/libs/oci/tutorials/04-tool-calling-mastery/code/tool_workflows.py @@ -62,10 +62,10 @@ def run_workflow( # Execute tools messages.append(response) for tc in response.tool_calls: - tool_func = tools_dict[tc['name']] - result = tool_func.invoke(tc['args']) + tool_func = tools_dict[tc["name"]] + result = tool_func.invoke(tc["args"]) print(f" {tc['name']} -> {result[:60]}...") - messages.append(ToolMessage(content=result, tool_call_id=tc['id'])) + messages.append(ToolMessage(content=result, tool_call_id=tc["id"])) return "Max iterations reached" @@ -77,7 +77,7 @@ def main(): service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, max_sequential_tool_calls=10, # Allow multi-step workflows - tool_result_guidance=True, # Help model use results + tool_result_guidance=True, # Help model use results ) tools = [search_articles, get_article_content, summarize_text, save_research_note] @@ -89,10 +89,12 @@ def main(): print("Request: Research AI in healthcare, summarize findings, and save notes") print("=" * 60) - messages = [HumanMessage( - content="Research AI in healthcare. 
Get the first article, " - "summarize it, and save a note with the key findings." - )] + messages = [ + HumanMessage( + content="Research AI in healthcare. Get the first article, " + "summarize it, and save a note with the key findings." + ) + ] final_answer = run_workflow(llm_with_tools, messages, tools_dict) diff --git a/libs/oci/tutorials/05-structured-output/code/data_classification.py b/libs/oci/tutorials/05-structured-output/code/data_classification.py index 17713c21..507ab860 100644 --- a/libs/oci/tutorials/05-structured-output/code/data_classification.py +++ b/libs/oci/tutorials/05-structured-output/code/data_classification.py @@ -73,14 +73,12 @@ def main(): Estimated time to resolution: 4 hours. Contact the SRE team for updates. """, - """ Q4 2025 Financial Report Revenue increased by 15% compared to Q3. Operating expenses remained stable. Net profit margin improved to 12%. Recommend continued investment in R&D and marketing. """, - """ Employee Handbook Update Section 5.3 regarding remote work policy has been updated. diff --git a/libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py b/libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py index a29b5270..73af6cf6 100644 --- a/libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py +++ b/libs/oci/tutorials/05-structured-output/code/pydantic_schemas.py @@ -15,6 +15,7 @@ # Simple schema class Contact(BaseModel): """A contact with name and email.""" + name: str = Field(description="The person's full name") email: str = Field(description="The email address") phone: Optional[str] = Field(default=None, description="Phone number if available") @@ -23,6 +24,7 @@ class Contact(BaseModel): # Nested schema class Address(BaseModel): """A physical address.""" + street: str = Field(description="Street address") city: str = Field(description="City name") state: str = Field(description="State or province") @@ -31,6 +33,7 @@ class Address(BaseModel): class Company(BaseModel): """A company with address.""" + name: str = Field(description="Company name") industry: str = Field(description="Industry sector") headquarters: Address = Field(description="Main office location") @@ -46,6 +49,7 @@ class Sentiment(str, Enum): class ReviewAnalysis(BaseModel): """Analysis of a product review.""" + summary: str = Field(description="Brief summary") sentiment: Sentiment = Field(description="Overall sentiment") keywords: List[str] = Field(description="Key topics") diff --git a/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py b/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py index d70077c7..cbedffb3 100644 --- a/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py +++ b/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py @@ -115,7 +115,9 @@ def demo_custom_deployment(): print("=" * 50) # Replace with your endpoint - endpoint = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" + endpoint = ( + "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" + ) chat = CustomFormatDeployment( endpoint=endpoint, diff --git a/libs/oci/tutorials/07-async-for-production/code/batch_processing.py b/libs/oci/tutorials/07-async-for-production/code/batch_processing.py index 650df118..b9478772 100644 --- a/libs/oci/tutorials/07-async-for-production/code/batch_processing.py +++ b/libs/oci/tutorials/07-async-for-production/code/batch_processing.py @@ -29,9 +29,7 @@ async def limited_invoke(prompt: str, index: int): return (index, f"Error: {e}") # Create tasks for all 
prompts - tasks = [ - limited_invoke(p, i) for i, p in enumerate(prompts) - ] + tasks = [limited_invoke(p, i) for i, p in enumerate(prompts)] # Process all with limited concurrency completed = await asyncio.gather(*tasks) @@ -53,8 +51,16 @@ async def main(): prompts = [ f"In one sentence, what is {topic}?" for topic in [ - "Python", "JavaScript", "Rust", "Go", "TypeScript", - "Java", "C++", "Swift", "Kotlin", "Ruby" + "Python", + "JavaScript", + "Rust", + "Go", + "TypeScript", + "Java", + "C++", + "Swift", + "Kotlin", + "Ruby", ] ] diff --git a/libs/oci/tutorials/07-async-for-production/code/fastapi_app.py b/libs/oci/tutorials/07-async-for-production/code/fastapi_app.py index 7e7a3379..afc01e2e 100644 --- a/libs/oci/tutorials/07-async-for-production/code/fastapi_app.py +++ b/libs/oci/tutorials/07-async-for-production/code/fastapi_app.py @@ -109,4 +109,5 @@ async def chat_batch(request: BatchRequest): # Example usage when running directly if __name__ == "__main__": import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/libs/oci/tutorials/10-embeddings/code/image_embeddings.py b/libs/oci/tutorials/10-embeddings/code/image_embeddings.py index b381af69..41cc6889 100644 --- a/libs/oci/tutorials/10-embeddings/code/image_embeddings.py +++ b/libs/oci/tutorials/10-embeddings/code/image_embeddings.py @@ -36,6 +36,7 @@ def main(): # Check if images exist (demo mode) import os + images_exist = all(os.path.exists(p) for p in image_paths) if not images_exist: diff --git a/libs/oci/tutorials/10-embeddings/code/rag_example.py b/libs/oci/tutorials/10-embeddings/code/rag_example.py index f3517f86..19d73ebe 100644 --- a/libs/oci/tutorials/10-embeddings/code/rag_example.py +++ b/libs/oci/tutorials/10-embeddings/code/rag_example.py @@ -24,8 +24,7 @@ def retrieve_context( # Calculate similarities similarities = [ - (i, cosine_similarity(query_vector, dv)) - for i, dv in enumerate(doc_vectors) + (i, cosine_similarity(query_vector, dv)) for i, dv in enumerate(doc_vectors) ] # Sort by similarity (descending) From d4c218de0946f4a3ad39ef88b0b8cf51769b5f02 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Thu, 19 Feb 2026 13:49:19 -0500 Subject: [PATCH 08/16] Restore original README.md content --- README.md | 121 +++++++++++++----------------------------------------- 1 file changed, 29 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 98732a0f..15252252 100644 --- a/README.md +++ b/README.md @@ -1,127 +1,64 @@ -# LangChain Oracle +# 🦜️🔗 LangChain 🤝 Oracle [![PyPI - langchain-oci](https://img.shields.io/pypi/v/langchain-oci?label=langchain-oci)](https://pypi.org/project/langchain-oci/) [![PyPI - langchain-oracledb](https://img.shields.io/pypi/v/langchain-oracledb?label=langchain-oracledb)](https://pypi.org/project/langchain-oracledb/) [![Python versions](https://img.shields.io/pypi/pyversions/langchain-oci)](https://pypi.org/project/langchain-oci/) [![License](https://img.shields.io/badge/License-UPL%201.0-green)](https://opensource.org/licenses/UPL) -Official LangChain integrations for [Oracle Cloud Infrastructure (OCI)](https://cloud.oracle.com/) and [Oracle AI Vector Search](https://www.oracle.com/database/ai-vector-search/). +Welcome to the official repository for LangChain integration with [Oracle Cloud Infrastructure (OCI)](https://cloud.oracle.com/) and [Oracle AI Vector Search](https://www.oracle.com/database/ai-vector-search/). 
This project provides native LangChain components for interacting with Oracle's AI services—providing support for **OCI Generative AI**, **OCI Data Science** and **Oracle AI Vector Search**. -## Packages +## Features -| Package | Description | Install | -|---------|-------------|---------| -| [**langchain-oci**](./libs/oci/) | OCI Generative AI & Data Science | `pip install langchain-oci` | -| [**langchain-oracledb**](./libs/oracledb/) | Oracle AI Vector Search | `pip install langchain-oracledb` | +- **LLMs**: Includes LLM classes for OCI services like [Generative AI](https://cloud.oracle.com/ai-services/generative-ai) and [ModelDeployment Endpoints](https://cloud.oracle.com/ai-services/model-deployment), allowing you to leverage their language models within LangChain. +- **Agents**: Includes Runnables to support [Oracle Generative AI Agents](https://www.oracle.com/artificial-intelligence/generative-ai/agents/), allowing you to leverage Generative AI Agents within LangChain and LangGraph. +- **Vector Search**: Offers native integration with [Oracle AI Vector Search](https://www.oracle.com/database/ai-vector-search/) through a LangChain-compatible components. This enables pipelines that can: + - Load the documents from various sources using `OracleDocLoader` + - Summarize content within/outside the database using `OracleSummary` + - Generate embeddings within/outside the database using `OracleEmbeddings` + - Chunk according to different requirements using Advanced Oracle Capabilities from `OracleTextSplitter` + - Store, index, and query vectors using `OracleVS` +- **More to come**: This repository will continue to expand and offer additional components for various OCI and Oracle AI services as development progresses. ---- - -## langchain-oci Features - -Full-featured LangChain integration for OCI Generative AI services. - -### Chat Models & Providers - -| Provider | Models | Vision | Tool Calling | -|----------|--------|--------|--------------| -| **Meta** | Llama 3.2, 3.3, 4 | ✅ | ✅ (parallel in Llama 4) | -| **Google** | Gemini 2.0/2.5 Flash | ✅ | ✅ | -| **xAI** | Grok 4 | ✅ | ✅ | -| **Cohere** | Command R+, Command A | ✅ (V2) | ✅ | -| **OpenAI** | GPT-4, o1 | - | ✅ | - -### Key Features - -| Feature | Description | -|---------|-------------| -| **Vision & Multimodal** | 13+ vision models, Gemini PDF/video/audio support | -| **AI Agents** | `create_oci_agent()` with checkpointing & human-in-the-loop | -| **Tool Calling** | Parallel tools, infinite loop detection, `tool_result_guidance` | -| **Structured Output** | Pydantic schemas, `json_mode`, `json_schema` | -| **Async Support** | `ainvoke()`, `astream()`, `abatch()` | -| **Embeddings** | Text & image embeddings in same vector space | - -### Quick Example - -```python -from langchain_oci import ChatOCIGenAI - -llm = ChatOCIGenAI( - model_id="meta.llama-3.3-70b-instruct", - service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - compartment_id="ocid1.compartment.oc1..xxx", -) - -response = llm.invoke("Hello!") -print(response.content) -``` - -**[See full documentation →](./libs/oci/)** - -**[Explore tutorials →](./libs/oci/tutorials/)** +> This project merges and replaces earlier OCI and Oracle AI Vector Search integrations from the `langchain-community` repository and unifies contributions from Oracle teams. +> All integrations in this package assume that you have the credentials setup to connect with oci and database services. 
--- -## langchain-oracledb Features - -Native integration with Oracle AI Vector Search. - -| Component | Description | -|-----------|-------------| -| `OracleVS` | Vector store with similarity search | -| `OracleDocLoader` | Document loading from various sources | -| `OracleEmbeddings` | In-database embedding generation | -| `OracleTextSplitter` | Advanced text chunking | -| `OracleSummary` | In-database summarization | - -### Quick Example - -```python -from langchain_oracledb import OracleVS +## Installation -vectorstore = OracleVS( - client=connection, - embedding_function=embeddings, - table_name="my_vectors", -) +For OCI services: -results = vectorstore.similarity_search("search query", k=5) +```bash +python -m pip install -U langchain-oci ``` -**[See full documentation →](./libs/oracledb/)** - ---- - -## Installation +For Oracle AI Vector Search services: ```bash -# OCI Generative AI -pip install langchain-oci oci - -# Oracle AI Vector Search -pip install langchain-oracledb oracledb +python -m pip install -U langchain-oracledb ``` --- -## Documentation +## Quick Start -- **[langchain-oci Documentation](./libs/oci/README.md)** - Chat models, embeddings, agents -- **[langchain-oci Tutorials](./libs/oci/tutorials/)** - Step-by-step learning path -- **[langchain-oracledb Documentation](./libs/oracledb/README.md)** - Vector search integration +This repository includes three main integration categories. For detailed information, please refer to the respective libraries: ---- +- [OCI Generative AI](https://github.com/oracle/langchain-oracle/tree/main/libs/oci) +- [OCI Data Science (Model Deployment)](https://github.com/oracle/langchain-oracle/tree/main/libs/oci) +- [Oracle AI Vector Search](https://github.com/oracle/langchain-oracle/tree/main/libs/oracledb) ## Contributing -This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md). +This project welcomes contributions from the community. Before submitting a pull request, please [review our contribution guide](./CONTRIBUTING.md) ## Security -Please consult the [security guide](./SECURITY.md) for our responsible security vulnerability disclosure process. +Please consult the [security guide](./SECURITY.md) for our responsible security vulnerability disclosure process ## License Copyright (c) 2025 Oracle and/or its affiliates. -Released under the Universal Permissive License v1.0 as shown at +Released under the Universal Permissive License v1.0 as shown at + From 82613f382f40ff4abebc289edda5290ee9bcc9d1 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Fri, 20 Feb 2026 09:57:31 -0500 Subject: [PATCH 09/16] Update Gemini model references from 2.0 to 2.5 Replace all references to google.gemini-2.0-flash with google.gemini-2.5-flash to match currently available models in OCI Generative AI. 
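For reference, a minimal sketch of the updated configuration (the service endpoint and compartment OCID below are placeholder values, as used elsewhere in the tutorials):

```python
from langchain_oci import ChatOCIGenAI

# Previously: model_id="google.gemini-2.0-flash"
llm = ChatOCIGenAI(
    model_id="google.gemini-2.5-flash",
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="ocid1.compartment.oc1..xxx",  # placeholder OCID
)
print(llm.invoke("Hello!").content)
```
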
Files updated: - README.md - docs/API_REFERENCE.md - docs/MODELS.md - tutorials/01-getting-started/README.md - tutorials/02-vision-and-multimodal/README.md - tutorials/02-vision-and-multimodal/code/* - tutorials/09-provider-deep-dive/README.md - tutorials/09-provider-deep-dive/code/* - tests/integration_tests/chat_models/test_gemini_provider.py --- libs/oci/README.md | 4 ++-- libs/oci/docs/API_REFERENCE.md | 2 +- libs/oci/docs/MODELS.md | 14 +++++++------- .../chat_models/test_gemini_provider.py | 10 +++++----- libs/oci/tutorials/01-getting-started/README.md | 2 +- .../tutorials/02-vision-and-multimodal/README.md | 8 ++++---- .../code/pdf_processing.py | 2 +- .../code/video_analysis.py | 2 +- libs/oci/tutorials/09-provider-deep-dive/README.md | 14 +++++++------- .../09-provider-deep-dive/code/gemini_examples.py | 10 +++++----- .../code/provider_comparison.py | 12 ++++++------ 11 files changed, 40 insertions(+), 40 deletions(-) diff --git a/libs/oci/README.md b/libs/oci/README.md index efe51f2c..4a48a870 100644 --- a/libs/oci/README.md +++ b/libs/oci/README.md @@ -203,7 +203,7 @@ import base64 from langchain_core.messages import HumanMessage from langchain_oci import ChatOCIGenAI -llm = ChatOCIGenAI(model_id="google.gemini-2.0-flash", ...) +llm = ChatOCIGenAI(model_id="google.gemini-2.5-flash", ...) # PDF with open("doc.pdf", "rb") as f: @@ -489,7 +489,7 @@ response = chat.invoke("Hello!") ### Google Gemini ```python -"google.gemini-2.0-flash" # Fast, multimodal +"google.gemini-2.5-flash" # Fast, multimodal "google.gemini-2.5-flash" # Latest "google.gemini-2.5-pro" # Most capable ``` diff --git a/libs/oci/docs/API_REFERENCE.md b/libs/oci/docs/API_REFERENCE.md index 8f7e2b41..cb7866a8 100644 --- a/libs/oci/docs/API_REFERENCE.md +++ b/libs/oci/docs/API_REFERENCE.md @@ -278,7 +278,7 @@ List of vision-capable model IDs. from langchain_oci import VISION_MODELS print(VISION_MODELS) -# ['meta.llama-3.2-90b-vision-instruct', 'google.gemini-2.0-flash', ...] +# ['meta.llama-3.2-90b-vision-instruct', 'google.gemini-2.5-flash', ...] ``` ### IMAGE_EMBEDDING_MODELS diff --git a/libs/oci/docs/MODELS.md b/libs/oci/docs/MODELS.md index fdca575d..d748bab1 100644 --- a/libs/oci/docs/MODELS.md +++ b/libs/oci/docs/MODELS.md @@ -26,7 +26,7 @@ Complete reference of models available in OCI Generative AI. 
| Model ID | Type | Features | Context | |----------|------|----------|---------| -| `google.gemini-2.0-flash` | Multimodal | PDF, video, audio, image | 1M | +| `google.gemini-2.5-flash` | Multimodal | PDF, video, audio, image | 1M | | `google.gemini-2.5-flash` | Multimodal | PDF, video, audio, image | 1M | | `google.gemini-2.5-pro` | Multimodal | Most capable | 1M | @@ -159,13 +159,13 @@ Check [OCI documentation](https://docs.oracle.com/en-us/iaas/Content/generative- |----------|-------------------| | General chat | `meta.llama-3.3-70b-instruct` | | Image analysis | `meta.llama-3.2-90b-vision-instruct` | -| PDF/document processing | `google.gemini-2.0-flash` | -| Video understanding | `google.gemini-2.0-flash` | -| Audio transcription | `google.gemini-2.0-flash` | +| PDF/document processing | `google.gemini-2.5-flash` | +| Video understanding | `google.gemini-2.5-flash` | +| Audio transcription | `google.gemini-2.5-flash` | | Tool-heavy workflows | `meta.llama-4-scout-17b-16e-instruct` | | RAG with citations | `cohere.command-r-plus` | | Complex reasoning | `xai.grok-4-fast-reasoning` | -| Fast responses | `google.gemini-2.0-flash` | +| Fast responses | `google.gemini-2.5-flash` | | Embeddings (text) | `cohere.embed-english-v3.0` | | Embeddings (multimodal) | `cohere.embed-v4.0` | @@ -173,7 +173,7 @@ Check [OCI documentation](https://docs.oracle.com/en-us/iaas/Content/generative- | Priority | Model | |----------|-------| -| Lowest latency | `google.gemini-2.0-flash` | -| Highest throughput | `google.gemini-2.0-flash` | +| Lowest latency | `google.gemini-2.5-flash` | +| Highest throughput | `google.gemini-2.5-flash` | | Best quality | `meta.llama-3.3-70b-instruct`, `cohere.command-r-plus` | | Best for tools | `meta.llama-4-scout-17b-16e-instruct` | diff --git a/libs/oci/tests/integration_tests/chat_models/test_gemini_provider.py b/libs/oci/tests/integration_tests/chat_models/test_gemini_provider.py index 3e8bae7e..725f75c2 100644 --- a/libs/oci/tests/integration_tests/chat_models/test_gemini_provider.py +++ b/libs/oci/tests/integration_tests/chat_models/test_gemini_provider.py @@ -59,7 +59,7 @@ def test_gemini_basic_invoke() -> None: pytest.skip("Set OCI_RUN_GEMINI_INTEGRATION=1 to run this test") cfg = _get_config() - model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.0-flash-001") + model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.5-flash") llm = ChatOCIGenAI( model_id=model_id, @@ -83,7 +83,7 @@ def test_gemini_max_output_tokens_mapping() -> None: pytest.skip("Set OCI_RUN_GEMINI_INTEGRATION=1 to run this test") cfg = _get_config() - model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.0-flash-001") + model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.5-flash") with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -116,7 +116,7 @@ def test_gemini_both_tokens_params() -> None: pytest.skip("Set OCI_RUN_GEMINI_INTEGRATION=1 to run this test") cfg = _get_config() - model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.0-flash-001") + model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.5-flash") with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -153,7 +153,7 @@ def test_gemini_streaming() -> None: pytest.skip("Set OCI_RUN_GEMINI_INTEGRATION=1 to run this test") cfg = _get_config() - model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.0-flash-001") + model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.5-flash") llm = ChatOCIGenAI( model_id=model_id, @@ -182,7 +182,7 @@ def 
test_gemini_streaming_with_max_output_tokens() -> None: pytest.skip("Set OCI_RUN_GEMINI_INTEGRATION=1 to run this test") cfg = _get_config() - model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.0-flash-001") + model_id = os.getenv("OCI_MODEL_ID", "google.gemini-2.5-flash") with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") diff --git a/libs/oci/tutorials/01-getting-started/README.md b/libs/oci/tutorials/01-getting-started/README.md index b43d086b..4f4e75ea 100644 --- a/libs/oci/tutorials/01-getting-started/README.md +++ b/libs/oci/tutorials/01-getting-started/README.md @@ -178,7 +178,7 @@ llm = ChatOCIGenAI( "cohere.command-a-03-2025" # Latest, with vision # Google Gemini models -"google.gemini-2.0-flash" # Fast, multimodal +"google.gemini-2.5-flash" # Fast, multimodal # xAI Grok models "xai.grok-4" # Reasoning and vision diff --git a/libs/oci/tutorials/02-vision-and-multimodal/README.md b/libs/oci/tutorials/02-vision-and-multimodal/README.md index b5c758c3..8b2e73bf 100644 --- a/libs/oci/tutorials/02-vision-and-multimodal/README.md +++ b/libs/oci/tutorials/02-vision-and-multimodal/README.md @@ -226,7 +226,7 @@ from langchain_core.messages import HumanMessage from langchain_oci import ChatOCIGenAI llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", compartment_id="ocid1.compartment.oc1..xxx", ) @@ -278,7 +278,7 @@ from langchain_core.messages import HumanMessage from langchain_oci import ChatOCIGenAI llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", compartment_id="ocid1.compartment.oc1..xxx", ) @@ -330,7 +330,7 @@ from langchain_core.messages import HumanMessage from langchain_oci import ChatOCIGenAI llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", compartment_id="ocid1.compartment.oc1..xxx", ) @@ -376,7 +376,7 @@ Best for: Multimodal (PDF, video, audio), complex reasoning ```python llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", compartment_id="ocid1.compartment.oc1..xxx", ) diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py index 8947ee36..0fe75ffb 100644 --- a/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py @@ -16,7 +16,7 @@ def analyze_pdf(pdf_path: str, prompt: str): """Analyze a PDF document with Gemini.""" # Create Gemini model llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, ) diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py index b778d92e..1119cf89 100644 --- a/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py +++ b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py @@ -15,7 +15,7 @@ def analyze_video(video_path: str, prompt: str): """Analyze a video file with Gemini.""" llm = ChatOCIGenAI( - 
model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, model_kwargs={ diff --git a/libs/oci/tutorials/09-provider-deep-dive/README.md b/libs/oci/tutorials/09-provider-deep-dive/README.md index a0837c99..5acb1fd0 100644 --- a/libs/oci/tutorials/09-provider-deep-dive/README.md +++ b/libs/oci/tutorials/09-provider-deep-dive/README.md @@ -83,7 +83,7 @@ llm = ChatOCIGenAI(model_id="meta.llama-3.3-70b-instruct", ...) llm = ChatOCIGenAI(model_id="cohere.command-r-plus", ...) # Auto-detects GeminiProvider -llm = ChatOCIGenAI(model_id="google.gemini-2.0-flash", ...) +llm = ChatOCIGenAI(model_id="google.gemini-2.5-flash", ...) ``` ### Manual Override @@ -212,7 +212,7 @@ Gemini offers advanced multimodal capabilities. | Model | Features | |-------|----------| -| `google.gemini-2.0-flash` | Fast, multimodal | +| `google.gemini-2.5-flash` | Fast, multimodal | | `google.gemini-2.5-flash` | Latest, multimodal | | `google.gemini-2.5-pro` | Most capable | @@ -222,7 +222,7 @@ Gemini offers advanced multimodal capabilities. from langchain_oci import ChatOCIGenAI llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", compartment_id="ocid1.compartment.oc1..xxx", ) @@ -238,7 +238,7 @@ import base64 from langchain_core.messages import HumanMessage from langchain_oci import ChatOCIGenAI -llm = ChatOCIGenAI(model_id="google.gemini-2.0-flash", ...) +llm = ChatOCIGenAI(model_id="google.gemini-2.5-flash", ...) # Load PDF with open("document.pdf", "rb") as f: @@ -299,7 +299,7 @@ response = llm.invoke([message]) ```python llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", model_kwargs={ "max_tokens": 1024, # Note: max_tokens, not max_output_tokens "temperature": 0.7, @@ -508,7 +508,7 @@ print(response.content) if need_vision and not need_pdf: model = "meta.llama-3.2-90b-vision-instruct" elif need_multimodal: # PDF, video, audio - model = "google.gemini-2.0-flash" + model = "google.gemini-2.5-flash" # For tool-heavy workflows → Llama 4 (parallel tools) if many_tools and need_parallel: @@ -535,7 +535,7 @@ def get_llm_for_task(task_type: str) -> ChatOCIGenAI: "model_id": "meta.llama-3.2-90b-vision-instruct", }, "multimodal": { - "model_id": "google.gemini-2.0-flash", + "model_id": "google.gemini-2.5-flash", }, "rag": { "model_id": "cohere.command-r-plus", diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py b/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py index 1a2684ac..9391c857 100644 --- a/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py +++ b/libs/oci/tutorials/09-provider-deep-dive/code/gemini_examples.py @@ -18,7 +18,7 @@ def basic_gemini_chat(): print("=" * 50) llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, ) @@ -33,7 +33,7 @@ def pdf_processing(): print("=" * 50) llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, ) @@ -77,7 +77,7 @@ def video_analysis(): print("=" * 50) llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, ) @@ -110,7 +110,7 @@ def audio_transcription(): 
print("=" * 50) llm = ChatOCIGenAI( - model_id="google.gemini-2.0-flash", + model_id="google.gemini-2.5-flash", service_endpoint=SERVICE_ENDPOINT, compartment_id=COMPARTMENT_ID, ) @@ -145,7 +145,7 @@ def gemini_parameters(): # Note: OCI uses max_tokens, not max_output_tokens # Example configuration: # llm = ChatOCIGenAI( - # model_id="google.gemini-2.0-flash", + # model_id="google.gemini-2.5-flash", # service_endpoint=SERVICE_ENDPOINT, # compartment_id=COMPARTMENT_ID, # model_kwargs={ diff --git a/libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py b/libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py index 8bb3d179..6b800e48 100644 --- a/libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py +++ b/libs/oci/tutorials/09-provider-deep-dive/code/provider_comparison.py @@ -60,13 +60,13 @@ def provider_selection_guide(): Use this decision tree to pick the right model: 1. Do you need to process PDFs, videos, or audio? - YES → google.gemini-2.0-flash or google.gemini-2.5-flash + YES → google.gemini-2.5-flash or google.gemini-2.5-flash 2. Do you need parallel tool calling (multiple tools at once)? YES → meta.llama-4-scout-17b-16e-instruct 3. Do you need vision (image understanding)? - - Fast inference → google.gemini-2.0-flash + - Fast inference → google.gemini-2.5-flash - High quality → meta.llama-3.2-90b-vision-instruct - Reasoning → xai.grok-4 @@ -81,7 +81,7 @@ def provider_selection_guide(): → cohere.command-r-plus (excellent reasoning) 7. Need speed and low latency? - → google.gemini-2.0-flash (fastest) + → google.gemini-2.5-flash (fastest) → meta.llama-4-scout-17b-16e-instruct (fast + tools) """ print(guide) @@ -97,7 +97,7 @@ def create_task_specific_llm(task: str) -> ChatOCIGenAI: "description": "Image analysis and understanding", }, "multimodal": { - "model_id": "google.gemini-2.0-flash", + "model_id": "google.gemini-2.5-flash", "description": "PDF, video, audio, and image processing", }, "rag": { @@ -117,7 +117,7 @@ def create_task_specific_llm(task: str) -> ChatOCIGenAI: "description": "General-purpose assistant", }, "fast": { - "model_id": "google.gemini-2.0-flash", + "model_id": "google.gemini-2.5-flash", "description": "Low-latency responses", }, } @@ -170,7 +170,7 @@ def feature_matrix(): # Check specific models models_to_check = [ "meta.llama-3.3-70b-instruct", - "google.gemini-2.0-flash", + "google.gemini-2.5-flash", "cohere.command-r-plus", "xai.grok-4", ] From 245cb96fcd03ecf795fd09f713c10fb133fa8219 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Fri, 20 Feb 2026 10:10:04 -0500 Subject: [PATCH 10/16] Add prerequisites and troubleshooting for OpenAI Responses API tutorial - Clarify that OpenAI Responses API must be enabled in tenancy - Add note about 404 errors when API is not available - Add troubleshooting entry for /responses endpoint 404 --- .../tutorials/08-openai-responses-api/README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/libs/oci/tutorials/08-openai-responses-api/README.md b/libs/oci/tutorials/08-openai-responses-api/README.md index 47584e8c..13831052 100644 --- a/libs/oci/tutorials/08-openai-responses-api/README.md +++ b/libs/oci/tutorials/08-openai-responses-api/README.md @@ -13,13 +13,16 @@ Use OpenAI-compatible models through OCI with conversation persistence and advan ## Prerequisites - Completed [Tutorial 01: Getting Started](../01-getting-started/) -- Access to OCI Generative AI with OpenAI-compatible models +- Access to OCI Generative AI with **OpenAI Responses API 
enabled** +- OpenAI-compatible models available (e.g., `openai.gpt-4.1`) - Additional packages installed ```bash pip install oci-openai langchain-openai langchain-oci ``` +> **Note:** The OpenAI Responses API is a specific OCI feature that may not be available in all tenancies or regions. If you receive a 404 error, contact your OCI administrator to enable the Responses API, or use `ChatOCIGenAI` with standard models instead. + ## Concepts Covered | Class/Feature | Description | @@ -455,3 +458,12 @@ Model 'openai.gpt-4.1' not found - Check model is available in your region - Verify compartment has GenAI access - Use `oci generative-ai model list` to see available models + +### 404 Not Found on /responses + +``` +openai.NotFoundError: Error code: 404 +``` +- The OpenAI Responses API may not be enabled in your tenancy +- Contact your OCI administrator to enable the Responses API +- Alternative: Use `ChatOCIGenAI` with standard models like `meta.llama-3.3-70b-instruct` From 9351b6896b43e3e608c9d44f2bbd7c9754c51840 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Fri, 20 Feb 2026 10:21:23 -0500 Subject: [PATCH 11/16] Remove Tutorial 08 (OpenAI Responses API) The OpenAI Responses API (/responses endpoint) is not available in current OCI tenancies. Users should use ChatOCIGenAI with standard models instead. --- .../08-openai-responses-api/README.md | 469 ------------------ .../code/conversation_store.py | 127 ----- .../code/openai_compat.py | 122 ----- .../code/tools_and_search.py | 146 ------ libs/oci/tutorials/README.md | 14 +- 5 files changed, 6 insertions(+), 872 deletions(-) delete mode 100644 libs/oci/tutorials/08-openai-responses-api/README.md delete mode 100644 libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py delete mode 100644 libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py delete mode 100644 libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py diff --git a/libs/oci/tutorials/08-openai-responses-api/README.md b/libs/oci/tutorials/08-openai-responses-api/README.md deleted file mode 100644 index 13831052..00000000 --- a/libs/oci/tutorials/08-openai-responses-api/README.md +++ /dev/null @@ -1,469 +0,0 @@ -# Tutorial 08: OpenAI Responses API - -Use OpenAI-compatible models through OCI with conversation persistence and advanced features. - -## What You'll Learn - -- Configure `ChatOCIOpenAI` for OpenAI Responses API -- Use conversation stores for persistent memory -- Authenticate with various OCI methods -- Access web search and MCP tools -- Migrate from OpenAI to OCI - -## Prerequisites - -- Completed [Tutorial 01: Getting Started](../01-getting-started/) -- Access to OCI Generative AI with **OpenAI Responses API enabled** -- OpenAI-compatible models available (e.g., `openai.gpt-4.1`) -- Additional packages installed - -```bash -pip install oci-openai langchain-openai langchain-oci -``` - -> **Note:** The OpenAI Responses API is a specific OCI feature that may not be available in all tenancies or regions. If you receive a 404 error, contact your OCI administrator to enable the Responses API, or use `ChatOCIGenAI` with standard models instead. - -## Concepts Covered - -| Class/Feature | Description | -|---------------|-------------| -| `ChatOCIOpenAI` | OCI client for OpenAI Responses API | -| Conversation Store | Persistent conversation memory | -| Web Search | Built-in web search tool | -| Hosted MCP | Model Context Protocol integration | - ---- - -## Part 1: What is ChatOCIOpenAI? 
- -`ChatOCIOpenAI` provides access to OpenAI-compatible models through OCI's Generative AI service. It extends LangChain's `ChatOpenAI` with: - -- OCI authentication (API keys, session tokens, principals) -- Conversation stores for persistent memory -- Regional endpoints across OCI -- Access to web search and hosted MCP tools - -### Architecture - -``` -┌─────────────────────────────────────────────────────────┐ -│ Your Application │ -│ │ -│ ┌──────────────────────────────────────────────────┐ │ -│ │ ChatOCIOpenAI │ │ -│ │ ┌──────────┐ ┌──────────┐ ┌──────────────┐ │ │ -│ │ │ OCI Auth │ │ Conv. │ │ OpenAI │ │ │ -│ │ │ Handler │ │ Store │ │ Responses API│ │ │ -│ │ └──────────┘ └──────────┘ └──────────────┘ │ │ -│ └──────────────────────────────────────────────────┘ │ -│ │ │ -└──────────────────────────┼───────────────────────────────┘ - │ - ▼ - ┌────────────────────────┐ - │ OCI Generative AI │ - │ OpenAI-Compatible │ - │ Endpoint │ - └────────────────────────┘ -``` - ---- - -## Part 2: Authentication Setup - -`ChatOCIOpenAI` uses the `oci-openai` package for authentication. - -### Session Token Authentication - -```python -from oci_openai import OciSessionAuth -from langchain_oci import ChatOCIOpenAI - -# Authenticate with OCI CLI session -# First run: oci session authenticate --profile-name MY_PROFILE -auth = OciSessionAuth(profile_name="MY_PROFILE") - -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", -) -``` - -### Resource Principal Authentication - -For OCI Functions, Jobs, and other OCI resources: - -```python -from oci_openai import OciResourcePrincipalAuth -from langchain_oci import ChatOCIOpenAI - -auth = OciResourcePrincipalAuth() - -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", -) -``` - -### Instance Principal Authentication - -For OCI Compute instances: - -```python -from oci_openai import OciInstancePrincipalAuth -from langchain_oci import ChatOCIOpenAI - -auth = OciInstancePrincipalAuth() - -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", -) -``` - ---- - -## Part 3: Basic Usage - -### Simple Invocation - -```python -from oci_openai import OciSessionAuth -from langchain_oci import ChatOCIOpenAI - -client = ChatOCIOpenAI( - auth=OciSessionAuth(profile_name="DEFAULT"), - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", -) - -# Simple message -response = client.invoke("What is the capital of France?") -print(response.content) -``` - -### With System Message - -```python -messages = [ - ("system", "You are a helpful translator. Translate user messages to French."), - ("human", "Hello, how are you?"), -] - -response = client.invoke(messages) -print(response.content) # "Bonjour, comment allez-vous?" -``` - -### Prompt Chaining - -```python -from langchain_core.prompts import ChatPromptTemplate - -prompt = ChatPromptTemplate.from_messages([ - ("system", "You are a helpful assistant that translates {input_language} to {output_language}."), - ("human", "{input}"), -]) - -chain = prompt | client - -response = chain.invoke({ - "input_language": "English", - "output_language": "German", - "input": "I love programming.", -}) -print(response.content) # "Ich liebe Programmieren." 
-``` - ---- - -## Part 4: Conversation Stores - -Persist conversations across sessions using OCI Conversation Stores. - -### Creating a Conversation Store - -First, create a conversation store in OCI Console or via CLI: - -```bash -oci generative-ai conversation-store create \ - --compartment-id ocid1.compartment.oc1..xxx \ - --display-name "My Conversation Store" \ - --region us-chicago-1 -``` - -### Using Conversation Store - -```python -from oci_openai import OciSessionAuth -from langchain_oci import ChatOCIOpenAI - -# With conversation store for persistent memory -client = ChatOCIOpenAI( - auth=OciSessionAuth(profile_name="DEFAULT"), - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", - conversation_store_id="ocid1.conversationstore.oc1..xxx", -) - -# First conversation -response1 = client.invoke("My name is Alice.") -print(response1.content) - -# Later conversation - model remembers -response2 = client.invoke("What is my name?") -print(response2.content) # "Your name is Alice." -``` - ---- - -## Part 5: Function Calling - -Bind tools to the model for function calling. - -```python -from pydantic import BaseModel, Field -from langchain_oci import ChatOCIOpenAI - -class GetWeather(BaseModel): - """Get weather for a location.""" - location: str = Field(..., description="The city and state, e.g. San Francisco, CA") - -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", -) - -# Bind tools -llm_with_tools = client.bind_tools([GetWeather]) - -# Model will call the function -response = llm_with_tools.invoke("What is the weather like in San Francisco?") - -# Access tool calls -for tool_call in response.tool_calls: - print(f"Function: {tool_call['name']}") - print(f"Arguments: {tool_call['args']}") -``` - ---- - -## Part 6: Web Search - -Use the built-in web search capability. - -```python -from langchain_oci import ChatOCIOpenAI - -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", -) - -# Enable web search -web_search_tool = {"type": "web_search_preview"} -llm_with_search = client.bind_tools([web_search_tool]) - -# Ask about current events -response = llm_with_search.invoke("What was a positive news story from today?") -print(response.content) -``` - ---- - -## Part 7: Hosted MCP (Model Context Protocol) - -Access external knowledge sources via hosted MCP servers. - -```python -from langchain_oci import ChatOCIOpenAI - -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", -) - -# Bind MCP tool -mcp_tool = { - "type": "mcp", - "server_label": "deepwiki", - "server_url": "https://mcp.deepwiki.com/mcp", - "require_approval": "never", -} - -llm_with_mcp = client.bind_tools([mcp_tool]) - -# Query external knowledge -response = llm_with_mcp.invoke( - "What transport protocols does the 2025-03-26 version of the MCP " - "spec (modelcontextprotocol/modelcontextprotocol) support?" -) -print(response.content) -``` - ---- - -## Part 8: Endpoint Configuration - -Configure custom endpoints for different regions or setups. 
- -### Using Region - -```python -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", # Automatically constructs endpoint - model="openai.gpt-4.1", -) -``` - -### Using Service Endpoint - -```python -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", - model="openai.gpt-4.1", -) -``` - -### Using Base URL - -```python -client = ChatOCIOpenAI( - auth=auth, - compartment_id="ocid1.compartment.oc1..xxx", - base_url="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/v1", - model="openai.gpt-4.1", -) -``` - ---- - -## Part 9: Migration from OpenAI - -Migrating from OpenAI to OCI is straightforward. - -### Before (OpenAI) - -```python -from langchain_openai import ChatOpenAI - -client = ChatOpenAI( - api_key="sk-...", - model="gpt-4", -) - -response = client.invoke("Hello!") -``` - -### After (OCI) - -```python -from oci_openai import OciSessionAuth -from langchain_oci import ChatOCIOpenAI - -client = ChatOCIOpenAI( - auth=OciSessionAuth(profile_name="DEFAULT"), - compartment_id="ocid1.compartment.oc1..xxx", - region="us-chicago-1", - model="openai.gpt-4.1", # OCI model name -) - -response = client.invoke("Hello!") -``` - -### Key Differences - -| Aspect | OpenAI | OCI | -|--------|--------|-----| -| Authentication | API key | OCI auth (session, principal) | -| Model names | `gpt-4` | `openai.gpt-4.1` | -| Endpoint | OpenAI servers | OCI regional endpoints | -| Conversation store | N/A | Built-in support | - ---- - -## Summary - -You learned how to: - -- Configure `ChatOCIOpenAI` with various authentication methods -- Use conversation stores for persistent memory -- Perform function calling with custom tools -- Access web search capabilities -- Integrate hosted MCP servers -- Migrate from OpenAI to OCI - -## Next Steps - -- [Tutorial 09: Provider Deep Dive](../09-provider-deep-dive/) - Explore provider-specific features -- [Tutorial 10: Embeddings](../10-embeddings/) - Text and image embeddings - -## API Reference - -| Class/Function | Description | -|----------------|-------------| -| `ChatOCIOpenAI` | OpenAI Responses API client for OCI | -| `OciSessionAuth` | Session token authentication | -| `OciResourcePrincipalAuth` | Resource principal auth | -| `OciInstancePrincipalAuth` | Instance principal auth | - -## Troubleshooting - -### Import Error - -``` -ImportError: oci-openai not found -``` -- Install: `pip install oci-openai` - -### Authentication Failed - -``` -401 Unauthorized -``` -- Refresh session: `oci session authenticate --profile-name MY_PROFILE` -- Check profile name matches config - -### Conversation Store Not Found - -``` -NotAuthorizedOrNotFound: conversation_store_id -``` -- Verify conversation store OCID is correct -- Ensure compartment has access to the store -- Check IAM policies for conversation store operations - -### Model Not Available - -``` -Model 'openai.gpt-4.1' not found -``` -- Check model is available in your region -- Verify compartment has GenAI access -- Use `oci generative-ai model list` to see available models - -### 404 Not Found on /responses - -``` -openai.NotFoundError: Error code: 404 -``` -- The OpenAI Responses API may not be enabled in your tenancy -- Contact your OCI administrator to enable the Responses API -- Alternative: Use `ChatOCIGenAI` with standard models like `meta.llama-3.3-70b-instruct` diff --git 
a/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py b/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py deleted file mode 100644 index c6f4214b..00000000 --- a/libs/oci/tutorials/08-openai-responses-api/code/conversation_store.py +++ /dev/null @@ -1,127 +0,0 @@ -# Tutorial 08: Conversation Store Example -# Demonstrates persistent conversation memory with ChatOCIOpenAI - -# Note: Requires oci-openai package -# pip install oci-openai langchain-openai langchain-oci - -# Configuration - replace with your values -COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" -REGION = "us-chicago-1" -MODEL = "openai.gpt-4.1" -CONVERSATION_STORE_ID = "ocid1.conversationstore.oc1..your-store-id" - - -def setup_client_with_store(): - """Set up ChatOCIOpenAI with conversation store.""" - from oci_openai import OciSessionAuth - - from langchain_oci import ChatOCIOpenAI - - auth = OciSessionAuth(profile_name="DEFAULT") - - client = ChatOCIOpenAI( - auth=auth, - compartment_id=COMPARTMENT_ID, - region=REGION, - model=MODEL, - conversation_store_id=CONVERSATION_STORE_ID, - ) - - return client - - -def persistent_memory_demo(): - """Demonstrate persistent conversation memory.""" - print("Persistent Memory Demo") - print("=" * 50) - - client = setup_client_with_store() - - # First message - introduce yourself - print("User: My name is Alice and I'm a data scientist.") - response1 = client.invoke("My name is Alice and I'm a data scientist.") - print(f"Assistant: {response1.content}") - - # Second message - model should remember - print("\nUser: What is my name and profession?") - response2 = client.invoke("What is my name and profession?") - print(f"Assistant: {response2.content}") - - # Third message - continue context - print("\nUser: What programming languages should I learn?") - response3 = client.invoke("What programming languages should I learn?") - print(f"Assistant: {response3.content}") - - -def multi_session_demo(): - """Demonstrate memory persistence across sessions.""" - print("\nMulti-Session Demo") - print("=" * 50) - - # Session 1 - print("--- Session 1 ---") - client1 = setup_client_with_store() - - print("User: Remember this number: 42") - response1 = client1.invoke("Remember this number: 42") - print(f"Assistant: {response1.content}") - - # Simulate new session (create new client) - print("\n--- Session 2 (new client instance) ---") - client2 = setup_client_with_store() - - print("User: What number did I ask you to remember?") - response2 = client2.invoke("What number did I ask you to remember?") - print(f"Assistant: {response2.content}") - - -def conversation_store_info(): - """Information about conversation stores.""" - print("\nConversation Store Information") - print("=" * 50) - - info = """ - Conversation Stores in OCI: - - 1. Creating a Store: - oci generative-ai conversation-store create \\ - --compartment-id \\ - --display-name "My Store" \\ - --region us-chicago-1 - - 2. Listing Stores: - oci generative-ai conversation-store list \\ - --compartment-id \\ - --region us-chicago-1 - - 3. Getting Store Details: - oci generative-ai conversation-store get \\ - --conversation-store-id \\ - --region us-chicago-1 - - 4. 
Deleting a Store: - oci generative-ai conversation-store delete \\ - --conversation-store-id \\ - --region us-chicago-1 - - Benefits: - - Persistent memory across sessions - - Managed by OCI (no external database needed) - - Secure and compliant with OCI policies - - Automatic conversation management - """ - print(info) - - -if __name__ == "__main__": - print("Conversation Store Examples") - print("Note: Requires oci-openai package and a conversation store\n") - - # Show information - conversation_store_info() - - # Uncomment to run (requires actual OCI setup): - # persistent_memory_demo() - # multi_session_demo() - - print("\nExamples are commented out - configure credentials and uncomment to run.") diff --git a/libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py b/libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py deleted file mode 100644 index 06ca29c2..00000000 --- a/libs/oci/tutorials/08-openai-responses-api/code/openai_compat.py +++ /dev/null @@ -1,122 +0,0 @@ -# Tutorial 08: OpenAI Compatibility Example -# Demonstrates ChatOCIOpenAI for OpenAI Responses API - -from langchain_core.prompts import ChatPromptTemplate - -# Note: Requires oci-openai package -# pip install oci-openai langchain-openai langchain-oci - -# Configuration - replace with your values -COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" -REGION = "us-chicago-1" -MODEL = "openai.gpt-4.1" - - -def setup_client(): - """Set up ChatOCIOpenAI client with session auth.""" - from oci_openai import OciSessionAuth - - from langchain_oci import ChatOCIOpenAI - - # Session auth requires: oci session authenticate --profile-name DEFAULT - auth = OciSessionAuth(profile_name="DEFAULT") - - client = ChatOCIOpenAI( - auth=auth, - compartment_id=COMPARTMENT_ID, - region=REGION, - model=MODEL, - ) - - return client - - -def basic_usage(): - """Basic invocation example.""" - print("Basic Usage") - print("=" * 50) - - client = setup_client() - - # Simple string message - response = client.invoke("What is the capital of Japan?") - print(f"Response: {response.content}") - - -def message_formats(): - """Different message format examples.""" - print("\nMessage Formats") - print("=" * 50) - - client = setup_client() - - # Tuple format (role, content) - messages = [ - ("system", "You are a helpful coding assistant."), - ("human", "Write a Python function to reverse a string."), - ] - - response = client.invoke(messages) - print(f"Response:\n{response.content}") - - -def prompt_chaining(): - """Using prompt templates with ChatOCIOpenAI.""" - print("\nPrompt Chaining") - print("=" * 50) - - client = setup_client() - - # Create a prompt template - prompt = ChatPromptTemplate.from_messages( - [ - ( - "system", - "You are a helpful assistant that translates " - "{input_language} to {output_language}. 
" - "Only output the translation, nothing else.", - ), - ("human", "{input}"), - ] - ) - - # Create chain - chain = prompt | client - - # Invoke with variables - response = chain.invoke( - { - "input_language": "English", - "output_language": "Spanish", - "input": "Hello, how are you today?", - } - ) - - print(f"Translation: {response.content}") - - -def streaming_response(): - """Streaming example.""" - print("\nStreaming Response") - print("=" * 50) - - client = setup_client() - - print("Response: ", end="") - for chunk in client.stream("Tell me a short joke about programming."): - print(chunk.content, end="", flush=True) - print() - - -if __name__ == "__main__": - print("ChatOCIOpenAI Examples") - print("Note: Requires oci-openai package and valid OCI session\n") - - # Uncomment to run (requires actual OCI setup): - # basic_usage() - # message_formats() - # prompt_chaining() - # streaming_response() - - print("Examples are commented out - configure credentials and uncomment to run.") - print("First authenticate: oci session authenticate --profile-name DEFAULT") diff --git a/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py b/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py deleted file mode 100644 index 2ce9cbb3..00000000 --- a/libs/oci/tutorials/08-openai-responses-api/code/tools_and_search.py +++ /dev/null @@ -1,146 +0,0 @@ -# Tutorial 08: Tools and Web Search Example -# Demonstrates function calling, web search, and MCP with ChatOCIOpenAI - -from pydantic import BaseModel, Field - -# Note: Requires oci-openai package -# pip install oci-openai langchain-openai langchain-oci - -# Configuration - replace with your values -COMPARTMENT_ID = "ocid1.compartment.oc1..your-compartment-id" -REGION = "us-chicago-1" -MODEL = "openai.gpt-4.1" - - -def setup_client(): - """Set up ChatOCIOpenAI client.""" - from oci_openai import OciSessionAuth - - from langchain_oci import ChatOCIOpenAI - - auth = OciSessionAuth(profile_name="DEFAULT") - - client = ChatOCIOpenAI( - auth=auth, - compartment_id=COMPARTMENT_ID, - region=REGION, - model=MODEL, - ) - - return client - - -# Define tools as Pydantic models -class GetWeather(BaseModel): - """Get the current weather for a location.""" - - location: str = Field(..., description="City and state, e.g. 
San Francisco, CA") - unit: str = Field(default="fahrenheit", description="Unit: celsius or fahrenheit") - - -class SearchDatabase(BaseModel): - """Search a database for information.""" - - query: str = Field(..., description="The search query") - table: str = Field(..., description="The database table to search") - limit: int = Field(default=10, description="Maximum number of results") - - -def function_calling_demo(): - """Demonstrate function calling with custom tools.""" - print("Function Calling Demo") - print("=" * 50) - - client = setup_client() - - # Bind tools to the model - llm_with_tools = client.bind_tools([GetWeather, SearchDatabase]) - - # Ask a question that requires tool use - response = llm_with_tools.invoke("What is the weather like in San Francisco?") - - print(f"Response content: {response.content}") - print("\nTool calls:") - for tool_call in response.tool_calls: - print(f" - Function: {tool_call['name']}") - print(f" Arguments: {tool_call['args']}") - - -def web_search_demo(): - """Demonstrate web search capability.""" - print("\nWeb Search Demo") - print("=" * 50) - - client = setup_client() - - # Enable web search - web_search_tool = {"type": "web_search_preview"} - llm_with_search = client.bind_tools([web_search_tool]) - - # Ask about current events - response = llm_with_search.invoke("What are the latest developments in AI today?") - - print(f"Response: {response.content}") - - -def hosted_mcp_demo(): - """Demonstrate hosted MCP integration.""" - print("\nHosted MCP Demo") - print("=" * 50) - - client = setup_client() - - # Configure MCP tool - mcp_tool = { - "type": "mcp", - "server_label": "deepwiki", - "server_url": "https://mcp.deepwiki.com/mcp", - "require_approval": "never", - } - - llm_with_mcp = client.bind_tools([mcp_tool]) - - # Query using MCP - response = llm_with_mcp.invoke( - "What is the Model Context Protocol and what are its main features?" - ) - - print(f"Response: {response.content}") - - -def combined_tools_demo(): - """Demonstrate combining multiple tools.""" - print("\nCombined Tools Demo") - print("=" * 50) - - client = setup_client() - - # Combine custom tools with web search - tools = [ - GetWeather, - {"type": "web_search_preview"}, - ] - - llm_with_tools = client.bind_tools(tools) - - # The model can choose which tool to use - print("Query 1: Weather question") - response1 = llm_with_tools.invoke("What's the weather in Tokyo?") - print(f"Tool calls: {[tc['name'] for tc in response1.tool_calls]}") - - print("\nQuery 2: Current events question") - response2 = llm_with_tools.invoke("What happened in tech news today?") - print(f"Response: {response2.content[:200]}...") - - -if __name__ == "__main__": - print("Tools and Web Search Examples") - print("Note: Requires oci-openai package and valid OCI session\n") - - # Uncomment to run (requires actual OCI setup): - # function_calling_demo() - # web_search_demo() - # hosted_mcp_demo() - # combined_tools_demo() - - print("Examples are commented out - configure credentials and uncomment to run.") diff --git a/libs/oci/tutorials/README.md b/libs/oci/tutorials/README.md index d9f6d319..151fb4f4 100644 --- a/libs/oci/tutorials/README.md +++ b/libs/oci/tutorials/README.md @@ -35,12 +35,12 @@ Welcome to the langchain-oci tutorials! These tutorials will take you from begin │ ADVANCED │ - ┌─────────────────┼─────────────────┐ - ▼ ▼ ▼ -┌──────────────┐ ┌──────────────┐ ┌──────────────┐ -│ 06. Model │ │ 07. Async & │ │ 08. 
OpenAI │ -│ Deployments │ │ Production │ │ Responses │ -└──────────────┘ └──────────────┘ └──────────────┘ + ┌────────────┴────────────┐ + ▼ ▼ + ┌──────────────┐ ┌──────────────┐ + │ 06. Model │ │ 07. Async & │ + │ Deployments │ │ Production │ + └──────────────┘ └──────────────┘ │ SPECIALIZED │ @@ -63,7 +63,6 @@ Welcome to the langchain-oci tutorials! These tutorials will take you from begin | 05 | [Structured Output](./05-structured-output/) | Intermediate | Pydantic schemas, JSON modes | | 06 | [Model Deployments](./06-model-deployments/) | Advanced | vLLM, TGI, custom endpoints | | 07 | [Async for Production](./07-async-for-production/) | Advanced | ainvoke, astream, FastAPI | -| 08 | [OpenAI Responses API](./08-openai-responses-api/) | Advanced | ChatOCIOpenAI, conversation store | | 09 | [Provider Deep Dive](./09-provider-deep-dive/) | Specialized | Meta, Gemini, Cohere, xAI | | 10 | [Embeddings](./10-embeddings/) | Specialized | Text & image embeddings, RAG | @@ -95,7 +94,6 @@ Then continue with [Tutorial 01: Getting Started](./01-getting-started/). | Feature | Tutorial(s) | |---------|-------------| | `ChatOCIGenAI` | 01, 02, 03, 04, 05, 07 | -| `ChatOCIOpenAI` | 08 | | `ChatOCIModelDeployment` | 06, 07 | | `OCIGenAIEmbeddings` | 10 | | `create_oci_agent()` | 03 | From 718c2d355b6a3582d0cbf361ecc13ce054e340dc Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Fri, 20 Feb 2026 11:31:37 -0500 Subject: [PATCH 12/16] Remove Tutorial 06 (Model Deployments) Requires OCI Data Science infrastructure with GPU shapes and AQUA which is not available in all tenancies. --- .../tutorials/06-model-deployments/README.md | 479 ------------------ .../code/custom_endpoint.py | 176 ------- .../code/tgi_deployment.py | 115 ----- .../code/vllm_deployment.py | 115 ----- libs/oci/tutorials/README.md | 13 +- 5 files changed, 5 insertions(+), 893 deletions(-) delete mode 100644 libs/oci/tutorials/06-model-deployments/README.md delete mode 100644 libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py delete mode 100644 libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py delete mode 100644 libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py diff --git a/libs/oci/tutorials/06-model-deployments/README.md b/libs/oci/tutorials/06-model-deployments/README.md deleted file mode 100644 index c3a09ec2..00000000 --- a/libs/oci/tutorials/06-model-deployments/README.md +++ /dev/null @@ -1,479 +0,0 @@ -# Tutorial 06: Custom Model Deployments - -Deploy and use custom models on OCI Data Science Model Deployments with LangChain. 
- -## What You'll Learn - -- Deploy custom models using OCI Data Science -- Use `ChatOCIModelDeployment` for chat interfaces -- Configure vLLM and TGI deployments -- Handle authentication with `oracle-ads` -- Enable streaming and async operations - -## Prerequisites - -- Completed [Tutorial 01: Getting Started](../01-getting-started/) -- OCI Data Science Model Deployment endpoint -- `oracle-ads` and `langchain-openai` installed - -```bash -pip install oracle-ads langchain-openai langchain-oci -``` - -## Concepts Covered - -| Class | Description | -|-------|-------------| -| `ChatOCIModelDeployment` | Base chat model for OCI deployments | -| `ChatOCIModelDeploymentVLLM` | vLLM-specific deployment | -| `ChatOCIModelDeploymentTGI` | Text Generation Inference deployment | -| `OCIModelDeploymentLLM` | Legacy LLM interface | - ---- - -## Part 1: Understanding OCI Model Deployments - -OCI Data Science Model Deployments allow you to deploy custom models (fine-tuned LLMs, open-source models) on dedicated infrastructure. Unlike the managed GenAI service, you have full control over: - -- Model selection (any Hugging Face model) -- Infrastructure sizing (GPU types, memory) -- Inference framework (vLLM, TGI, custom) - -### Deployment Architecture - -``` -┌─────────────────────────────────────────────────────────┐ -│ OCI Data Science │ -│ │ -│ ┌──────────────┐ ┌──────────────┐ │ -│ │ vLLM │ │ TGI │ │ -│ │ Deployment │ │ Deployment │ │ -│ └──────┬───────┘ └──────┬───────┘ │ -│ │ │ │ -│ └─────────┬─────────┘ │ -│ │ │ -│ ┌──────────┴──────────┐ │ -│ │ /v1/chat/completions│ │ -│ │ (OpenAI-compatible)│ │ -│ └──────────────────────┘ │ -└─────────────────────────────────────────────────────────┘ -``` - ---- - -## Part 2: Authentication Setup - -OCI Model Deployments use `oracle-ads` for authentication. - -### Configure ADS Authentication - -```python -import ads - -# Option 1: API Key (default, uses ~/.oci/config) -ads.set_auth("api_key") - -# Option 2: Resource Principal (for OCI Functions, Jobs) -ads.set_auth("resource_principal") - -# Option 3: Instance Principal (for Compute instances) -ads.set_auth("instance_principal") - -# Option 4: Security Token (session-based) -ads.set_auth("security_token") -``` - -### Verify Authentication - -```python -# Check current auth method -print(ads.common.auth.default_signer()) -``` - ---- - -## Part 3: Basic ChatOCIModelDeployment - -The base class works with any OpenAI-compatible endpoint. 
- -```python -from langchain_oci import ChatOCIModelDeployment - -# Create chat model pointing to your deployment -chat = ChatOCIModelDeployment( - endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", - model="odsc-llm", # Default model name for AQUA deployments - streaming=True, - model_kwargs={ - "max_tokens": 512, - "temperature": 0.2, - }, -) - -# Simple invocation -response = chat.invoke("What is machine learning?") -print(response.content) -``` - -### With Custom Headers - -```python -chat = ChatOCIModelDeployment( - endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", - model="odsc-llm", - default_headers={ - "route": "/v1/chat/completions", - "X-Custom-Header": "value", - }, -) -``` - -### Message Formats - -```python -from langchain_core.messages import HumanMessage, SystemMessage - -messages = [ - SystemMessage(content="You are a helpful assistant."), - HumanMessage(content="Explain quantum computing in simple terms."), -] - -response = chat.invoke(messages) -``` - ---- - -## Part 4: vLLM Deployments - -vLLM is optimized for high-throughput LLM inference with PagedAttention. - -```python -from langchain_oci import ChatOCIModelDeploymentVLLM - -chat = ChatOCIModelDeploymentVLLM( - endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", - model="odsc-llm", - - # vLLM-specific parameters - temperature=0.2, - max_tokens=512, - top_p=0.95, - top_k=40, - - # Sampling parameters - frequency_penalty=0.1, - presence_penalty=0.1, - repetition_penalty=1.1, - - # Beam search (optional) - use_beam_search=False, - best_of=1, - - # Token control - min_tokens=10, - ignore_eos=False, - skip_special_tokens=True, -) - -response = chat.invoke("Write a haiku about coding.") -print(response.content) -``` - -### vLLM Tool Calling - -If your vLLM deployment is configured with `--tool-call-parser`: - -```python -from langchain_core.tools import tool - -@tool -def get_weather(city: str) -> str: - """Get weather for a city.""" - return f"Weather in {city}: 72F, sunny" - -chat = ChatOCIModelDeploymentVLLM( - endpoint="...", - tool_choice="auto", # Enable tool calling -) - -chat_with_tools = chat.bind_tools([get_weather]) -response = chat_with_tools.invoke("What's the weather in Chicago?") -``` - -### Custom Chat Templates - -```python -chat = ChatOCIModelDeploymentVLLM( - endpoint="...", - chat_template="{% for message in messages %}...", # Jinja2 template -) -``` - ---- - -## Part 5: TGI Deployments - -Text Generation Inference (TGI) from Hugging Face offers production-ready serving. - -```python -from langchain_oci import ChatOCIModelDeploymentTGI - -chat = ChatOCIModelDeploymentTGI( - endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", - model="odsc-llm", - - # TGI parameters - temperature=0.2, - max_tokens=512, - top_p=0.9, - - # Reproducibility - seed=42, - - # Penalties - frequency_penalty=0.0, - presence_penalty=0.0, - - # Log probabilities - logprobs=True, - top_logprobs=5, -) - -response = chat.invoke("Explain the theory of relativity.") -print(response.content) -``` - ---- - -## Part 6: Streaming Responses - -All deployment classes support streaming. 
- -### Synchronous Streaming - -```python -chat = ChatOCIModelDeployment( - endpoint="...", - streaming=True, -) - -for chunk in chat.stream("Tell me a story about a robot."): - print(chunk.content, end="", flush=True) -``` - -### Async Streaming - -```python -import asyncio - -async def stream_response(): - chat = ChatOCIModelDeployment( - endpoint="...", - streaming=True, - ) - - async for chunk in chat.astream("Tell me a story about a robot."): - print(chunk.content, end="", flush=True) - -asyncio.run(stream_response()) -``` - ---- - -## Part 7: Async Operations - -Full async support for high-concurrency applications. - -```python -import asyncio -from langchain_oci import ChatOCIModelDeployment - -async def main(): - chat = ChatOCIModelDeployment(endpoint="...", model="odsc-llm") - - # Single async request - response = await chat.ainvoke("Hello!") - print(response.content) - - # Concurrent requests - questions = [ - "What is Python?", - "What is JavaScript?", - "What is Rust?", - ] - - tasks = [chat.ainvoke(q) for q in questions] - responses = await asyncio.gather(*tasks) - - for q, r in zip(questions, responses): - print(f"Q: {q}") - print(f"A: {r.content[:100]}...") - -asyncio.run(main()) -``` - ---- - -## Part 8: Structured Output - -Use JSON mode for structured responses. - -```python -from pydantic import BaseModel, Field -from langchain_oci import ChatOCIModelDeployment - -class MovieReview(BaseModel): - title: str = Field(description="Movie title") - rating: int = Field(description="Rating from 1-10") - summary: str = Field(description="Brief review summary") - -chat = ChatOCIModelDeployment(endpoint="...", model="odsc-llm") - -# Use JSON mode -structured_chat = chat.with_structured_output(MovieReview, method="json_mode") - -response = structured_chat.invoke( - "Review the movie 'Inception'. Respond in JSON with " - "title, rating (1-10), and summary fields." -) - -print(f"Title: {response.title}") -print(f"Rating: {response.rating}/10") -print(f"Summary: {response.summary}") -``` - ---- - -## Part 9: Custom Endpoint Handling - -Extend the base class for custom inference endpoints. - -```python -from langchain_oci import ChatOCIModelDeployment -from langchain_core.outputs import ChatResult, ChatGeneration -from langchain_core.messages import AIMessage - -class MyCustomDeployment(ChatOCIModelDeployment): - """Custom deployment with non-standard response format.""" - - def _construct_json_body(self, messages: list, params: dict) -> dict: - """Custom request payload.""" - return { - "inputs": messages, - "parameters": params, - "custom_field": "value", - } - - def _process_response(self, response_json: dict) -> ChatResult: - """Custom response parsing.""" - # Extract text from custom response format - text = response_json.get("output", {}).get("generated_text", "") - - return ChatResult( - generations=[ - ChatGeneration( - message=AIMessage(content=text), - generation_info={"custom": True}, - ) - ] - ) - -# Use custom deployment -chat = MyCustomDeployment(endpoint="...", model="my-model") -response = chat.invoke("Hello!") -``` - ---- - -## Part 10: Legacy LLM Interface - -For text completion (non-chat) workloads. 
- -```python -from langchain_oci import OCIModelDeploymentLLM - -llm = OCIModelDeploymentLLM( - endpoint="https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict", - model="odsc-llm", - streaming=True, - model_kwargs={ - "max_tokens": 256, - "temperature": 0.7, - }, -) - -# Text completion -response = llm.invoke("Complete this sentence: The future of AI is") -print(response) - -# Streaming -for chunk in llm.stream("Write a poem about:"): - print(chunk, end="", flush=True) -``` - ---- - -## Summary - -You learned how to: - -- Configure authentication with `oracle-ads` -- Use `ChatOCIModelDeployment` for any OpenAI-compatible endpoint -- Configure vLLM deployments with `ChatOCIModelDeploymentVLLM` -- Configure TGI deployments with `ChatOCIModelDeploymentTGI` -- Stream responses synchronously and asynchronously -- Get structured output with JSON mode -- Extend base classes for custom endpoints - -## Next Steps - -- [Tutorial 07: Async for Production](../07-async-for-production/) - Scale with async patterns -- [Tutorial 08: OpenAI Responses API](../08-openai-responses-api/) - Use ChatOCIOpenAI - -## API Reference - -| Class | Description | -|-------|-------------| -| `ChatOCIModelDeployment` | Base class for OCI model deployments | -| `ChatOCIModelDeploymentVLLM` | vLLM-specific parameters | -| `ChatOCIModelDeploymentTGI` | TGI-specific parameters | -| `OCIModelDeploymentLLM` | Text completion interface | -| `OCIModelDeploymentVLLM` | vLLM text completion | -| `OCIModelDeploymentTGI` | TGI text completion | - -## Troubleshooting - -### Authentication Errors - -``` -Could not authenticate -``` -- Run `ads.set_auth("api_key")` with correct profile -- Verify `~/.oci/config` is properly configured -- Check IAM policies for model deployment access - -### Connection Timeout - -``` -ConnectTimeout -``` -- Verify endpoint URL is correct -- Check network connectivity to OCI -- Ensure deployment is in ACTIVE state - -### 401 Unauthorized - -``` -TokenExpiredError -``` -- Refresh security token: `oci session authenticate` -- Check resource principal permissions -- Verify compartment access policies - -### Model Not Found - -``` -Model 'xxx' not found -``` -- Use `"odsc-llm"` for AQUA deployments -- Check model name matches deployment configuration diff --git a/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py b/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py deleted file mode 100644 index cbedffb3..00000000 --- a/libs/oci/tutorials/06-model-deployments/code/custom_endpoint.py +++ /dev/null @@ -1,176 +0,0 @@ -# Tutorial 06: Custom Endpoint Example -# Demonstrates extending ChatOCIModelDeployment for custom inference formats - -import ads -from langchain_core.messages import AIMessage -from langchain_core.outputs import ChatGeneration, ChatResult - -from langchain_oci import ChatOCIModelDeployment - -# Configure authentication -ads.set_auth("api_key") - - -class CustomFormatDeployment(ChatOCIModelDeployment): - """Custom deployment handler for non-standard response formats. - - This example shows how to extend the base class to handle - custom request/response formats from your model deployment. - """ - - custom_param: str = "default_value" # Add custom parameters - - def _construct_json_body(self, messages: list, params: dict) -> dict: - """Construct custom request payload. - - Override this method if your endpoint expects a different - request format than the standard OpenAI chat completions format. 
- """ - # Convert LangChain messages to custom format - formatted_messages = [] - for msg in messages: - if hasattr(msg, "content"): - formatted_messages.append( - { - "role": msg.type, # "human", "ai", "system" - "text": msg.content, - } - ) - - return { - "conversation": formatted_messages, - "config": { - "max_tokens": params.get("max_tokens", 512), - "temperature": params.get("temperature", 0.7), - "custom_param": self.custom_param, - }, - } - - def _process_response(self, response_json: dict) -> ChatResult: - """Process custom response format. - - Override this method if your endpoint returns a different - response format than the standard OpenAI chat completions format. - """ - # Example custom response format: - # { - # "output": { - # "generated_text": "...", - # "tokens_used": 100 - # }, - # "status": "success" - # } - - output = response_json.get("output", {}) - text = output.get("generated_text", "") - tokens = output.get("tokens_used", 0) - - return ChatResult( - generations=[ - ChatGeneration( - message=AIMessage(content=text), - generation_info={ - "tokens_used": tokens, - "custom_format": True, - }, - ) - ], - llm_output={ - "model_name": self.model, - "token_usage": {"total_tokens": tokens}, - }, - ) - - -class BatchInferenceDeployment(ChatOCIModelDeployment): - """Custom deployment for batch inference endpoints. - - Some deployments support batch processing of multiple prompts - in a single request for efficiency. - """ - - batch_size: int = 5 - - def _construct_json_body(self, messages: list, params: dict) -> dict: - """Construct batch request payload.""" - # Extract user messages for batch processing - prompts = [] - for msg in messages: - if hasattr(msg, "content"): - prompts.append(msg.content) - - return { - "prompts": prompts, - "batch_config": { - "max_batch_size": self.batch_size, - "return_all": True, - }, - **params, - } - - -def demo_custom_deployment(): - """Demonstrate custom deployment usage.""" - print("Custom Deployment Example") - print("=" * 50) - - # Replace with your endpoint - endpoint = ( - "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" - ) - - chat = CustomFormatDeployment( - endpoint=endpoint, - model="my-custom-model", - custom_param="special_value", - model_kwargs={ - "max_tokens": 256, - "temperature": 0.5, - }, - ) - - print("Custom deployment configured:") - print(f" - Endpoint: {endpoint}") - print(f" - Model: {chat.model}") - print(f" - Custom param: {chat.custom_param}") - - # Uncomment to actually invoke (requires real endpoint): - # response = chat.invoke("Hello, custom model!") - # print(f"Response: {response.content}") - - -def show_extension_patterns(): - """Show common extension patterns.""" - print("\nCommon Extension Patterns") - print("=" * 50) - - patterns = """ - 1. Custom Request Format: - Override _construct_json_body() to change how messages - are formatted in the HTTP request body. - - 2. Custom Response Parsing: - Override _process_response() to parse non-standard - response formats from your model. - - 3. Custom Streaming: - Override _process_stream_response() for custom - streaming response formats. - - 4. Custom Headers: - Use default_headers parameter or override _headers() - for custom HTTP headers. - - 5. Custom Parameters: - Add Pydantic fields to your subclass for - deployment-specific configuration. 
- """ - print(patterns) - - -if __name__ == "__main__": - print("Custom Endpoint Examples") - print("Demonstrates extending ChatOCIModelDeployment\n") - - demo_custom_deployment() - show_extension_patterns() diff --git a/libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py b/libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py deleted file mode 100644 index 8c8c5e62..00000000 --- a/libs/oci/tutorials/06-model-deployments/code/tgi_deployment.py +++ /dev/null @@ -1,115 +0,0 @@ -# Tutorial 06: TGI Deployment Example -# Demonstrates ChatOCIModelDeploymentTGI for Hugging Face TGI endpoints - -import ads -from langchain_core.messages import HumanMessage, SystemMessage - -from langchain_oci import ChatOCIModelDeploymentTGI - -# Configure authentication -ads.set_auth("api_key") - -# Replace with your deployment endpoint -ENDPOINT = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" - - -def basic_tgi_chat(): - """Basic chat with TGI deployment.""" - print("Basic TGI Chat") - print("=" * 50) - - chat = ChatOCIModelDeploymentTGI( - endpoint=ENDPOINT, - model="odsc-llm", - temperature=0.2, - max_tokens=512, - top_p=0.9, - ) - - messages = [ - SystemMessage(content="You are a helpful assistant."), - HumanMessage(content="Explain the difference between REST and GraphQL."), - ] - - response = chat.invoke(messages) - print(response.content) - - -def reproducible_generation(): - """Using seed for reproducible outputs.""" - print("\nReproducible Generation with Seed") - print("=" * 50) - - chat = ChatOCIModelDeploymentTGI( - endpoint=ENDPOINT, - model="odsc-llm", - temperature=0.5, - max_tokens=100, - seed=42, # Set seed for reproducibility - ) - - # Generate twice with same seed - prompt = "Generate a random 6-digit code:" - - response1 = chat.invoke(prompt) - print(f"First generation: {response1.content.strip()}") - - response2 = chat.invoke(prompt) - print(f"Second generation: {response2.content.strip()}") - - print("(With same seed, outputs should be identical)") - - -def logprobs_analysis(): - """Getting log probabilities for token analysis.""" - print("\nLog Probabilities Analysis") - print("=" * 50) - - chat = ChatOCIModelDeploymentTGI( - endpoint=ENDPOINT, - model="odsc-llm", - temperature=0.0, - max_tokens=50, - logprobs=True, - top_logprobs=3, - ) - - response = chat.invoke("The capital of France is") - print(f"Response: {response.content}") - - # Access log probabilities from response metadata - if response.response_metadata.get("logprobs"): - print("\nTop token probabilities available in response_metadata") - - -def streaming_tgi(): - """Streaming with TGI deployment.""" - print("\nStreaming TGI Response") - print("=" * 50) - - chat = ChatOCIModelDeploymentTGI( - endpoint=ENDPOINT, - model="odsc-llm", - streaming=True, - temperature=0.7, - max_tokens=200, - ) - - print("Response: ", end="") - for chunk in chat.stream("Write a haiku about machine learning."): - print(chunk.content, end="", flush=True) - print() - - -if __name__ == "__main__": - print("TGI Deployment Examples") - print("Note: Replace ENDPOINT with your actual deployment URL") - print() - - # Uncomment to run (requires actual deployment): - # basic_tgi_chat() - # reproducible_generation() - # logprobs_analysis() - # streaming_tgi() - - print("Examples are commented out - configure ENDPOINT and uncomment to run.") diff --git a/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py b/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py deleted file mode 100644 index 
029448a6..00000000 --- a/libs/oci/tutorials/06-model-deployments/code/vllm_deployment.py +++ /dev/null @@ -1,115 +0,0 @@ -# Tutorial 06: vLLM Deployment Example -# Demonstrates ChatOCIModelDeploymentVLLM for high-throughput inference - -import ads -from langchain_core.messages import HumanMessage, SystemMessage - -from langchain_oci import ChatOCIModelDeploymentVLLM - -# Configure authentication (uses ~/.oci/config by default) -ads.set_auth("api_key") - -# Replace with your deployment endpoint -ENDPOINT = "https://modeldeployment.us-ashburn-1.oci.customer-oci.com//predict" - - -def basic_chat(): - """Basic chat with vLLM deployment.""" - print("Basic vLLM Chat") - print("=" * 50) - - chat = ChatOCIModelDeploymentVLLM( - endpoint=ENDPOINT, - model="odsc-llm", - temperature=0.2, - max_tokens=512, - top_p=0.95, - ) - - messages = [ - SystemMessage(content="You are a helpful coding assistant."), - HumanMessage(content="Write a Python function to check if a number is prime."), - ] - - response = chat.invoke(messages) - print(response.content) - - -def streaming_chat(): - """Streaming response from vLLM deployment.""" - print("\nStreaming vLLM Chat") - print("=" * 50) - - chat = ChatOCIModelDeploymentVLLM( - endpoint=ENDPOINT, - model="odsc-llm", - streaming=True, - temperature=0.7, - max_tokens=256, - ) - - print("Response: ", end="") - for chunk in chat.stream("Tell me a short story about a robot learning to paint."): - print(chunk.content, end="", flush=True) - print() - - -def advanced_parameters(): - """Using advanced vLLM sampling parameters.""" - print("\nAdvanced vLLM Parameters") - print("=" * 50) - - chat = ChatOCIModelDeploymentVLLM( - endpoint=ENDPOINT, - model="odsc-llm", - # Sampling parameters - temperature=0.8, - top_p=0.9, - top_k=40, - # Penalties - frequency_penalty=0.2, - presence_penalty=0.1, - repetition_penalty=1.1, - # Token control - max_tokens=200, - min_tokens=50, - # Output control - skip_special_tokens=True, - spaces_between_special_tokens=True, - ) - - response = chat.invoke("Generate a creative product name for a smart water bottle.") - print(f"Generated name: {response.content}") - - -def beam_search_generation(): - """Using beam search for more deterministic output.""" - print("\nBeam Search Generation") - print("=" * 50) - - chat = ChatOCIModelDeploymentVLLM( - endpoint=ENDPOINT, - model="odsc-llm", - use_beam_search=True, - best_of=3, - temperature=0.0, # Usually 0 for beam search - max_tokens=100, - ) - - prompt = "Translate to French: The quick brown fox jumps over the lazy dog." - response = chat.invoke(prompt) - print(f"Translation: {response.content}") - - -if __name__ == "__main__": - print("vLLM Deployment Examples") - print("Note: Replace ENDPOINT with your actual deployment URL") - print() - - # Uncomment to run (requires actual deployment): - # basic_chat() - # streaming_chat() - # advanced_parameters() - # beam_search_generation() - - print("Examples are commented out - configure ENDPOINT and uncomment to run.") diff --git a/libs/oci/tutorials/README.md b/libs/oci/tutorials/README.md index 151fb4f4..6518b2c7 100644 --- a/libs/oci/tutorials/README.md +++ b/libs/oci/tutorials/README.md @@ -35,12 +35,11 @@ Welcome to the langchain-oci tutorials! These tutorials will take you from begin │ ADVANCED │ - ┌────────────┴────────────┐ - ▼ ▼ - ┌──────────────┐ ┌──────────────┐ - │ 06. Model │ │ 07. Async & │ - │ Deployments │ │ Production │ - └──────────────┘ └──────────────┘ + ▼ + ┌──────────────┐ + │ 07. 
Async & │ + │ Production │ + └──────────────┘ │ SPECIALIZED │ @@ -61,7 +60,6 @@ Welcome to the langchain-oci tutorials! These tutorials will take you from begin | 03 | [Building AI Agents](./03-building-ai-agents/) | Beginner | ReAct agents, tools, memory | | 04 | [Tool Calling Mastery](./04-tool-calling-mastery/) | Intermediate | bind_tools, parallel calls, workflows | | 05 | [Structured Output](./05-structured-output/) | Intermediate | Pydantic schemas, JSON modes | -| 06 | [Model Deployments](./06-model-deployments/) | Advanced | vLLM, TGI, custom endpoints | | 07 | [Async for Production](./07-async-for-production/) | Advanced | ainvoke, astream, FastAPI | | 09 | [Provider Deep Dive](./09-provider-deep-dive/) | Specialized | Meta, Gemini, Cohere, xAI | | 10 | [Embeddings](./10-embeddings/) | Specialized | Text & image embeddings, RAG | @@ -94,7 +92,6 @@ Then continue with [Tutorial 01: Getting Started](./01-getting-started/). | Feature | Tutorial(s) | |---------|-------------| | `ChatOCIGenAI` | 01, 02, 03, 04, 05, 07 | -| `ChatOCIModelDeployment` | 06, 07 | | `OCIGenAIEmbeddings` | 10 | | `create_oci_agent()` | 03 | | Vision (13 models) | 02 | From da33f96dcc3def19f7b1fec391d61d15e28f69a2 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Tue, 24 Feb 2026 15:37:41 -0500 Subject: [PATCH 13/16] Address PR review comments on documentation - Add note linking to OCI's official model reference in MODELS.md and README.md since models are updated frequently by OCI - Remove migration guide from CHANGELOG.md (redundant with tutorials) - Change Tutorial 03 (AI Agents) from Beginner to Intermediate level - Add note that max_tokens varies by model (not always 4096) - Clarify provider table shows examples, not comprehensive list - Add deprecation note for MetaProvider (use GenericProvider for Llama) - Remove duplicate entry in Gemini models table - Add OpenAI provider section to Tutorial 09 (ChatOCIOpenAI) - Fix API_REFERENCE.md: remove incorrect detail param from load_image, fix to_data_uri and encode_image signatures to match actual code - Fix VISION_MODELS import in Tutorial 02 (use langchain_oci, not utils.vision) - Update CONTRIBUTING.md to use make commands instead of poetry run - Remove "Adding a New Provider" section from CONTRIBUTING.md Signed-off-by: Federico Kamelhar --- CONTRIBUTING.md | 79 ++----------------- libs/oci/README.md | 8 +- libs/oci/docs/API_REFERENCE.md | 12 +-- libs/oci/docs/CHANGELOG.md | 49 ------------ libs/oci/docs/MODELS.md | 4 +- .../tutorials/01-getting-started/README.md | 12 ++- .../02-vision-and-multimodal/README.md | 2 +- .../tutorials/09-provider-deep-dive/README.md | 79 +++++++++++++++---- libs/oci/tutorials/README.md | 47 +++++------ 9 files changed, 120 insertions(+), 172 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4b0f46d6..8c921411 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,7 +8,6 @@ We welcome your contributions! There are multiple ways to contribute. - [Contributing Code](#contributing-code) - [Development Setup](#development-setup) - [Architecture Overview](#architecture-overview) -- [Adding a New Provider](#adding-a-new-provider) - [Testing](#testing) - [Pull Request Process](#pull-request-process) - [Code of Conduct](#code-of-conduct) @@ -87,19 +86,17 @@ poetry run pytest --cov=langchain_oci --cov-report=html ### Code Quality +Use the Makefile targets for consistent code quality checks: + ```bash # Format code -poetry run black . -poetry run isort . - -# Lint -poetry run ruff check . 
+make format -# Type check -poetry run mypy langchain_oci +# Lint (runs ruff check, ruff format --diff, mypy) +make lint -# All checks -make lint # If Makefile is available +# Run all tests +make test ``` --- @@ -153,68 +150,6 @@ Each provider handles: --- -## Adding a New Provider - -### Step 1: Create Provider Class - -```python -# libs/oci/langchain_oci/chat_models/providers/my_provider.py - -from langchain_oci.chat_models.providers.base import Provider - -class MyProvider(Provider): - """Provider for MyModel.""" - - stop_sequence_key: str = "stop" - - def __init__(self) -> None: - from oci.generative_ai_inference import models - # Initialize OCI model classes - self.oci_chat_request = models.GenericChatRequest - # ... other initializations - - def messages_to_oci_params(self, messages, **kwargs): - """Convert LangChain messages to OCI format.""" - # Implementation - - def chat_response_to_text(self, response): - """Extract text from response.""" - # Implementation - - def convert_to_oci_tool(self, tool): - """Convert tool to OCI format.""" - # Implementation -``` - -### Step 2: Register Provider - -In `oci_generative_ai.py`, add detection: - -```python -def _get_provider(self) -> Provider: - if self.model_id.startswith("mymodel."): - return MyProvider() - # ... existing logic -``` - -### Step 3: Add Tests - -```python -# tests/unit/test_my_provider.py - -def test_my_provider_message_conversion(): - provider = MyProvider() - messages = [HumanMessage(content="Hello")] - params = provider.messages_to_oci_params(messages) - assert "messages" in params -``` - -### Step 4: Update Documentation - -- Add to `docs/MODELS.md` -- Add to feature matrix in README -- Create tutorial examples if significant - --- ## Testing diff --git a/libs/oci/README.md b/libs/oci/README.md index 4a48a870..6295efb2 100644 --- a/libs/oci/README.md +++ b/libs/oci/README.md @@ -140,13 +140,15 @@ llm = ChatOCIGenAI( ### Provider Matrix -| Provider | Models | Features | -|----------|--------|----------| +> **Note:** For the most current model list, see the [OCI Generative AI documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm). + +| Provider | Example Models | Features | +|----------|----------------|----------| | **Meta** | Llama 3.2, 3.3, 4 | Vision, parallel tools | | **Google** | Gemini 2.0/2.5 Flash | Multimodal (PDF, video, audio) | | **xAI** | Grok 4 | Vision, reasoning | | **Cohere** | Command R+, Command A | RAG, vision (V2) | -| **OpenAI** | GPT-4, o1 | Reasoning | +| **OpenAI** | GPT-4, o1 | Reasoning (via ChatOCIOpenAI) | | **Mistral** | Mistral models | Fast inference | --- diff --git a/libs/oci/docs/API_REFERENCE.md b/libs/oci/docs/API_REFERENCE.md index cb7866a8..31dcf619 100644 --- a/libs/oci/docs/API_REFERENCE.md +++ b/libs/oci/docs/API_REFERENCE.md @@ -222,8 +222,7 @@ Load image file for vision models. from langchain_oci import load_image content_block = load_image( - path: str, # Path to image file - detail: str = "auto", # "auto", "low", "high" + file_path: str, # Path to image file ) # Returns: Dict with type="image_url" and base64 data ``` @@ -236,10 +235,10 @@ Encode bytes as image content. from langchain_oci import encode_image content_block = encode_image( - data: bytes, # Raw image bytes - mime_type: str, # "image/jpeg", "image/png", etc. - detail: str = "auto", + image_bytes: bytes, # Raw image bytes + mime_type: str = "image/png", # "image/jpeg", "image/png", etc. 
) +# Returns: Dict with type="image_url" and base64 data ``` ### to_data_uri @@ -250,7 +249,8 @@ Convert image to data URI string. from langchain_oci import to_data_uri uri = to_data_uri( - path: str, # Path to image file + image: Union[str, bytes, Path], # File path, bytes, or existing data URI + mime_type: str = "image/png", # MIME type (used when image is bytes) ) # Returns: "data:image/jpeg;base64,..." ``` diff --git a/libs/oci/docs/CHANGELOG.md b/libs/oci/docs/CHANGELOG.md index 5f477bed..de9cf07d 100644 --- a/libs/oci/docs/CHANGELOG.md +++ b/libs/oci/docs/CHANGELOG.md @@ -87,55 +87,6 @@ All notable changes to langchain-oci are documented here. --- -## Migration Guide - -### From 0.1.x to 0.2.x - -#### Vision Support - -```python -# Old: No vision support -# New: Use load_image() -from langchain_oci import ChatOCIGenAI, load_image -from langchain_core.messages import HumanMessage - -llm = ChatOCIGenAI(model_id="meta.llama-3.2-90b-vision-instruct", ...) - -message = HumanMessage(content=[ - {"type": "text", "text": "Describe this image."}, - load_image("photo.jpg"), -]) -response = llm.invoke([message]) -``` - -#### Agents - -```python -# Old: Manual agent setup -# New: Use create_oci_agent() -from langchain_oci import create_oci_agent - -agent = create_oci_agent( - model_id="meta.llama-4-scout-17b-16e-instruct", - tools=[my_tool], - compartment_id="...", - service_endpoint="...", -) -``` - -#### Parallel Tools - -```python -# Old: Sequential tool calls only -# New: Enable parallel calls (Llama 4+) -llm_with_tools = llm.bind_tools( - [tool1, tool2], - parallel_tool_calls=True, -) -``` - ---- - ## Deprecations None currently planned. diff --git a/libs/oci/docs/MODELS.md b/libs/oci/docs/MODELS.md index d748bab1..78a8bbb5 100644 --- a/libs/oci/docs/MODELS.md +++ b/libs/oci/docs/MODELS.md @@ -1,6 +1,8 @@ # Model Reference -Complete reference of models available in OCI Generative AI. +> **Note:** OCI Generative AI models are updated frequently. For the most current and comprehensive model list, refer to the [OCI Generative AI documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm). +> +> This document provides examples and guidance for using models with langchain-oci. ## Chat Models diff --git a/libs/oci/tutorials/01-getting-started/README.md b/libs/oci/tutorials/01-getting-started/README.md index 4f4e75ea..7e3d88ab 100644 --- a/libs/oci/tutorials/01-getting-started/README.md +++ b/libs/oci/tutorials/01-getting-started/README.md @@ -156,10 +156,12 @@ llm = ChatOCIGenAI( ## Part 4: Choosing a Provider & Model -### Available Providers +### Available Providers (Examples) -| Provider | Models | Strengths | -|----------|--------|-----------| +> **Note:** This is not a comprehensive list. See the [OCI Generative AI documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm) for all available models. 
+ +| Provider | Example Models | Strengths | +|----------|----------------|-----------| | **Meta** | Llama 3.2, 3.3, 4 | Excellent general-purpose, tool calling | | **Cohere** | Command R+, Command A | RAG, document processing | | **Google** | Gemini 2.0 Flash, 2.5 | Multimodal (PDF, video, audio) | @@ -269,10 +271,12 @@ llm = ChatOCIGenAI( | Parameter | Range | Effect | |-----------|-------|--------| | `temperature` | 0.0 - 1.0 | Higher = more creative, lower = more focused | -| `max_tokens` | 1 - 4096+ | Maximum tokens in the response | +| `max_tokens` | Model-dependent | Maximum tokens in the response (varies by model) | | `top_p` | 0.0 - 1.0 | Nucleus sampling cutoff | | `top_k` | 1 - 500 | Number of top tokens to consider | +> **Note:** Maximum token limits vary by model. Check the [OCI Generative AI documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm) for specific model limits. + --- ## Summary diff --git a/libs/oci/tutorials/02-vision-and-multimodal/README.md b/libs/oci/tutorials/02-vision-and-multimodal/README.md index 8b2e73bf..1453144a 100644 --- a/libs/oci/tutorials/02-vision-and-multimodal/README.md +++ b/libs/oci/tutorials/02-vision-and-multimodal/README.md @@ -35,7 +35,7 @@ Not all models can process images. Here are the vision-capable models available ### Model Registry ```python -from langchain_oci.utils.vision import VISION_MODELS +from langchain_oci import VISION_MODELS print(VISION_MODELS) ``` diff --git a/libs/oci/tutorials/09-provider-deep-dive/README.md b/libs/oci/tutorials/09-provider-deep-dive/README.md index 5acb1fd0..657fd76a 100644 --- a/libs/oci/tutorials/09-provider-deep-dive/README.md +++ b/libs/oci/tutorials/09-provider-deep-dive/README.md @@ -9,6 +9,7 @@ Understand the provider architecture and master provider-specific features. - Use Google Gemini multimodal capabilities - Work with Cohere Command models (RAG, V2 API) - Leverage xAI Grok reasoning features +- Use OpenAI models via ChatOCIOpenAI - Handle provider-specific quirks ## Prerequisites @@ -64,11 +65,12 @@ The provider system abstracts model-specific behaviors behind a common interface ``` Provider (base) ├── GenericProvider (Meta, xAI, OpenAI, Mistral) -│ ├── MetaProvider (Llama-specific) │ └── GeminiProvider (Gemini-specific) └── CohereProvider (Cohere-specific) ``` +> **Note:** `MetaProvider` is deprecated. Use `GenericProvider` (or let the library auto-detect) for Llama models. + ### Auto-Detection Providers are auto-detected from model IDs: @@ -213,7 +215,6 @@ Gemini offers advanced multimodal capabilities. | Model | Features | |-------|----------| | `google.gemini-2.5-flash` | Fast, multimodal | -| `google.gemini-2.5-flash` | Latest, multimodal | | `google.gemini-2.5-pro` | Most capable | ### Basic Usage @@ -473,20 +474,69 @@ print(response.content) --- -## Part 6: Provider Comparison +## Part 6: OpenAI Models (via ChatOCIOpenAI) + +For OpenAI models deployed in OCI, use `ChatOCIOpenAI` instead of `ChatOCIGenAI`. 
+ +### Available Models + +| Model | Features | +|-------|----------| +| `openai.gpt-4.1` | Tools, reasoning | +| `openai.o1` | Advanced reasoning | + +### Basic Usage + +```python +from langchain_oci import ChatOCIOpenAI +from oci.auth.signers import get_resource_principals_signer + +# Using resource principal auth (OCI Functions, Jobs) +signer = get_resource_principals_signer() + +llm = ChatOCIOpenAI( + auth=signer, + compartment_id="ocid1.compartment.oc1..xxx", + model="openai.gpt-4.1", + region="us-chicago-1", +) + +response = llm.invoke("What are the benefits of cloud computing?") +print(response.content) +``` + +### With Conversation Store + +OpenAI models support persistent conversation memory: + +```python +llm = ChatOCIOpenAI( + auth=signer, + compartment_id="ocid1.compartment.oc1..xxx", + model="openai.gpt-4.1", + conversation_store_id="ocid1.generativeaiagentconversation.oc1..xxx", + region="us-chicago-1", +) +``` + +> **Note:** `ChatOCIOpenAI` uses the OpenAI Responses API and has different initialization parameters than `ChatOCIGenAI`. + +--- + +## Part 7: Provider Comparison ### Feature Matrix -| Feature | Meta | Gemini | Cohere | xAI | -|---------|------|--------|--------|-----| -| Vision | ✅ Llama 3.2 | ✅ All | ✅ V2/DAC | ✅ | -| PDF | ❌ | ✅ | ❌ | ❌ | -| Video | ❌ | ✅ | ❌ | ❌ | -| Audio | ❌ | ✅ | ❌ | ❌ | -| Parallel Tools | ✅ Llama 4+ | ❌ | ❌ | ❌ | -| Citations | ❌ | ❌ | ✅ | ❌ | -| Reasoning | ❌ | ❌ | ❌ | ✅ | -| tool_choice | ✅ | ✅ | ❌ | ✅ | +| Feature | Meta | Gemini | Cohere | xAI | OpenAI | +|---------|------|--------|--------|-----|--------| +| Vision | ✅ Llama 3.2 | ✅ All | ✅ V2/DAC | ✅ | ❌ | +| PDF | ❌ | ✅ | ❌ | ❌ | ❌ | +| Video | ❌ | ✅ | ❌ | ❌ | ❌ | +| Audio | ❌ | ✅ | ❌ | ❌ | ❌ | +| Parallel Tools | ✅ Llama 4+ | ❌ | ❌ | ❌ | ✅ | +| Citations | ❌ | ❌ | ✅ | ❌ | ❌ | +| Reasoning | ❌ | ❌ | ❌ | ✅ | ✅ o1 | +| tool_choice | ✅ | ✅ | ❌ | ✅ | ✅ | ### Performance Characteristics @@ -496,10 +546,11 @@ print(response.content) | Gemini Flash | Very Low | Very High | Multimodal, speed | | Cohere Command | Medium | Medium | RAG, search | | xAI Grok | Medium | Medium | Reasoning tasks | +| OpenAI GPT-4 | Medium | Medium | General tasks, tools | --- -## Part 7: Best Practices +## Part 8: Best Practices ### Choosing a Provider diff --git a/libs/oci/tutorials/README.md b/libs/oci/tutorials/README.md index 6518b2c7..202f7304 100644 --- a/libs/oci/tutorials/README.md +++ b/libs/oci/tutorials/README.md @@ -13,42 +13,45 @@ Welcome to the langchain-oci tutorials! These tutorials will take you from begin │ Started │ First chat, provider intro └────────┬────────┘ │ - ┌──────────────┴──────────────┐ - ▼ ▼ - ┌─────────────────┐ ┌─────────────────┐ - │ 02. Vision & │ │ 03. Building │ create_oci_agent() - │ Multimodal │ │ AI Agents │ checkpointing - └────────┬────────┘ └────────┬────────┘ - │ │ - └──────────────┬───────────────┘ + ▼ + ┌─────────────────┐ + │ 02. Vision & │ Images, PDFs, video, audio + │ Multimodal │ + └────────┬────────┘ │ INTERMEDIATE │ ┌─────────────┴─────────────┐ ▼ ▼ ┌─────────────────┐ ┌─────────────────┐ - │ 04. Tool │ │ 05. Structured │ - │ Calling │ │ Output │ + │ 03. Building │ │ 04. Tool │ + │ AI Agents │ │ Calling │ └────────┬────────┘ └────────┬────────┘ │ │ └─────────────┬──────────────┘ │ - ADVANCED - │ ▼ + ┌─────────────────┐ + │ 05. Structured │ + │ Output │ + └────────┬────────┘ + │ + ADVANCED + │ + ▼ ┌──────────────┐ │ 07. 
Async & │ │ Production │ └──────────────┘ - │ - SPECIALIZED - │ - ┌────────────┴────────────┐ - ▼ ▼ - ┌─────────────────┐ ┌─────────────────┐ - │ 09. Provider │ │ 10. Embeddings │ - │ Deep Dive │ │ Text & Image │ - └─────────────────┘ └─────────────────┘ + │ + SPECIALIZED + │ + ┌───────────┴───────────┐ + ▼ ▼ + ┌─────────────────┐ ┌─────────────────┐ + │ 09. Provider │ │ 10. Embeddings │ + │ Deep Dive │ │ Text & Image │ + └─────────────────┘ └─────────────────┘ ``` ## Tutorial Index @@ -57,7 +60,7 @@ Welcome to the langchain-oci tutorials! These tutorials will take you from begin |---|----------|-------|-------------| | 01 | [Getting Started](./01-getting-started/) | Beginner | Authentication, ChatOCIGenAI, providers | | 02 | [Vision & Multimodal](./02-vision-and-multimodal/) | Beginner | Image analysis, PDF, video, audio | -| 03 | [Building AI Agents](./03-building-ai-agents/) | Beginner | ReAct agents, tools, memory | +| 03 | [Building AI Agents](./03-building-ai-agents/) | Intermediate | ReAct agents, tools, memory | | 04 | [Tool Calling Mastery](./04-tool-calling-mastery/) | Intermediate | bind_tools, parallel calls, workflows | | 05 | [Structured Output](./05-structured-output/) | Intermediate | Pydantic schemas, JSON modes | | 07 | [Async for Production](./07-async-for-production/) | Advanced | ainvoke, astream, FastAPI | From 8c1f8b9507d2adfcc7af7cb0c89735412df91877 Mon Sep 17 00:00:00 2001 From: Federico Kamelhar Date: Fri, 27 Feb 2026 17:22:32 -0500 Subject: [PATCH 14/16] Address PR review feedback for documentation Changes: - Remove Async Support from CHANGELOG 0.2.0 (in separate PR #147) - Fix duplicate gemini-2.5-flash row, add gemini-2.5-flash-lite - Add OpenAI column to feature matrix in MODELS.md - Add note that feature matrix reflects tested capabilities - Fix top_k range to note it's model-dependent (not 1-500 for all) - Add ChatOCIOpenAI to provider table in Tutorial 09 - Fix OpenAI Vision capability (GPT-4o supports vision) --- libs/oci/docs/CHANGELOG.md | 7 +--- libs/oci/docs/MODELS.md | 32 ++++++++++--------- .../tutorials/01-getting-started/README.md | 4 +-- .../tutorials/09-provider-deep-dive/README.md | 5 +-- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/libs/oci/docs/CHANGELOG.md b/libs/oci/docs/CHANGELOG.md index de9cf07d..7b47a35d 100644 --- a/libs/oci/docs/CHANGELOG.md +++ b/libs/oci/docs/CHANGELOG.md @@ -38,11 +38,6 @@ All notable changes to langchain-oci are documented here. - `IMAGE_EMBEDDING_MODELS` registry - Support for `cohere.embed-v4.0` multimodal embeddings -#### Async Support -- Full async support via LangChain base classes -- `ainvoke()`, `astream()`, `abatch()` methods -- Async support for `ChatOCIModelDeployment` - #### Providers - `GeminiProvider` with `max_output_tokens` → `max_tokens` mapping - Enhanced `CohereProvider` with V2 API support @@ -82,7 +77,7 @@ All notable changes to langchain-oci are documented here. 
 | Version | Date | Highlights |
 |---------|------|------------|
-| 0.2.0 | 2025 | Vision, agents, parallel tools, async |
+| 0.2.0 | 2025 | Vision, agents, parallel tools, embeddings |
 | 0.1.0 | 2024 | Initial release |
 
 ---
 
diff --git a/libs/oci/docs/MODELS.md b/libs/oci/docs/MODELS.md
index 78a8bbb5..6c728622 100644
--- a/libs/oci/docs/MODELS.md
+++ b/libs/oci/docs/MODELS.md
@@ -29,7 +29,7 @@
 
 | Model ID | Type | Features | Context |
 |----------|------|----------|---------|
 | `google.gemini-2.5-flash` | Multimodal | PDF, video, audio, image | 1M |
-| `google.gemini-2.5-flash` | Multimodal | PDF, video, audio, image | 1M |
+| `google.gemini-2.5-flash-lite` | Multimodal | Fast, cost-efficient | 1M |
 | `google.gemini-2.5-pro` | Multimodal | Most capable | 1M |
 
 **Key Features:**
@@ -122,20 +122,22 @@
 
 ## Feature Matrix
 
-| Feature | Meta | Gemini | Cohere | xAI |
-|---------|------|--------|--------|-----|
-| Text Generation | ✅ | ✅ | ✅ | ✅ |
-| Vision (Images) | ✅ 3.2 | ✅ | ✅ DAC | ✅ |
-| PDF Processing | ❌ | ✅ | ❌ | ❌ |
-| Video Analysis | ❌ | ✅ | ❌ | ❌ |
-| Audio Analysis | ❌ | ✅ | ❌ | ❌ |
-| Tool Calling | ✅ | ✅ | ✅ | ✅ |
-| Parallel Tools | ✅ 4+ | ❌ | ❌ | ❌ |
-| `tool_choice` | ✅ | ✅ | ❌ | ✅ |
-| Citations | ❌ | ❌ | ✅ | ❌ |
-| Reasoning Content | ❌ | ❌ | ❌ | ✅ |
-| Streaming | ✅ | ✅ | ✅ | ✅ |
-| Async | ✅ | ✅ | ✅ | ✅ |
+> **Note:** This matrix shows tested capabilities as of the documentation date. See the [OCI docs](https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm) for the latest features.
+
+| Feature | Meta | Gemini | Cohere | xAI | OpenAI |
+|---------|------|--------|--------|-----|--------|
+| Text Generation | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Vision (Images) | ✅ 3.2 | ✅ | ✅ DAC | ✅ | ✅ |
+| PDF Processing | ❌ | ✅ | ❌ | ❌ | ❌ |
+| Video Analysis | ❌ | ✅ | ❌ | ❌ | ❌ |
+| Audio Analysis | ❌ | ✅ | ❌ | ❌ | ❌ |
+| Tool Calling | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Parallel Tools | ✅ 4+ | ❌ | ❌ | ❌ | ✅ |
+| `tool_choice` | ✅ | ✅ | ❌ | ✅ | ✅ |
+| Citations | ❌ | ❌ | ✅ | ❌ | ❌ |
+| Reasoning Content | ❌ | ❌ | ❌ | ✅ | ✅ o1 |
+| Streaming | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Async | ✅ | ✅ | ✅ | ✅ | ✅ |
 
 ---
 
diff --git a/libs/oci/tutorials/01-getting-started/README.md b/libs/oci/tutorials/01-getting-started/README.md
index 7e3d88ab..d99ec968 100644
--- a/libs/oci/tutorials/01-getting-started/README.md
+++ b/libs/oci/tutorials/01-getting-started/README.md
@@ -271,9 +271,9 @@ llm = ChatOCIGenAI(
 | Parameter | Range | Effect |
 |-----------|-------|--------|
 | `temperature` | 0.0 - 1.0 | Higher = more creative, lower = more focused |
-| `max_tokens` | Model-dependent | Maximum tokens in the response (varies by model) |
+| `max_tokens` | Model-dependent | Maximum tokens in the response (see [OCI docs](https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm)) |
 | `top_p` | 0.0 - 1.0 | Nucleus sampling cutoff |
-| `top_k` | 1 - 500 | Number of top tokens to consider |
+| `top_k` | Model-dependent | Number of top tokens to consider (typically 1-500, varies by model) |
 
 > **Note:** Maximum token limits vary by model. Check the [OCI Generative AI documentation](https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm) for specific model limits.
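
As a quick reference, the sketch below shows one way these generation parameters are typically passed to `ChatOCIGenAI` through `model_kwargs`. It is a minimal sketch only: the model ID, endpoint, and compartment OCID are placeholders, and the constructor arguments assume the `model_id`/`service_endpoint`/`compartment_id` pattern used throughout these tutorials.

```python
# Minimal sketch (not part of this patch): wiring the sampling parameters from
# the table above into ChatOCIGenAI. The model ID, endpoint, and compartment
# OCID below are placeholders; substitute real values from your tenancy.
from langchain_oci import ChatOCIGenAI

llm = ChatOCIGenAI(
    model_id="meta.llama-3.3-70b-instruct",
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="ocid1.compartment.oc1..xxx",
    model_kwargs={
        "temperature": 0.2,  # lower = more focused answers
        "max_tokens": 512,   # keep within the model's documented limit
        "top_p": 0.9,        # nucleus sampling cutoff
        "top_k": 40,         # number of top tokens to consider
    },
)

print(llm.invoke("Explain nucleus sampling in one sentence.").content)
```
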
diff --git a/libs/oci/tutorials/09-provider-deep-dive/README.md b/libs/oci/tutorials/09-provider-deep-dive/README.md
index 657fd76a..c47c0fd2 100644
--- a/libs/oci/tutorials/09-provider-deep-dive/README.md
+++ b/libs/oci/tutorials/09-provider-deep-dive/README.md
@@ -22,10 +22,11 @@ Understand the provider architecture and master provider-specific features.
 
 | Provider | Key Features |
 |----------|--------------|
-| `GenericProvider` | Base for Meta, xAI, OpenAI, Mistral |
+| `GenericProvider` | Base for Meta, xAI, Mistral |
 | `MetaProvider` | Llama 3.2/3.3/4, vision, parallel tools |
 | `GeminiProvider` | Multimodal (PDF, video, audio) |
 | `CohereProvider` | RAG, citations, V2 vision API |
+| `ChatOCIOpenAI` | OpenAI models (gpt-4.1, o1), conversation stores |
 
 ---
 
@@ -529,7 +530,7 @@ llm = ChatOCIOpenAI(
 
 | Feature | Meta | Gemini | Cohere | xAI | OpenAI |
 |---------|------|--------|--------|-----|--------|
-| Vision | ✅ Llama 3.2 | ✅ All | ✅ V2/DAC | ✅ | ❌ |
+| Vision | ✅ Llama 3.2 | ✅ All | ✅ V2/DAC | ✅ | ✅ GPT-4o |
 | PDF | ❌ | ✅ | ❌ | ❌ | ❌ |
 | Video | ❌ | ✅ | ❌ | ❌ | ❌ |
 | Audio | ❌ | ✅ | ❌ | ❌ | ❌ |
From e25fcda6ee16a177f0832b60536cfbdce674f75e Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Fri, 27 Feb 2026 17:31:55 -0500
Subject: [PATCH 15/16] Add note pointing to ChatOCIOpenAI in Tutorial 01
 provider table

---
 libs/oci/tutorials/01-getting-started/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libs/oci/tutorials/01-getting-started/README.md b/libs/oci/tutorials/01-getting-started/README.md
index d99ec968..9ed233a3 100644
--- a/libs/oci/tutorials/01-getting-started/README.md
+++ b/libs/oci/tutorials/01-getting-started/README.md
@@ -167,6 +167,8 @@ llm = ChatOCIGenAI(
 | **Google** | Gemini 2.0 Flash, 2.5 | Multimodal (PDF, video, audio) |
 | **xAI** | Grok 4 | Fast reasoning, vision |
 
+> **OpenAI Models:** For OpenAI models (GPT-4.1, o1, etc.), use `ChatOCIOpenAI` instead of `ChatOCIGenAI`. See [Tutorial 08: OpenAI Responses API](../08-openai-responses-api/).
+
 ### Popular Model IDs
 
 ```python
From 22443fea0145939c739075ea628d0993a8a66e7b Mon Sep 17 00:00:00 2001
From: Federico Kamelhar
Date: Fri, 27 Feb 2026 17:50:15 -0500
Subject: [PATCH 16/16] fix: use correct content format for PDF and video in
 tutorials

The multimodal tutorials were using an unsupported 'media' content type.
Changed to use the correct formats:
- PDF: document_url with data URL
- Video: video_url with data URL

This matches the provider's supported content types (text, image_url,
document_url, video_url, audio_url) and fixes runtime errors when users
try to run these tutorials.

Tested with actual PDF and video files against Gemini 2.5 Flash.
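
Audio follows the same data-URL pattern via the `audio_url` content type listed above, although this patch does not touch the audio tutorial. The snippet below is an illustrative sketch only; the helper name and the `audio/mpeg` MIME type are assumptions, not code from this repository.

```python
# Illustrative sketch only: building an audio message with the audio_url
# content type, mirroring the document_url/video_url fixes in the diff below.
import base64

from langchain_core.messages import HumanMessage


def build_audio_message(audio_path: str, prompt: str) -> HumanMessage:
    # Read the file and base64-encode it, as the PDF and video helpers do
    with open(audio_path, "rb") as f:
        audio_data = base64.b64encode(f.read()).decode("utf-8")

    return HumanMessage(
        content=[
            {"type": "text", "text": prompt},
            {
                "type": "audio_url",
                "audio_url": {"url": f"data:audio/mpeg;base64,{audio_data}"},
            },
        ]
    )
```
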
---
 .../02-vision-and-multimodal/code/pdf_processing.py | 7 +++++--
 .../02-vision-and-multimodal/code/video_analysis.py | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py
index 0fe75ffb..7c3e008e 100644
--- a/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py
+++ b/libs/oci/tutorials/02-vision-and-multimodal/code/pdf_processing.py
@@ -25,11 +25,14 @@ def analyze_pdf(pdf_path: str, prompt: str):
     with open(pdf_path, "rb") as f:
         pdf_data = base64.b64encode(f.read()).decode("utf-8")
 
-    # Create message with PDF
+    # Create message with PDF using document_url format
     message = HumanMessage(
         content=[
             {"type": "text", "text": prompt},
-            {"type": "media", "data": pdf_data, "mime_type": "application/pdf"},
+            {
+                "type": "document_url",
+                "document_url": {"url": f"data:application/pdf;base64,{pdf_data}"},
+            },
         ]
     )
diff --git a/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py
index 1119cf89..168269b4 100644
--- a/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py
+++ b/libs/oci/tutorials/02-vision-and-multimodal/code/video_analysis.py
@@ -37,11 +37,14 @@ def analyze_video(video_path: str, prompt: str):
     else:
         mime_type = "video/mp4"  # Default
 
-    # Create message with video
+    # Create message with video using video_url format
     message = HumanMessage(
         content=[
            {"type": "text", "text": prompt},
-            {"type": "media", "data": video_data, "mime_type": mime_type},
+            {
+                "type": "video_url",
+                "video_url": {"url": f"data:{mime_type};base64,{video_data}"},
+            },
        ]
    )
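
With the content types corrected, the two tutorial scripts can be exercised end to end. A brief usage sketch, assuming `pdf_processing.py` and `video_analysis.py` are importable and that local sample files exist at the paths shown; only the `analyze_pdf`/`analyze_video` signatures come from the diffs above.

```python
# Illustrative usage sketch (not part of the patch). Assumes pdf_processing.py
# and video_analysis.py are on the import path and that sample.pdf/sample.mp4
# exist locally; only the helper signatures are taken from the diffs above.
from pdf_processing import analyze_pdf
from video_analysis import analyze_video

# Each helper builds the multimodal HumanMessage shown above and sends it to a
# Gemini vision model (the commit reports testing against Gemini 2.5 Flash).
analyze_pdf("sample.pdf", "Summarize the key points of this document.")
analyze_video("sample.mp4", "Describe what happens in this clip.")
```
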