Commit 8abf0d2 ("changes")
1 parent 963e774

File tree: 2 files changed (+68, -0 lines)

demo/cache_manual_demo/run.py

Lines changed: 56 additions & 0 deletions
```python
"""Demo showcasing simple manual caching with gr.Cache()."""

import time

import gradio as gr

WEATHER_BY_CITY = {
    "san francisco": ("Foggy", 61),
    "new york": ("Cloudy", 72),
    "tokyo": ("Sunny", 78),
    "london": ("Rainy", 58),
    "nairobi": ("Clear", 75),
}


def normalize_city(city: str) -> str:
    return " ".join(city.lower().strip().split())


def lookup_weather(city: str, c=gr.Cache()):
    if not city.strip():
        return "", "Enter a city name.", ""

    cache_key = normalize_city(city)
    cached = c.get(cache_key)
    if cached is not None:
        return cached["forecast"], "Cache hit", cache_key

    time.sleep(2)
    condition, temperature = WEATHER_BY_CITY.get(cache_key, ("Windy", 68))
    forecast = (
        f"{city.strip()}: {condition}, {temperature} degF.\n"
        f"Normalized cache key: {cache_key}"
    )
    c.set(cache_key, forecast=forecast)
    return forecast, "Computed and stored", cache_key


with gr.Blocks(title="gr.Cache() Demo") as demo:
    gr.Markdown(
        "# `gr.Cache()` Demo\n"
        "This demo manually caches a normalized city lookup. "
        "Try the same city twice, or vary capitalization and spacing "
        "to reuse the same cached result."
    )

    city = gr.Textbox(label="City", value=" San Francisco ")
    forecast = gr.Textbox(label="Forecast", lines=3)
    status = gr.Textbox(label="Status")
    cache_key = gr.Textbox(label="Cache key used")

    gr.Button("Lookup").click(lookup_weather, city, [forecast, status, cache_key])


if __name__ == "__main__":
    demo.launch()
```
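The `normalize_city` helper in the demo is what lets `" San  Francisco "` and `"SAN FRANCISCO"` share one cache entry. Extracted standalone (copied from the demo above) so its behavior is easy to check:

```python
def normalize_city(city: str) -> str:
    # lowercase, trim, and collapse any internal whitespace runs
    return " ".join(city.lower().strip().split())

assert normalize_city("  San  Francisco ") == "san francisco"
assert normalize_city("TOKYO") == "tokyo"
```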

guides/04_additional-features/17_caching.md

Lines changed: 12 additions & 0 deletions
```diff
@@ -2,6 +2,14 @@
 
 ML inference is often expensive: image classification, text generation, and audio synthesis can each take seconds or more. If a user submits the same inputs twice, there's no reason to re-run the model. Gradio provides two caching mechanisms: `@gr.cache` for automatic exact-match caching, and `gr.Cache()` for manual cache control inside your functions.
 
+## Demo Apps
+
+Try the caching patterns in these demos:
+
+- [`@gr.cache()` function types demo](https://github.com/gradio-app/gradio/blob/main/demo/cache_demo/run.py) - sync, async, generator, and async generator caching
+- [`gr.Cache()` manual cache demo](https://github.com/gradio-app/gradio/blob/main/demo/cache_manual_demo/run.py) - normalized manual cache keys with explicit `get` / `set`
+- [`gr.Cache()` KV cache demo](https://github.com/gradio-app/gradio/blob/main/demo/cache_kv_demo/run.py) - transformer prefix reuse with cached KV state
+
 ## `@gr.cache` — Automatic Caching
 
 Add `@gr.cache` to any function to automatically cache its results. The decorator hashes inputs by their content — two different numpy arrays with the same pixel values will produce a cache hit. Cache hits bypass the Gradio queue entirely.
```
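The content-based hashing that the guide text above describes can be sketched in plain Python. This is an illustrative stand-in, not Gradio's implementation: `content_cache`, `classify`, and the hash-by-pickle scheme are all hypothetical, chosen only to show how equal-valued inputs (rather than identical objects) can share a cache entry:

```python
import functools
import hashlib
import pickle

def content_cache(func):
    """Cache results keyed by a hash of the arguments' content."""
    store = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # pickle serializes by value, so equal-valued inputs share a key
        key = hashlib.sha256(
            pickle.dumps((args, sorted(kwargs.items())))
        ).hexdigest()
        if key not in store:
            store[key] = func(*args, **kwargs)
        return store[key]

    return wrapper

calls = {"n": 0}

@content_cache
def classify(pixels):
    calls["n"] += 1          # count real executions
    return sum(pixels) % 10

a = [1, 2, 3]
b = [1, 2, 3]                # distinct object, identical content
assert classify(a) == classify(b) == 6
assert calls["n"] == 1       # the second call was a cache hit
```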
```diff
@@ -81,6 +89,8 @@ def my_function(prompt, c=gr.Cache()):
 
 If a queued function gets a successful hit from `c.get(...)`, Gradio also shows a timing badge in the UI. This badge says `used cache` instead of `from cache`, because the request still ran, but part of its work was reused from `gr.Cache()`.
 
+A minimal example is available in the [`gr.Cache()` manual cache demo](https://github.com/gradio-app/gradio/blob/main/demo/cache_manual_demo/run.py).
+
 ### Why use `gr.Cache()` over a plain dict?
 
 - **Thread-safe** — built-in locking for concurrent requests
```
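The "thread-safe" point in the guide hunk above can be illustrated with a minimal lock-guarded cache. This is a sketch of the idea only, not Gradio's actual `gr.Cache` internals: `LockedCache` is a hypothetical name, though its `get` / `set(key, **fields)` shape mirrors the usage shown in the demo file:

```python
import threading

class LockedCache:
    """Minimal thread-safe get/set cache (illustrative sketch)."""

    def __init__(self):
        self._lock = threading.Lock()
        self._store = {}

    def get(self, key, default=None):
        with self._lock:                     # guard concurrent reads
            return self._store.get(key, default)

    def set(self, key, **fields):
        with self._lock:                     # guard concurrent writes
            self._store[key] = fields

cache = LockedCache()

def worker(i):
    cache.set(f"k{i}", value=i)

# Concurrent writers do not corrupt the store.
threads = [threading.Thread(target=worker, args=(i,)) for i in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()
assert cache.get("k3") == {"value": 3}
```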
````diff
@@ -112,6 +122,8 @@ def generate(prompt, c=gr.Cache(per_session=True)):
     return output.text
 ```
 
+For a full runnable version, see the [`gr.Cache()` KV cache demo](https://github.com/gradio-app/gradio/blob/main/demo/cache_kv_demo/run.py).
+
 
 ## When to Use Caching
 
````
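The `per_session=True` flag shown in the hunk above scopes cache entries to a single user session. A hypothetical sketch of that keying scheme (`SessionCache` and its methods are illustrative only, not Gradio API):

```python
class SessionCache:
    """Sketch of per-session scoping: entries are keyed by session id."""

    def __init__(self):
        self._store = {}

    def get(self, session_id, key):
        # Each session only ever sees entries written under its own id.
        return self._store.get((session_id, key))

    def set(self, session_id, key, value):
        self._store[(session_id, key)] = value

c = SessionCache()
c.set("sess-a", "prompt", "cached for A")
assert c.get("sess-a", "prompt") == "cached for A"
assert c.get("sess-b", "prompt") is None  # other sessions see nothing
```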
0 commit comments
