docs(ai): add upscaling pipeline (#585)

rickstaa · web-flow · commit 938abb231abd · 2024-06-10T21:11:10.000+02:00
* docs(ai): add upscaling pipeline

This commit adds the documentation for the new upscaling pipeline to the
docs.

* docs(ai): add nsfw to pipeline response

This commit adds the new nsfw parameter in the response displayed in the
pipeline documentation.

* docs(ai): fix upscale diagram image path

This commit ensures that the right image path is used in the mermaid
graph.
diff --git a/ai/api-reference/ai-openapi-schema.yml b/ai/api-reference/ai-openapi-schema.yml
@@ -118,6 +118,43 @@ paths:
                 $ref: '#/components/schemas/HTTPValidationError'
       security:
       - HTTPBearer: []
+  /upscale:
+    post:
+      summary: Upscale
+      operationId: upscale
+      requestBody:
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/Body_upscale_upscale_post'
+        required: true
+      responses:
+        '200':
+          description: Successful Response
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ImageResponse'
+        '400':
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPError'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPError'
+        '422':
+          description: Validation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/HTTPValidationError'
+      security:
+      - HTTPBearer: []
 components:
   schemas:
     APIError:
@@ -217,6 +254,32 @@ components:
       - image
       - model_id
       title: Body_image_to_video_image_to_video_post
+    Body_upscale_upscale_post:
+      properties:
+        prompt:
+          type: string
+          title: Prompt
+        image:
+          type: string
+          format: binary
+          title: Image
+        model_id:
+          type: string
+          title: Model Id
+          default: ''
+        safety_check:
+          type: boolean
+          title: Safety Check
+          default: true
+        seed:
+          type: integer
+          title: Seed
+      type: object
+      required:
+      - prompt
+      - image
+      - model_id
+      title: Body_upscale_upscale_post
     HTTPError:
       properties:
         detail:
diff --git a/ai/api-reference/upscale.mdx b/ai/api-reference/upscale.mdx
@@ -0,0 +1,21 @@
+---
+openapi: post /upscale
+---
+
+<Info>
+  The public [Livepeer.cloud](https://www.livepeer.cloud/) Gateway used in this
+  guide is intended for experimentation and is not guaranteed for production
+  use. It is a free, non-token-gated, but rate-limited service designed for
+  testing purposes. For production-ready applications, consider setting up your
+  own Gateway node or partnering with one via the `ai-video` channel on
+  [Discord](https://discord.gg/livepeer).
+</Info>
+
+<Note>
+  Please note that the **optimal** parameters for a given model may vary
+  depending on the specific model and use case. The parameters provided in this
+  guide are not model-specific and should be used as a starting point.
+  Additionally, some models may have parameters such as `guidance_scale` and
+  `num_inference_steps` disabled by default. For more information on
+  model-specific parameters, please refer to the respective model documentation.
+</Note>
diff --git a/ai/introduction.mdx b/ai/introduction.mdx
@@ -16,11 +16,12 @@ The **AI Video Subnet**, also known as the **AI Subnet**, is the first step
 toward bringing powerful AI video capabilities into the Livepeer network. It
 enables video developers to add a rapidly growing suite of **generative AI
 features** such as [text-to-image](/ai/pipelines/text-to-image),
-[image-to-image](/ai/pipelines/image-to-image) and
-[image-to-video](/ai/pipelines/image-to-video) to their applications. Livepeer
-Node operators are able to **earn revenue by deploying their GPU resources** for
-AI processing tasks. Ready to dive in? Choose one of the cards below to
-kickstart your journey with the AI Subnet.
+[image-to-image](/ai/pipelines/image-to-image),
+[image-to-video](/ai/pipelines/image-to-video) and
+[upscaling](/ai/pipelines/upscale) to their applications. Livepeer Node
+operators are able to **earn revenue by deploying their GPU resources** for AI
+processing tasks. Ready to dive in? Choose one of the cards below to kickstart
+your journey with the AI Subnet.
 
 ## Kickstart Your Journey
 
diff --git a/ai/pipelines/image-to-image.mdx b/ai/pipelines/image-to-image.mdx
@@ -110,6 +110,7 @@ to the Gateway:
 {
   "images": [
     {
+      "nsfw": false,
       "seed": 3197613440,
       "url": "https://<gateway-ip>/stream/dd5ad78d/7adde483.png"
     }
diff --git a/ai/pipelines/image-to-video.mdx b/ai/pipelines/image-to-video.mdx
@@ -99,6 +99,7 @@ to the Gateway:
 {
   "images": [
     {
+      "nsfw": false,
       "seed": 1914955328,
       "url": "/stream/2b835716/01c0e9a6.mp4"
     }
diff --git a/ai/pipelines/text-to-image.mdx b/ai/pipelines/text-to-image.mdx
@@ -117,6 +117,7 @@ to the Gateway:
 {
   "images": [
     {
+      "nsfw": false,
       "seed": 2562822894,
       "url": "https://<gateway-ip>/stream/d0fc1fc6/8fdf5a94.png"
     }
diff --git a/ai/pipelines/upscale.mdx b/ai/pipelines/upscale.mdx
@@ -0,0 +1,121 @@
+---
+title: Upscale
+---
+
+## Overview
+
+The AI Subnet's `upscale` pipeline provides **advanced image upscaling**.
+Powered by the latest diffusion models in HuggingFace's
+[super-resolution](https://huggingface.co/docs/diffusers/en/api/pipelines/stable_diffusion/upscale)
+pipeline, it enhances the resolution of input images by a factor of 4.
+
+<div align="center">
+
+{/* TODO: Replace with relative url when mintlify fixed issue. */}
+
+```mermaid
+graph LR
+    A[<div style="width: 128px;"><img src="https://mintlify.s3-us-west-1.amazonaws.com/na-36-ai-video/images/ai/cool-cat-low-res.png" alt="Image of low resolution cat"/></div>] --> B[Gateway]
+    P[A white cat wearing sunglasses on the beach] --> B
+    B --> C[Orchestrator]
+    C --> B
+    B --> D[<div style="width: 200px;"><img src="https://mintlify.s3-us-west-1.amazonaws.com/na-36-ai-video/images/ai/cool-cat.png" alt="Image of high resolution cat"/></div>]
+```
+
+</div>
+
+## Models
+
+### Warm Models
+
+The current warm model requested for the `upscale` pipeline is:
+
+- [stabilityai/stable-diffusion-x4-upscaler](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler):
+  A text-guided upscaling diffusion model trained on large LAION images,
+  offering enhanced resolution and controlled noise addition.
+
+<Tip>
+  For faster responses with different
+  [upscale](https://huggingface.co/docs/diffusers/en/api/pipelines/stable_diffusion/upscale)
+  compatible diffusion models, ask Orchestrators to load them on their GPUs via
+  the `ai-video` channel on the [Discord server](https://discord.gg/livepeer).
+</Tip>
+
+### On-Demand Models
+
+The following models have been tested and verified for the `upscale` pipeline:
+
+<Note>
+  If a specific model you wish to use is not listed, please submit a [feature
+  request](https://github.com/livepeer/ai-worker/issues/new?assignees=&labels=enhancement%2Cmodel&projects=&template=model_request.yml)
+  on GitHub to get the model verified and added to the list.
+</Note>
+
+{/* prettier-ignore */}
+<Accordion title="Tested and Verified Diffusion Models">
+- [stabilityai/stable-diffusion-x4-upscaler](https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler):
+  A text-guided upscaling diffusion model trained on large LAION images,
+  offering enhanced resolution and controlled noise addition.
+</Accordion>
+
+## Basic Usage Instructions
+
+<Tip>
+  For a detailed understanding of the `upscale` endpoint and to experiment with
+  the API, see the [AI Subnet API Reference](/ai/api-reference/upscale).
+</Tip>
+
+To upscale an image with the `upscale` pipeline, send a `POST` request to the
+Gateway's `upscale` API endpoint:
+
+```bash
+curl -X POST https://<gateway-ip>/upscale \
+    -F model_id="stabilityai/stable-diffusion-x4-upscaler" \
+    -F image=@<PATH_TO_IMAGE>/low_res_cat.png \
+    -F prompt="A white cat"
+```
+
+In this command:
+
+- `<gateway-ip>` should be replaced with your AI Gateway's IP address.
+- `model_id` is the diffusion model used for image upscaling.
+- The `image` field holds the **absolute** path to the image file to be
+  upscaled.
+- `prompt` is a descriptive text that provides context about the content of the
+  image.
+
+For additional optional parameters, refer to the
+[AI Subnet API Reference](/ai/api-reference/upscale).
+
+After execution, the Orchestrator processes the request and returns the response
+to the Gateway:
+
+```json
+{
+  "images": [
+    {
+      "nsfw": false,
+      "seed": 3197613440,
+      "url": "https://<gateway-ip>/stream/dd5ad78d/7adde483.png"
+    }
+  ]
+}
+```
+
+The `url` in the response is the URL of the upscaled image. Download the image
+with:
+
+```bash
+curl -O "https://<STORAGE_ENDPOINT>/stream/dd5ad78d/7adde483.png"
+```
+
+## API Reference
+
+<Card
+  title="API Reference"
+  icon="rectangle-terminal"
+  href="/ai/api-reference/upscale"
+>
+  Explore the `upscale` endpoint and experiment with the API in the AI Subnet
+  API Reference.
+</Card>
diff --git a/images/ai/cool-cat-low-res.png b/images/ai/cool-cat-low-res.png
diff --git a/mint.json b/mint.json
@@ -520,7 +520,8 @@
             "ai/pipelines/overview",
             "ai/pipelines/text-to-image",
             "ai/pipelines/image-to-image",
-            "ai/pipelines/image-to-video"
+            "ai/pipelines/image-to-video",
+            "ai/pipelines/upscale"
           ]
         },
         {

Original file line number	Diff line number	Diff line change
`@@ -110,6 +110,7 @@ to the Gateway:`
`110`	`110`	`{`
`111`	`111`	`"images": [`
`112`	`112`	`{`
	`113`	`+ "nsfw": false,`
`113`	`114`	`"seed": 3197613440,`
`114`	`115`	`"url": "https://<gateway-ip>/stream/dd5ad78d/7adde483.png"`
`115`	`116`	`}`
Original file line number	Diff line number	Diff line change
`@@ -99,6 +99,7 @@ to the Gateway:`
`99`	`99`	`{`
`100`	`100`	`"images": [`
`101`	`101`	`{`
	`102`	`+ "nsfw": false,`
`102`	`103`	`"seed": 1914955328,`
`103`	`104`	`"url": "/stream/2b835716/01c0e9a6.mp4"`
`104`	`105`	`}`
Original file line number	Diff line number	Diff line change
`@@ -117,6 +117,7 @@ to the Gateway:`
`117`	`117`	`{`
`118`	`118`	`"images": [`
`119`	`119`	`{`
	`120`	`+ "nsfw": false,`
`120`	`121`	`"seed": 2562822894,`
`121`	`122`	`"url": "https://<gateway-ip>/stream/d0fc1fc6/8fdf5a94.png"`
`122`	`123`	`}`
Original file line number	Diff line number	Diff line change
`@@ -520,7 +520,8 @@`
`520`	`520`	`"ai/pipelines/overview",`
`521`	`521`	`"ai/pipelines/text-to-image",`
`522`	`522`	`"ai/pipelines/image-to-image",`
`523`		`- "ai/pipelines/image-to-video"`
	`523`	`+ "ai/pipelines/image-to-video",`
	`524`	`+ "ai/pipelines/upscale"`
`524`	`525`	`]`
`525`	`526`	`},`
`526`	`527`	`{`