anyscale · akshay-anyscale · Jul 25, 2023 · Jul 25, 2023 · Jul 25, 2023 · Aug 15, 2023
@@ -0,0 +1,10 @@
+head_node_type:
+  name: head_node_type
+  instance_type: m5.2xlarge  # NOTE(review): looks like AWS EC2 instance naming; confirm this is the AWS variant of the config
+
+worker_node_types:
+- name: cpu_worker
+  instance_type: m5.2xlarge  # same instance type as the head node
+  min_workers: 0
+  max_workers: 2  # together with min_workers, bounds this worker type to between 0 and 2 workers
+  use_spot: false  # presumably requests on-demand rather than spot capacity; verify against the compute config schema
@@ -0,0 +1,10 @@
+head_node_type:
+  name: head_node_type
+  instance_type: n2-standard-8  # NOTE(review): looks like GCP machine-type naming; confirm this is the GCP variant of the config
+
+worker_node_types:
+- name: cpu_worker
+  instance_type: n2-standard-8  # same instance type as the head node
+  min_workers: 0
+  max_workers: 2  # together with min_workers, bounds this worker type to between 0 and 2 workers
+  use_spot: false  # presumably requests on-demand rather than spot capacity; verify against the compute config schema
diff --git a/templates/audio-whisper/README.md b/templates/audio-whisper/README.md
@@ -0,0 +1,5 @@
+# Audio demo with OpenAI Whisper
+
+This demo shows how to run OpenAI Whisper in an Anyscale Workspace.  
+It also demonstrates a cluster environment with audio-processing dependencies such as ffmpeg.  
+
diff --git a/templates/audio-whisper/sample-ch.mp3 b/templates/audio-whisper/sample-ch.mp3
diff --git a/templates/audio-whisper/sample-en.mp3 b/templates/audio-whisper/sample-en.mp3
diff --git a/templates/audio-whisper/whisper.ipynb b/templates/audio-whisper/whisper.ipynb
@@ -0,0 +1,90 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c4719f38-0166-4b80-a03f-290255b5c528",
+   "metadata": {},
+   "source": [
+    "# OpenAI Whisper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aaa04be4-6913-43bb-b20f-93b2cde52acc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import whisper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ca8fe87e-2912-46b1-8a27-5271b2ffbc9b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "model = whisper.load_model(\"base\")\n",
+    "result = model.transcribe(\"sample-en.mp3\")\n",
+    "print(result[\"text\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "50542e68-b306-4c12-ae72-a8a2b21347db",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "model = whisper.load_model(\"base\")\n",
+    "\n",
+    "# load audio and pad/trim it to fit 30 seconds\n",
+    "audio = whisper.load_audio(\"sample-ch.mp3\")\n",
+    "audio = whisper.pad_or_trim(audio)\n",
+    "\n",
+    "# make log-Mel spectrogram and move to the same device as the model\n",
+    "mel = whisper.log_mel_spectrogram(audio).to(model.device)\n",
+    "\n",
+    "# detect the spoken language\n",
+    "_, probs = model.detect_language(mel)\n",
+    "print(f\"Detected language: {max(probs, key=probs.get)}\")\n",
+    "\n",
+    "# decode the audio\n",
+    "options = whisper.DecodingOptions(fp16 = False)\n",
+    "print(options)\n",
+    "result = whisper.decode(model, mel, options)\n",
+    "\n",
+    "# print the recognized text\n",
+    "print(result.text)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}