Skip to content

Commit b86f01d

Browse files
authored
Merge pull request #43 from ndrean/audio-transcription
#18 1) adding audio transcription
2 parents 442bb69 + 9f4cdf4 commit b86f01d

File tree

15 files changed

+1494
-428
lines changed

15 files changed

+1494
-428
lines changed

.github/workflows/ci.yml

+8-2
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,22 @@ jobs:
2727
- name: Set up Elixir
2828
uses: erlef/setup-beam@v1
2929
with:
30-
elixir-version: '1.14.2' # Define the elixir version [required]
31-
otp-version: '25.1.2' # Define the OTP version [required]
30+
elixir-version: '1.16.0' # Define the elixir version [required]
31+
otp-version: '26.2.1' # Define the OTP version [required]
32+
33+
- name: Installing ffmpeg
34+
uses: FedericoCarboni/setup-ffmpeg@v3
35+
3236
- name: Restore dependencies cache
3337
uses: actions/cache@v2
3438
with:
3539
path: deps
3640
key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }}
3741
restore-keys: ${{ runner.os }}-mix-
42+
3843
- name: Install dependencies
3944
run: mix deps.get
45+
4046
- name: Run Tests
4147
run: mix coveralls.json
4248
env:

README.md

+879-213
Large diffs are not rendered by default.

assets/js/app.js

+17-11
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@ import "phoenix_html";
2020
// Establish Phoenix Socket and LiveView configuration.
2121
import { Socket } from "phoenix";
2222
import { LiveSocket } from "phoenix_live_view";
23-
import Toastify from 'toastify-js'
23+
import Toastify from "toastify-js";
24+
import Audio from "./micro.js";
2425
import topbar from "../vendor/topbar";
2526

26-
let Hooks = {};
27+
let Hooks = { Audio };
2728

2829
// Hook to track inactivity
2930
Hooks.ActivityTracker = {
@@ -36,7 +37,7 @@ Hooks.ActivityTracker = {
3637
let processHasBeenSent = false;
3738

3839
// We use the `mounted()` context to push the event. This is used in the `setTimeout` function below.
39-
let ctx = this
40+
let ctx = this;
4041

4142
// Function to reset the timer
4243
function resetInactivityTimer() {
@@ -66,22 +67,27 @@ Hooks.ActivityTracker = {
6667
// Hook to show message toast
6768
Hooks.MessageToaster = {
6869
mounted() {
69-
this.handleEvent('toast', (payload) => {
70+
this.handleEvent("toast", (payload) => {
7071
Toastify({
7172
text: payload.message,
7273
gravity: "bottom",
7374
position: "right",
7475
style: {
7576
background: "linear-gradient(to right, #f27474, #ed87b5)",
7677
},
77-
duration: 4000
78-
}).showToast();
79-
})
80-
}
81-
}
78+
duration: 4000,
79+
}).showToast();
80+
});
81+
},
82+
};
8283

83-
let csrfToken = document.querySelector("meta[name='csrf-token']").getAttribute("content");
84-
let liveSocket = new LiveSocket("/live", Socket, { hooks: Hooks, params: { _csrf_token: csrfToken } });
84+
let csrfToken = document
85+
.querySelector("meta[name='csrf-token']")
86+
.getAttribute("content");
87+
let liveSocket = new LiveSocket("/live", Socket, {
88+
hooks: Hooks,
89+
params: { _csrf_token: csrfToken },
90+
});
8591

8692
// Show progress bar on live navigation and form submits
8793
topbar.config({ barColors: { 0: "#29d" }, shadowColor: "rgba(0, 0, 0, .3)" });

assets/js/micro.js

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
 * Phoenix LiveView hook that records audio from the user's microphone.
 *
 * Clicking the `#record` button toggles recording. When a recording stops,
 * the captured audio is loaded into the `#audio` element for playback and
 * handed to the hook's `upload` under the "speech" upload name.
 *
 * Expects three elements to be present in the DOM when mounted:
 * `#record` (the toggle button), `#audio` (playback element) and
 * `#text` (the button label).
 */
const AudioRecorderHook = {
  mounted() {
    // Elements and class sets used to reflect the recording state in the UI.
    const recordButton = document.getElementById("record");
    const audioElement = document.getElementById("audio");
    const text = document.getElementById("text");
    const blue = ["bg-blue-500", "hover:bg-blue-700"];
    const pulseGreen = ["bg-green-500", "hover:bg-green-700", "animate-pulse"];

    let mediaRecorder;
    let audioChunks = [];
    // Object URL of the last recording, kept so it can be revoked later.
    let audioUrl = null;

    // Arrow functions are used throughout so `this` stays bound to the hook;
    // no alias (and no accidental global) is needed to reach `this.upload`.
    recordButton.addEventListener("click", () => {
      // Already recording: stop. The "stop" handler below finishes the job.
      if (mediaRecorder && mediaRecorder.state === "recording") {
        mediaRecorder.stop();
        text.textContent = "Record";
        return;
      }

      // Otherwise the user wants to start a new recording.
      navigator.mediaDevices
        .getUserMedia({ audio: true })
        .then((stream) => {
          mediaRecorder = new MediaRecorder(stream);
          mediaRecorder.start();

          recordButton.classList.remove(...blue);
          recordButton.classList.add(...pulseGreen);
          text.textContent = "Stop";

          // Collect the recorded data as it becomes available.
          mediaRecorder.addEventListener("dataavailable", (event) => {
            audioChunks.push(event.data);
          });

          mediaRecorder.addEventListener("stop", () => {
            // Release the microphone; otherwise the device stays captured
            // after the recording has ended.
            stream.getTracks().forEach((track) => track.stop());

            const audioBlob = new Blob(audioChunks);
            audioChunks = [];

            // Free the previous recording before pointing the player at the new one.
            if (audioUrl !== null) {
              URL.revokeObjectURL(audioUrl);
            }
            audioUrl = URL.createObjectURL(audioBlob);
            audioElement.src = audioUrl;

            this.upload("speech", [audioBlob]);

            recordButton.classList.remove(...pulseGreen);
            recordButton.classList.add(...blue);
          });
        })
        .catch((error) => {
          // Permission denied, no input device, insecure context, etc.
          console.error("Unable to start audio recording:", error);
        });
    });
  },
};

export default AudioRecorderHook;

assets/pnpm-lock.yaml

+16
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/app/application.ex

+16-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ defmodule App.Application do
77

88
@impl true
99
def start(_type, _args) do
10+
1011
App.Models.verify_and_download_models()
1112

1213
children = [
@@ -16,16 +17,29 @@ defmodule App.Application do
1617
App.Repo,
1718
# Start the PubSub system
1819
{Phoenix.PubSub, name: App.PubSub},
20+
# Nx serving for the embedding
21+
# App.TextEmbedding,
22+
23+
# Nx serving for Speech-to-Text
24+
{Nx.Serving,
25+
serving:
26+
if Application.get_env(:app, :use_test_models) == true do
27+
App.Models.audio_serving_test()
28+
else
29+
App.Models.audio_serving()
30+
end,
31+
name: Whisper},
1932
# Nx serving for image classifier
2033
{Nx.Serving,
2134
serving:
2235
if Application.get_env(:app, :use_test_models) == true do
23-
App.Models.serving_test()
36+
App.Models.caption_serving_test()
2437
else
25-
App.Models.serving()
38+
App.Models.caption_serving()
2639
end,
2740
name: ImageClassifier},
2841
{GenMagic.Server, name: :gen_magic},
42+
2943
# Adding a supervisor
3044
{Task.Supervisor, name: App.TaskSupervisor},
3145
# Start the Endpoint (http/https)

0 commit comments

Comments
 (0)