Skip to content

Commit 5fda98c

Browse files
authored
Merge pull request #118 from chingu-voyages/prompt/ayer/speech
Prompt/ayer/speech
2 parents fd80e35 + 87f6b48 commit 5fda98c

File tree

11 files changed

+14039
-4173
lines changed

11 files changed

+14039
-4173
lines changed

backend/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"googleapis": "^148.0.0",
2121
"js-yaml": "^4.1.0",
2222
"jsonwebtoken": "^9.0.2",
23+
"multer": "^1.4.5-lts.2",
2324
"octokit": "^4.1.2",
2425
"swagger-jsdoc": "^6.2.8",
2526
"swagger-ui-express": "^5.0.1",
@@ -42,6 +43,7 @@
4243
"@types/jest": "^29.5.14",
4344
"@types/js-yaml": "^4",
4445
"@types/jsonwebtoken": "^9.0.9",
46+
"@types/multer": "^1",
4547
"@types/node": "^22.13.10",
4648
"@types/supertest": "^6.0.2",
4749
"@types/swagger-jsdoc": "^6",

backend/src/config/settings.ts

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,35 @@ import cookieParser from "cookie-parser";
44
import session from "express-session";
55
import cors from "cors";
66

7-
import { corsOptions, securityHeaders, authMiddleware } from "../middleware/index.js";
8-
import { authRoute, promptRoute, userRoute } from "../routes/index.js";
7+
import {authMiddleware, corsOptions, securityHeaders} from "../middleware/index.js";
8+
import {audioRoute, authRoute, promptRoute, userRoute} from "../routes/index.js";
99

1010
/**
 * Builds the Express application: body/cookie parsing, security headers,
 * CORS, static file serving, session handling and all API routers.
 *
 * Swagger is imported lazily and skipped entirely when NODE_ENV is "test".
 *
 * @returns the fully configured Express application instance.
 */
export const configApp = async () => {
    const app = express();
    app.use(express.json());

    // Swagger is only wired up outside of the test environment.
    if (process.env.NODE_ENV !== "test") {
        const {setupSwagger} = await import("../swagger.js");
        setupSwagger(app);
    }

    app.use(cookieParser());
    app.use(securityHeaders); // Security headers for privacy-focused browsers
    app.use(cors(corsOptions));

    // Serve static files.
    // path.resolve() without arguments yields the current working directory,
    // so the static folder is located relative to where the process starts.
    const __dirname = path.resolve();
    app.use(express.static(path.join(__dirname, "..", "static")));

    // NOTE(review): String(undefined) is the literal "undefined", so an unset
    // SESSION_SECRET silently becomes a guessable secret — confirm the
    // variable is always provided in deployed environments.
    const sessionSecret = String(process.env.SESSION_SECRET);
    app.use(
        session({secret: sessionSecret, resave: false, saveUninitialized: true})
    );

    app.use("/", authRoute);
    app.use("/users", authMiddleware, userRoute);
    app.use("/prompts", authMiddleware, promptRoute);
    // Audio transcription accepts large uploads and calls the Gemini API, so
    // it is protected by the same auth middleware as the other API routers.
    app.use("/audio", authMiddleware, audioRoute);

    return app;
};
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import {Request, Response} from "express";
2+
import {generateGeminiAudioResponse} from "../services/geminiService.js";
3+
4+
5+
/**
 * Express handler that transcribes an uploaded audio file.
 *
 * Reads the upload from `req.file` (its `buffer` and `mimetype`), passes it
 * to `generateGeminiAudioResponse`, and sends the returned value as the JSON
 * response body.
 *
 * Responses:
 * - 400 `{error}` when no file was uploaded or the uploaded file is empty.
 * - 500 `{error}` when the transcription call throws.
 *
 * @param req - incoming request; `req.file` is expected to hold the upload.
 * @param res - response used to send the transcript or an error payload.
 */
export const transcribePrompt = async (req: Request, res: Response): Promise<void> => {
    try {
        const file = req.file;

        if (!file) {
            res.status(400).json({error: 'No audio file provided'});
            return;
        }

        // An empty upload cannot be transcribed; reject it as a client error
        // instead of forwarding it and surfacing a generic 500.
        if (!file.buffer || file.buffer.length === 0) {
            res.status(400).json({error: 'Audio file is empty'});
            return;
        }

        const result = await generateGeminiAudioResponse({
            audioBuffer: file.buffer,
            mimeType: file.mimetype
        });

        res.json(result);
    } catch (error) {
        console.error('Audio transcription error:', error);
        res.status(500).json({error: 'Failed to transcribe audio'});
    }
};

backend/src/controllers/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export * as authController from "./authController.js";
22
export * as userController from "./userController.js";
33
export * as promptController from "./promptController.js";
4+
export * as audioController from "./audioController.js";

backend/src/routes/audioRoutes.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import {Router} from "express";
2+
import multer from 'multer';
3+
import {audioController} from "../controllers/index.js";
4+
5+
// Uploads are kept in memory so the controller can read `req.file.buffer`.
const storage = multer.memoryStorage();
const upload = multer({
    storage,
    limits: {fileSize: 20 * 1024 * 1024} // 20MB limit
});

// Parses a single multipart file sent under the "audio" field.
const uploadSingleAudio = upload.single('audio');

export const audioRoute: Router = Router();

audioRoute.post(
    "/transcribe",
    (req, res, next) => {
        // Run multer manually so upload failures (e.g. file too large) are
        // answered with a JSON client error instead of falling through to
        // the default Express error handler as a 500.
        uploadSingleAudio(req, res, (err?: unknown) => {
            if (!err) {
                next();
                return;
            }

            if (err instanceof multer.MulterError) {
                const status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
                res.status(status).json({error: err.message});
                return;
            }

            // Anything that is not a multer error is left to the app-level handler.
            next(err);
        });
    },
    audioController.transcribePrompt
);
14+

backend/src/routes/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export * from "./userRoutes.js";
22
export * from "./promptRoutes.js";
33
export * from "./authRoutes.js";
4+
export * from "./audioRoutes.js";

backend/src/routes/promptRoutes.ts

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -345,42 +345,6 @@ promptRoute.get("/", promptController.getAllPrompts);
345345
*/
346346
promptRoute.put("/:promptId", promptController.updateScorePrompt);
347347

348-
// /**
349-
// * @swagger
350-
// * /prompts:
351-
// * delete:
352-
// * summary: Delete all prompts for the authenticated user
353-
// * description: Removes all prompt records associated with the currently authenticated user.
354-
// * tags:
355-
// * - Prompts
356-
// * security:
357-
// * - cookieAuth: []
358-
// * responses:
359-
// * 204:
360-
// * description: All prompts successfully deleted (no content returned)
361-
// * 401:
362-
// * description: Unauthorized - user not authenticated
363-
// * content:
364-
// * application/json:
365-
// * schema:
366-
// * type: object
367-
// * properties:
368-
// * error:
369-
// * type: string
370-
// * example: Unauthorized
371-
// * 500:
372-
// * description: Internal server error
373-
// * content:
374-
// * application/json:
375-
// * schema:
376-
// * type: object
377-
// * properties:
378-
// * error:
379-
// * type: string
380-
// * example: Something went wrong
381-
// */
382-
// promptRoute.delete("/", authMiddleware, promptController.deleteAllPrompts);
383-
384348
/**
385349
* @swagger
386350
* /prompts/{promptId}:
@@ -445,4 +409,3 @@ promptRoute.put("/:promptId", promptController.updateScorePrompt);
445409
*/
446410
promptRoute.delete("/:promptId", promptController.deletePrompt);
447411

448-

backend/src/services/geminiService.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import {GoogleGenerativeAI} from "@google/generative-ai";
22
import {GeminiResponseType} from "../types/promptTypes.js";
3+
import {AudioRequest} from "../types/audioTypes.js";
34

45
// Initialize Gemini AI with the API key from environment variables
56
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
@@ -54,6 +55,39 @@ export const generateGeminiResponse = async (promptText: string): Promise<Gemini
5455
}
5556
};
5657

58+
/**
 * Asks the "gemini-2.0-flash" model for a transcript of an audio clip.
 *
 * The audio bytes are base64-encoded and sent as inline data together with a
 * short text instruction. The model's text reply is passed through
 * `stripMarkdown` before being returned.
 *
 * @param request - audio bytes plus the MIME type to declare for them.
 * @returns the transcript text after Markdown stripping.
 * @throws rethrows any error raised while encoding or calling the model,
 *         after logging it.
 */
export const generateGeminiAudioResponse = async (
    request: AudioRequest
): Promise<string> => {
    try {
        const model = genAI.getGenerativeModel({ model: "gemini-2.0-flash" });

        const instructionPart = { text: "Generate a transcript of this audio:" };
        const audioPart = {
            inlineData: {
                data: request.audioBuffer.toString('base64'),
                mimeType: request.mimeType
            }
        };

        const { response } = await model.generateContent({
            contents: [{ role: "user", parts: [instructionPart, audioPart] }]
        });

        return stripMarkdown(response.text());
    } catch (error) {
        // Log here for context, then let the caller decide how to respond.
        console.error("Gemini Audio Error:", error);
        throw error;
    }
};
88+
89+
90+
5791
/**
5892
* Strips Markdown formatting from the given text.
5993
*
@@ -71,3 +105,6 @@ function stripMarkdown(text: string): string {
71105
.replace(/\n{2,}/g, '\n\n') // Normalize spacing
72106
.trim(); // Remove leading/trailing whitespace
73107
}
108+
109+
110+

backend/src/types/audioTypes.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
/**
 * Input for an audio transcription request.
 */
export interface AudioRequest {
    /** Raw bytes of the audio clip. */
    audioBuffer: Buffer;
    /** MIME type declared for the audio bytes. */
    mimeType: string;
}

0 commit comments

Comments
 (0)