7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -40,3 +40,10 @@ yarn-error.log*
# typescript
*.tsbuildinfo
next-env.d.ts

build

# Add to .gitignore
public/video-cache/
public/videos/
public/audios
5 changes: 5 additions & 0 deletions README.md
@@ -135,6 +135,11 @@ use cli rendering instead of ssr
`
npx remotion render remotion/index.ts MyVideo output.mp4
`
new: render with the captions passed as serialized props
`
node -p "JSON.stringify({captions:require('./remotion/captions/captions-1769673372841.json')})" > props.json && \
npx remotion render remotion/index.ts MyVideo output.mp4 --props=props.json
`
- runs directly in node.js
- bypasses next.js completely
- bundling and rendering work without errors
15 changes: 10 additions & 5 deletions app/actions/generate-audio.ts
@@ -4,6 +4,7 @@ import { elevenlabs } from '@ai-sdk/elevenlabs';
import { experimental_generateSpeech as generateSpeech} from "ai"
import { writeFile } from 'fs/promises';
import path from 'path';
import { mp3ToWav } from './mp3-16k_wav';

export async function generateAudio(cleanedStory: string){
// const rawText = await generateStory(genre)
@@ -14,7 +15,7 @@ export async function generateAudio(cleanedStory: string){
model: elevenlabs.speech('eleven_flash_v2'),
text: cleanedStory,
voice: "cgSgspJ2msm6clMCkdW9",
outputFormat: "pcm_16000",
outputFormat: "mp3",
providerOptions: {
elevenlabs: {
voiceSettings: {
@@ -27,10 +28,14 @@
})

const buffer = Buffer.from(result.audio.base64, "base64");
const fileName = `story-${Date.now()}.${result.audio.format}`;
const filePath = path.join(process.cwd(), "public/audios", fileName);

const baseName = `story-${Date.now()}`;
const mp3Path = path.join(process.cwd(), "public/audios", `${baseName}.mp3`);
const wavPath = path.join(process.cwd(), "public/audios", `${baseName}_16k.wav`);

await writeFile(filePath, buffer);
await writeFile(mp3Path, buffer);

await mp3ToWav(mp3Path, wavPath);


return {
@@ -39,7 +44,7 @@
format: result.audio.format,
mediaType: result.audio.mediaType,
uint8Array: result.audio.uint8Array,
url: `/audios/${fileName}`
url: `/audios/${baseName}_16k.wav`,
},
metadata: result.providerMetadata
}
17 changes: 17 additions & 0 deletions app/actions/mp3-16k_wav.ts
@@ -0,0 +1,17 @@
"use server"

import { execFile } from "child_process";
import { promisify } from "util";

const execFileAsync = promisify(execFile);

export async function mp3ToWav(mp3Path: string, wavPath: string) {
await execFileAsync("ffmpeg", [
"-y",
"-i", mp3Path,
"-ac", "1",
"-ar", "16000",
"-c:a", "pcm_s16le",
wavPath,
]);
Comment on lines +6 to +16
⚠️ Potential issue | 🟠 Major

Increase maxBuffer to prevent ffmpeg stderr from overflowing execFile's 1MB default.
ffmpeg writes progress logs to stderr by default; for longer audio files, this can exceed the buffer limit and silently abort conversions. Increase maxBuffer to 10MB (or higher depending on typical audio length), or alternatively add the -nostats flag to suppress progress output.

💡 Suggested fix (increase maxBuffer)
 export async function mp3ToWav(mp3Path: string, wavPath: string) {
-  await execFileAsync("ffmpeg", [
+  await execFileAsync("ffmpeg", [
     "-y",
     "-i", mp3Path,
     "-ac", "1",
     "-ar", "16000",
     "-c:a", "pcm_s16le",
     wavPath,
-  ]);
+  ], { maxBuffer: 10 * 1024 * 1024 });
 }
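
As an alternative also mentioned in the finding, suppressing ffmpeg's progress output sidesteps the buffer limit entirely. A minimal sketch, assuming the flag-based approach; the helper name `buildFfmpegArgs` is illustrative and not part of the PR:

```typescript
// Illustrative helper: assembles the ffmpeg args with -nostats so the
// periodic progress line never lands on stderr, keeping output well under
// execFile's 1 MiB default maxBuffer.
function buildFfmpegArgs(mp3Path: string, wavPath: string): string[] {
  return [
    "-y",                 // overwrite output without prompting
    "-nostats",           // suppress the progress/stats line on stderr
    "-loglevel", "error", // only real errors are printed
    "-i", mp3Path,
    "-ac", "1",           // mono
    "-ar", "16000",       // 16 kHz sample rate
    "-c:a", "pcm_s16le",  // 16-bit PCM for the WAV container
    wavPath,
  ];
}
```

Both safeguards can be combined by passing this array to `execFileAsync("ffmpeg", args, { maxBuffer: 10 * 1024 * 1024 })`.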

}
102 changes: 102 additions & 0 deletions app/api/render-video/route.ts
@@ -0,0 +1,102 @@
// app/api/render-video/route.ts
import { renderMedia, selectComposition } from "@remotion/renderer";
import path from "path";
import fs from "fs";
import type { Caption } from "@remotion/captions";
import { cleanOldCache, getCachedVideo } from "@/lib/video-cache";

export async function POST(request: Request) {
const renderData = await request.json();

// load captions
let captions: Caption[] = [];
if (renderData.captionsPath) {
try {
const captionsFullPath = path.join(process.cwd(), renderData.captionsPath);
const captionsContent = fs.readFileSync(captionsFullPath, "utf-8");
captions = JSON.parse(captionsContent);
} catch (error) {
console.error("Error loading captions:", error);
return Response.json(
{ success: false, error: "Failed to load captions" },
{ status: 500 }
);
}
}
Comment on lines +8 to +25
⚠️ Potential issue | 🔴 Critical

Block path traversal when loading captions.
renderData.captionsPath is client-controlled and is joined directly to process.cwd() (Line 15), so absolute paths or .. can escape the intended directory and read arbitrary files. Constrain to an allow‑listed base dir and reject paths outside it.

🔒 Proposed fix
-      const captionsFullPath = path.join(process.cwd(), renderData.captionsPath);
+      const captionsBaseDir = path.join(process.cwd(), "public", "captions");
+      const captionsFullPath = path.resolve(
+        captionsBaseDir,
+        renderData.captionsPath
+      );
+      if (!captionsFullPath.startsWith(captionsBaseDir + path.sep)) {
+        return Response.json(
+          { success: false, error: "Invalid captions path" },
+          { status: 400 }
+        );
+      }
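
The containment check in the fix can be isolated as a small pure predicate. A sketch; `isInsideBase` is a hypothetical name, not code from this PR:

```typescript
import path from "node:path";

// True only when `candidate`, resolved against `baseDir`, stays inside
// `baseDir`. Absolute candidates and ".." segments that escape the base
// are rejected, because path.resolve lets an absolute path win outright.
function isInsideBase(baseDir: string, candidate: string): boolean {
  const base = path.resolve(baseDir);
  const resolved = path.resolve(base, candidate);
  return resolved.startsWith(base + path.sep);
}
```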


// Download and cache background video if it's from Cloudinary
let videoUrl = renderData.videoUrl;
if (videoUrl && videoUrl.includes("cloudinary.com")) {
try {
videoUrl = await getCachedVideo(videoUrl);
console.log("Using cached video:", videoUrl);
} catch (error) {
console.error("Failed to cache video:", error);
return Response.json(
{ success: false, error: "Failed to download background video" },
{ status: 500 }
);
}
}
Comment on lines +27 to +40
⚠️ Potential issue | 🟠 Major

Validate Cloudinary URLs with URL parsing to avoid SSRF bypasses.
videoUrl.includes("cloudinary.com") (Line 29) is a substring check that can be bypassed via crafted URLs (userinfo/host tricks), enabling arbitrary fetches. Parse with new URL() and enforce protocol/hostname; reject other remote URLs.

🛡️ Proposed fix
-  if (videoUrl && videoUrl.includes("cloudinary.com")) {
-    try {
-      videoUrl = await getCachedVideo(videoUrl);
-      console.log("Using cached video:", videoUrl);
-    } catch (error) {
-      console.error("Failed to cache video:", error);
-      return Response.json(
-        { success: false, error: "Failed to download background video" },
-        { status: 500 }
-      );
-    }
-  }
+  if (videoUrl) {
+    let isCloudinary = false;
+    try {
+      const parsed = new URL(videoUrl);
+      isCloudinary =
+        parsed.protocol === "https:" &&
+        (parsed.hostname === "res.cloudinary.com" ||
+          parsed.hostname.endsWith(".cloudinary.com"));
+    } catch {
+      // Not an absolute URL → treat as local path
+    }
+
+    if (isCloudinary) {
+      try {
+        videoUrl = await getCachedVideo(videoUrl);
+        console.log("Using cached video:", videoUrl);
+      } catch (error) {
+        console.error("Failed to cache video:", error);
+        return Response.json(
+          { success: false, error: "Failed to download background video" },
+          { status: 500 }
+        );
+      }
+    } else if (/^https?:\/\//i.test(videoUrl)) {
+      return Response.json(
+        { success: false, error: "Unsupported remote video URL" },
+        { status: 400 }
+      );
+    }
+  }
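
The host check can be expressed as a standalone predicate; the exact allowed hostnames mirror the fix above and are an assumption, not a confirmed project policy:

```typescript
// Parses the URL and accepts only https URLs whose hostname is exactly
// res.cloudinary.com or a *.cloudinary.com subdomain. A substring check
// like url.includes("cloudinary.com") would accept crafted hosts such as
// "https://res.cloudinary.com@evil.com/...".
function isAllowedCloudinaryUrl(raw: string): boolean {
  let parsed: URL;
  try {
    parsed = new URL(raw);
  } catch {
    return false; // not an absolute URL
  }
  return (
    parsed.protocol === "https:" &&
    (parsed.hostname === "res.cloudinary.com" ||
      parsed.hostname.endsWith(".cloudinary.com"))
  );
}
```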


// Create the final input props
const inputProps = {
hook: renderData.hook,
story: renderData.story,
audioUrl: renderData.audioUrl,
videoUrl: videoUrl, // Use cached local path
highlightColor: renderData.highlightColor,
captions: captions,
};

const bundleLocation = path.join(process.cwd(), "build");
const outputDir = path.join(process.cwd(), "public", "videos");

if (!fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, { recursive: true });
}

try {
const composition = await selectComposition({
serveUrl: bundleLocation,
id: "MyVideo",
inputProps: inputProps,
timeoutInMilliseconds: 60000,
});

const lastCaptionEndMs = captions.length
? Math.max(...captions.map(c => c.endMs))
: 2000;

const durationInFrames = Math.ceil((lastCaptionEndMs / 1000) * 30);

// override duration
composition.durationInFrames = durationInFrames;

Comment on lines +59 to +75
⚠️ Potential issue | 🟠 Major

Use composition.fps instead of hardcoded 30 and handle missing captions properly.

The fps is hardcoded to 30 (line 71) when it should use composition.fps from the resolved composition metadata. The arbitrary 2000ms fallback when captions are missing (line 69) doesn't align with the actual composition duration.

However, note that directly mutating composition.durationInFrames after selectComposition() is not the intended Remotion API pattern. According to Remotion's documentation, dynamic composition metadata (like duration based on caption data) should be computed via the calculateMetadata() callback on the <Composition> component definition, not by mutating the composition object post-selection.

If the composition definition supports calculateMetadata(), move the caption-duration logic there so that selectComposition() returns the correct resolved duration. If that's not feasible for your architecture, at minimum fix the hardcoded fps:

- const durationInFrames = Math.ceil((lastCaptionEndMs / 1000) * 30);
- 
- // override duration
- composition.durationInFrames = durationInFrames;
+ const durationInFrames = lastCaptionEndMs
+   ? Math.ceil((lastCaptionEndMs / 1000) * composition.fps)
+   : composition.durationInFrames;
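
Whether the logic stays in the route or moves into a `calculateMetadata()` callback, it can be kept as a pure function. A sketch under that assumption; the names are illustrative:

```typescript
interface CaptionLike {
  endMs: number; // caption end time in milliseconds
}

// Derives the frame count from the last caption's end time at the
// composition's actual fps; with no captions, it keeps the composition's
// own duration instead of inventing an arbitrary fallback.
function durationFromCaptions(
  captions: CaptionLike[],
  fps: number,
  fallbackFrames: number
): number {
  if (captions.length === 0) return fallbackFrames;
  const lastEndMs = Math.max(...captions.map((c) => c.endMs));
  return Math.ceil((lastEndMs / 1000) * fps);
}
```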


const outputPath = path.join(outputDir, `story-${Date.now()}.mp4`);

await renderMedia({
composition,
serveUrl: bundleLocation,
codec: "h264",
outputLocation: outputPath,
inputProps: inputProps,
timeoutInMilliseconds: 300000,
});

// Clean old cache after successful render
cleanOldCache(7);

return Response.json({
success: true,
videoPath: `/videos/${path.basename(outputPath)}`,
});
} catch (error) {
console.error("Rendering error:", error);
return Response.json(
{ success: false, error: "Failed to render video" },
{ status: 500 }
);
}
}
15 changes: 15 additions & 0 deletions app/page.tsx
@@ -1,16 +1,31 @@
import { RenderButton } from "@/components/render-button";
import ChooseHighlightColor from "@/components/screens/choose-highlight-color";
import ChooseBGVideo from "@/components/screens/choose-stock-bg";
import GenerateAudio from "@/components/screens/generate-audio-screen";
import GenerateCaptions from "@/components/screens/generate-captions-screen";
import ScriptGenerator from "@/components/screens/script-generator";


export default function Page() {
return (
<div className="flex-1 mt-6 ml-6 space-y-6">

{/* correct */}
<ScriptGenerator />

{/* correct */}
<GenerateAudio />

{/* incorrect, captions are getting generated manually r9*/}
<GenerateCaptions />

{/* correct */}
<ChooseBGVideo />

{/* correct */}
<ChooseHighlightColor />

Comment on lines +12 to +27
🛠️ Refactor suggestion | 🟠 Major

Remove debug/internal comments before merging.

The inline comments ({/* correct */}, {/* incorrect, captions are getting generated manually r9*/}) appear to be developer notes rather than documentation. These should be removed or converted to meaningful documentation before merging.

The comment on line 19 (incorrect, captions are getting generated manually r9) suggests there may be an unresolved issue with the captions implementation.

🧹 Proposed cleanup
-
-      {/* correct */}
       <ScriptGenerator />
-
-      {/* correct */}
       <GenerateAudio />
-
-      {/* incorrect, captions are getting generated manually r9*/}
       <GenerateCaptions />
-
-      {/* correct */}
       <ChooseBGVideo />
-
-      {/* correct */}
       <ChooseHighlightColor />

<RenderButton />
</div>
);
}
35 changes: 35 additions & 0 deletions components/render-button.tsx
@@ -0,0 +1,35 @@
'use client';

import { useVideoStoryStore } from '@/store/useVideoStoryStore';
import { useState } from 'react';
import { Button } from './ui/button';

export function RenderButton() {
const [isRendering, setIsRendering] = useState(false);
const getRenderData = useVideoStoryStore(state => state.getRenderData);

const handleRender = async () => {
setIsRendering(true);

// get plain json from zustand
const renderData = getRenderData();

// send to /api/render-video
const response = await fetch('/api/render-video', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(renderData) // <-- this is plain json data no zustand
});

const result = await response.json();
console.log('Video rendered:', result.videoPath);

setIsRendering(false);
};
Comment on lines +11 to +28
⚠️ Potential issue | 🔴 Critical

Missing error handling and user feedback.

The fetch call has no error handling. Network failures, server errors (non-2xx responses), or JSON parse failures will cause unhandled exceptions, leaving isRendering stuck as true and the button permanently disabled.

🐛 Proposed fix with proper error handling
+import { toast } from 'sonner';
+
 // ...
 
     const handleRender = async () => {
         setIsRendering(true);
-
-        // get plain json from zustand
-        const renderData = getRenderData();
-
-        // send to /api/render-video
-        const response = await fetch('/api/render-video', {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify(renderData) // <-- this is plain json data no zustand
-        });
-
-        const result = await response.json();
-        console.log('Video rendered:', result.videoPath);
-
-        setIsRendering(false);
+        try {
+            const renderData = getRenderData();
+
+            const response = await fetch('/api/render-video', {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify(renderData),
+            });
+
+            if (!response.ok) {
+                throw new Error(`Render failed: ${response.status}`);
+            }
+
+            const result = await response.json();
+            console.log('Video rendered:', result.videoPath);
+            toast.success('Video rendered successfully!');
+        } catch (error) {
+            console.error('Render error:', error);
+            toast.error('Failed to render video');
+        } finally {
+            setIsRendering(false);
+        }
     };


return (
<Button onClick={handleRender} disabled={isRendering}>
{isRendering ? 'Rendering...' : 'Generate Final Video'}
</Button>
);
}
53 changes: 53 additions & 0 deletions components/screens/generate-captions-screen.tsx
@@ -0,0 +1,53 @@
"use client";

import { useVideoStoryStore } from "@/store/useVideoStoryStore";
import { useState } from "react";
import { toast } from "sonner";
import { Button } from "../ui/button";
import { generateCaptions } from "@/remotion/scripts/generate-captions";

export default function GenerateCaptions() {
const { audioUrl, setCaptionsPath, captionsPath } = useVideoStoryStore();
const [loading, setLoading] = useState(false);

const handleGenerateCaptions = async () => {
if (!audioUrl) {
toast.error("Generate audio first");
return;
}

setLoading(true);
try {
const { captionsPath } = await generateCaptions(`public/${audioUrl}`);
setCaptionsPath(captionsPath);
} catch {
toast.error("Failed to generate captions");
} finally {
setLoading(false);
}
Comment on lines +19 to +27
⚠️ Potential issue | 🟡 Minor

Potential path construction issue and error swallowing.

  1. Prepending public/ to audioUrl assumes audioUrl is a relative path without the public/ prefix. If the store already contains a full path or URL, this will fail.

  2. The catch block swallows the error without logging, making debugging difficult.

🔧 Suggested improvement
         setLoading(true);
         try {
-            const { captionsPath } = await generateCaptions(`public/${audioUrl}`);
+            // Ensure audioUrl doesn't already have public/ prefix
+            const normalizedPath = audioUrl.startsWith('public/') 
+                ? audioUrl 
+                : `public/${audioUrl}`;
+            const { captionsPath } = await generateCaptions(normalizedPath);
             setCaptionsPath(captionsPath);
-        } catch {
+        } catch (error) {
+            console.error("Caption generation failed:", error);
             toast.error("Failed to generate captions");
         } finally {
             setLoading(false);
         }
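
The normalization in the suggested improvement can also tolerate absolute URLs and leading slashes; which of these forms the store can actually contain is an assumption here:

```typescript
// Prefixes "public/" only for bare relative paths; full URLs and paths
// already under public/ pass through unchanged.
function normalizeAudioPath(audioUrl: string): string {
  if (/^https?:\/\//.test(audioUrl)) return audioUrl; // remote URL
  if (audioUrl.startsWith("public/")) return audioUrl; // already prefixed
  const trimmed = audioUrl.startsWith("/") ? audioUrl.slice(1) : audioUrl;
  return `public/${trimmed}`;
}
```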
🤖 Prompt for AI Agents
In `@components/screens/generate-captions-screen.tsx` around lines 19-27, the
code unconditionally prefixes `audioUrl` with "public/" and swallows exceptions;
update the call in the `try` block so it first normalizes/detects the form of
`audioUrl` (e.g. if it already starts with "http", "/", or "public/") and only
prepend "public/" when appropriate, then call `generateCaptions` with that
normalized path; in the `catch` block capture the thrown error (e.g. `err`) and
log it (console.error or process logger) before calling `toast.error` so
failures are observable, keeping the existing state updates via `setLoading` and
`setCaptionsPath` intact.
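The normalization described in the prompt above can be sketched as a small pure helper. `normalizeAudioPath` is a hypothetical name for illustration only; it is not part of the PR:

```typescript
// Hypothetical helper sketching the normalization described above.
// Not part of the PR; the cases handled here are illustrative only.
function normalizeAudioPath(audioUrl: string): string {
  // Full URLs pass through untouched.
  if (audioUrl.startsWith("http://") || audioUrl.startsWith("https://")) {
    return audioUrl;
  }
  // Already-prefixed paths are left alone.
  if (audioUrl.startsWith("public/")) {
    return audioUrl;
  }
  // Strip a leading slash before prefixing, so "/audios/a.wav"
  // becomes "public/audios/a.wav" rather than "public//audios/a.wav".
  return `public/${audioUrl.replace(/^\//, "")}`;
}

console.log(normalizeAudioPath("audios/a.wav"));        // public/audios/a.wav
console.log(normalizeAudioPath("public/audios/a.wav")); // public/audios/a.wav
console.log(normalizeAudioPath("/audios/a.wav"));       // public/audios/a.wav
```

The component would then call `generateCaptions(normalizeAudioPath(audioUrl))`, keeping the URL/prefix decision in one testable place.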

};

return (
<div className="w-full max-w-xl border border-border rounded-lg p-4 bg-secondary">
<span className="font-bold text-2xl">Generate captions</span>

{audioUrl ? (
<Button
onClick={handleGenerateCaptions}
disabled={loading}
className="mt-3"
>
{loading ? "Generating..." : "Generate Captions"}
</Button>
) : (
<p className="text-yellow-500 mt-3">Generate audio first</p>
)}

{captionsPath && (
<p className="mt-2 text-sm text-green-500">
Captions saved at: {captionsPath}
</p>
)}
</div>
);
}
33 changes: 26 additions & 7 deletions lib/getCaptions.ts
@@ -1,18 +1,37 @@
"use server"

import { toCaptions, transcribe } from "@remotion/install-whisper-cpp";
import path from "path";
import fs from "fs";

export async function getCaptions(audioPath: string) {
const inputPath = path.isAbsolute(audioPath)
? audioPath
: path.join(process.cwd(), audioPath);

export async function getCaptions() {
const whisperCppOutput = await transcribe({
inputPath: path.join(process.cwd(), "public/audios/audio_16k.wav"),
inputPath,
whisperPath: path.join(process.cwd(), "whisper.cpp"),
whisperCppVersion: "1.5.5",
model: "medium.en",
tokenLevelTimestamps: true,
});
Comment on lines +7 to 18
⚠️ Potential issue | 🟠 Major

Missing error handling for transcription.

The `transcribe()` call can fail for various reasons (missing whisper.cpp, invalid audio format, corrupted file). Unhandled exceptions in Server Actions can expose internal errors to clients.

🛡️ Proposed fix with error handling
 export async function getCaptions(audioPath: string) {
   const inputPath = path.isAbsolute(audioPath)
     ? audioPath
     : path.join(process.cwd(), audioPath);
 
+  if (!fs.existsSync(inputPath)) {
+    throw new Error("Audio file not found");
+  }
+
+  let whisperCppOutput;
+  try {
-  const whisperCppOutput = await transcribe({
+    whisperCppOutput = await transcribe({
       inputPath,
       whisperPath: path.join(process.cwd(), "whisper.cpp"),
       whisperCppVersion: "1.5.5",
       model: "medium.en",
       tokenLevelTimestamps: true,
     });
+  } catch (error) {
+    console.error("Whisper transcription failed:", error);
+    throw new Error("Failed to transcribe audio");
+  }
🤖 Prompt for AI Agents
In `@lib/getCaptions.ts` around lines 7-18, the `transcribe()` call inside
getCaptions can throw and is currently unhandled; wrap the call to transcribe({
... }) in a try/catch, catch any error thrown by transcribe (and also guard
against a falsy whisperCppOutput), log the detailed error server-side (e.g.,
using your logger) and return or throw a sanitized/generic error message to the
caller so internal details aren’t exposed; update getCaptions to reference the
transcribe call and whisperCppOutput checks and ensure the function returns a
safe error/result path when transcription fails.
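One way to sketch the pattern the prompt describes (log full detail server-side, surface only a generic message to the client) is a small generic wrapper. `withSanitizedError` is a hypothetical helper, not part of the PR or of Remotion's API:

```typescript
// Hypothetical wrapper sketching the error-sanitizing pattern above.
// Not part of the PR; the real code would wrap the transcribe() call.
async function withSanitizedError<T>(
  label: string,
  fn: () => Promise<T>,
): Promise<T> {
  try {
    return await fn();
  } catch (error) {
    // Full detail stays on the server...
    console.error(`${label} failed:`, error);
    // ...while the caller sees only a generic message.
    throw new Error(`Failed to ${label}`);
  }
}

// Usage sketch: the inner error text never reaches the caller.
withSanitizedError("transcribe audio", async () => {
  throw new Error("whisper.cpp binary missing at /internal/path");
}).catch((err) => {
  console.log(err.message); // Failed to transcribe audio
});
```

Wrapping the `transcribe()` call this way keeps the detailed failure in server logs while the Server Action throws only a sanitized message.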


const { captions } = toCaptions({
whisperCppOutput,
});
const { captions } = toCaptions({ whisperCppOutput });

const fileName = `captions-${Date.now()}.json`;

const captionsDir = path.join(process.cwd(), "remotion/captions");
if (!fs.existsSync(captionsDir)) {
fs.mkdirSync(captionsDir, { recursive: true });
}

const captionsPath = path.join(captionsDir, fileName);

fs.writeFileSync(captionsPath, JSON.stringify(captions, null, 2));

return captions;
}

return {
captionsPath: `remotion/captions/${fileName}`,
};
}