diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8500662..e3d214a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,7 +19,7 @@ env: TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }} jobs: - lint: + check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -27,14 +27,10 @@ jobs: - name: Setup uses: ./tooling/github/setup - - name: Copy env - shell: bash - run: cp .env.example .env + - name: Check (lint and format) + run: pnpm check - - name: Lint - run: pnpm lint && pnpm lint:ws - - format: + typecheck: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -42,10 +38,10 @@ jobs: - name: Setup uses: ./tooling/github/setup - - name: Format - run: pnpm format + - name: Typecheck + run: pnpm typecheck - typecheck: + build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -53,5 +49,5 @@ jobs: - name: Setup uses: ./tooling/github/setup - - name: Typecheck - run: pnpm typecheck + - name: Build + run: pnpm build diff --git a/apps/localtunnel/package.json b/apps/localtunnel/package.json index 511ee8b..c0cee7c 100644 --- a/apps/localtunnel/package.json +++ b/apps/localtunnel/package.json @@ -6,7 +6,7 @@ "scripts": { "check": "biome check", "clean": "git clean -xdf .cache .next .turbo node_modules tsconfig.tsbuildinfo", - "dev": "pnpm with-env concurrently \"pnpm watch\" \"pnpm tunnel\"", + "dev": "NODE_ENV=development pnpm with-env concurrently \"pnpm watch\" \"pnpm tunnel\"", "tunnel": "pnpm with-env sh -c 'pnpm lt --port $LOCALTUNNEL_PORT --subdomain $LOCALTUNNEL_SUBDOMAIN'", "typecheck": "tsc --noEmit", "watch": "pnpm with-env tsx watch ./src/main.ts", diff --git a/apps/web/package.json b/apps/web/package.json index 4531a99..d2947cc 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -4,12 +4,12 @@ "private": true, "type": "module", "scripts": { - "build": "pnpm with-env next build", + "build": "NODE_ENV=production pnpm with-env next build", "check": "biome check", "clean": "git clean -xdf .cache .next .turbo node_modules", - "dev": "pnpm with-env next dev --turbopack", + "dev": "NODE_ENV=development pnpm with-env next dev --turbopack", "shadcn": "pnpm dlx shadcn@latest add && pnpm check --write --unsafe", - "start": "pnpm with-env next start", + "start": "NODE_ENV=production pnpm with-env next start", "typecheck": "tsc --noEmit", "with-env": "dotenv -e ../../.env --" }, diff --git a/apps/web/src/app/_components/not-found-editor.dynamic.tsx b/apps/web/src/app/_components/not-found-editor.dynamic.tsx new file mode 100644 index 0000000..fb23a24 --- /dev/null +++ b/apps/web/src/app/_components/not-found-editor.dynamic.tsx @@ -0,0 +1,8 @@ +import dynamic from "next/dynamic"; + +export const DynamicNotFoundEditor = dynamic( + () => import("./not-found-editor").then((mod) => mod.NotFoundEditor), + { + ssr: false, + }, +); diff --git a/apps/web/src/app/_components/not-found-editor.tsx b/apps/web/src/app/_components/not-found-editor.tsx new file mode 100644 index 0000000..8e5be7d --- /dev/null +++ b/apps/web/src/app/_components/not-found-editor.tsx @@ -0,0 +1,44 @@ +"use client"; + +import { BlockNoteView } from "@blocknote/mantine"; +import { useCreateBlockNote } from "@blocknote/react"; + +export function NotFoundEditor() { + // Creates a new editor instance. + const editor = useCreateBlockNote({ + initialContent: [ + { + content: "Journl #404", + type: "heading", + }, + { + content: "Page not found.", + type: "paragraph", + }, + { + content: "The path was linked, but the note was not.", + type: "bulletListItem", + }, + { + content: "Will investigate later... or probably forget.", + type: "bulletListItem", + }, + { + children: [ + { + content: "window.location.href = '/';", + props: { language: "json" }, + type: "codeBlock", + }, + ], + content: "This should help users find their way back...", + type: "bulletListItem", + }, + { + type: "paragraph", + }, + ], + }); + + return ; +} diff --git a/apps/web/src/app/api/webhooks/page/route.ts b/apps/web/src/app/api/webhooks/page/route.ts index ef531c8..d4d136e 100644 --- a/apps/web/src/app/api/webhooks/page/route.ts +++ b/apps/web/src/app/api/webhooks/page/route.ts @@ -1,287 +1,8 @@ -import crypto from "node:crypto"; -import { and, eq, inArray } from "@acme/db"; -import { db } from "@acme/db/client"; -import { Block, PageEmbedding, zPage } from "@acme/db/schema"; -import { openai } from "@ai-sdk/openai"; -import { embed } from "ai"; +import { zPage } from "@acme/db/schema"; import { NextResponse } from "next/server"; import { handler } from "../_lib/webhook-handler"; -// Optimal chunk size based on research: ~1500 characters with 300 character overlap -const CHUNK_SIZE = 1500; -const CHUNK_OVERLAP = 300; - -interface ChunkMetadata { - block_ids: string[]; - chunk_size: number; - chunk_overlap: number; - total_blocks_in_page: number; -} - -interface BlockWithText { - id: string; - text: string; - startPosition: number; - endPosition: number; -} - -interface TextChunkWithMetadata { - text: string; - blockIds: string[]; -} - -/** - * Chunks text into overlapping segments while tracking which blocks contribute to each chunk - */ -function chunkTextWithBlockMetadata( - blocksWithText: BlockWithText[], -): TextChunkWithMetadata[] { - // Combine all text to get the full content - const fullText = blocksWithText.map((b) => b.text).join("\n"); - - if (fullText.length <= CHUNK_SIZE) { - return [ - { - blockIds: blocksWithText.map((b) => b.id), - text: fullText, - }, - ]; - } - - const chunks: TextChunkWithMetadata[] = []; - let start = 0; - - while (start < fullText.length) { - const end = Math.min(start + CHUNK_SIZE, fullText.length); - let chunkEnd = end; - - // Try to break at sentence boundaries to preserve meaning - if (end < fullText.length) { - const sentenceBreak = fullText.lastIndexOf(". ", end); - const questionBreak = fullText.lastIndexOf("? ", end); - const exclamationBreak = fullText.lastIndexOf("! ", end); - - const lastSentenceBreak = Math.max( - sentenceBreak, - questionBreak, - exclamationBreak, - ); - - // Only use sentence break if it's not too far back (at least 50% of chunk size) - if (lastSentenceBreak > start + CHUNK_SIZE * 0.5) { - chunkEnd = lastSentenceBreak + 2; // Include the punctuation and space - } - } - - const chunkText = fullText.slice(start, chunkEnd).trim(); - if (chunkText) { - // Find which blocks contribute to this chunk - const contributingBlockIds = new Set(); - - for (const block of blocksWithText) { - // Check if this block's text overlaps with the current chunk - const blockOverlapsChunk = - block.startPosition < chunkEnd && block.endPosition > start; - - if (blockOverlapsChunk) { - contributingBlockIds.add(block.id); - } - } - - chunks.push({ - blockIds: Array.from(contributingBlockIds), - text: chunkText, - }); - } - - // Move start position with overlap, but ensure we make progress - start = Math.max(start + 1, chunkEnd - CHUNK_OVERLAP); - - // Prevent infinite loop - if (start >= fullText.length) { - break; - } - } - - return chunks; -} - -/** - * Recursively collects all child block IDs for a given block - */ -async function collectChildBlockIds(blockId: string): Promise { - const block = await db - .select({ children: Block.children }) - .from(Block) - .where(eq(Block.id, blockId)) - .limit(1); - - if (!block[0]) return []; - - const childIds = (block[0].children as string[]) || []; - const allChildIds = [...childIds]; - - // Recursively collect children of children - for (const childId of childIds) { - const grandChildIds = await collectChildBlockIds(childId); - allChildIds.push(...grandChildIds); - } - - return allChildIds; -} - -/** - * Extracts text content from block content structure - */ -function extractTextFromBlock(content: unknown): string { - if (!content || !Array.isArray(content)) { - return ""; - } - - return content - .map((item: unknown) => { - if (typeof item === "string") { - return item; - } - if ( - item && - typeof item === "object" && - "text" in item && - typeof item.text === "string" - ) { - return item.text; - } - return ""; - }) - .filter(Boolean) - .join(" "); -} - -export const POST = handler(zPage, async (payload) => { - if (payload.type === "INSERT" || payload.type === "UPDATE") { - const pageId = payload.record.id; - const userId = payload.record.user_id; - - // Get all direct child blocks of the page - const pageChildren = (payload.record.children as string[]) || []; - - if (pageChildren.length === 0) { - console.debug("Skipping embedding for page with no blocks", pageId); - return NextResponse.json({ success: true }); - } - - // Collect all nested child block IDs - const allBlockIds: string[] = [...pageChildren]; - for (const childId of pageChildren) { - const nestedChildIds = await collectChildBlockIds(childId); - allBlockIds.push(...nestedChildIds); - } - - // Fetch all blocks for the page - const blocks = await db - .select() - .from(Block) - .where(and(inArray(Block.id, allBlockIds), eq(Block.created_by, userId))); - - // Extract text content from all blocks and track positions - const blocksWithText: BlockWithText[] = []; - let currentPosition = 0; - - for (const block of blocks) { - const blockText = extractTextFromBlock(block.content); - if (blockText) { - const startPosition = currentPosition; - const endPosition = currentPosition + blockText.length + 1; // +1 for newline - - blocksWithText.push({ - endPosition, - id: block.id, - startPosition, - text: blockText, - }); - - currentPosition = endPosition; - } - } - - // Skip embedding if there's no meaningful text content - if (blocksWithText.length === 0) { - console.debug("Skipping embedding for page with no text content", pageId); - return NextResponse.json({ success: true }); - } - - // Create combined text for hashing - const allText = blocksWithText.map((b) => b.text).join("\n"); - - // Create a SHA-256 hash of the combined text content for change detection - const pageTextHash = crypto - .createHash("sha256") - .update(allText, "utf8") - .digest("hex"); - - // Check if we already have embeddings for this content - const existingEmbeddings = await db - .select({ id: PageEmbedding.id }) - .from(PageEmbedding) - .where( - and( - eq(PageEmbedding.page_id, pageId), - eq(PageEmbedding.page_text_hash, pageTextHash), - ), - ) - .limit(1); - - if (existingEmbeddings.length > 0) { - console.debug( - "Page content unchanged, skipping embedding update", - pageId, - ); - return NextResponse.json({ success: true }); - } - - // Delete existing embeddings for this page (content has changed) - await db.delete(PageEmbedding).where(eq(PageEmbedding.page_id, pageId)); - - // Chunk the text content with block metadata - const textChunks = chunkTextWithBlockMetadata(blocksWithText); - - // Create embeddings for each chunk - const embeddingPromises = textChunks.map(async (chunk, index) => { - const { embedding } = await embed({ - maxRetries: 5, - model: openai.embedding("text-embedding-3-small"), - value: chunk.text, - }); - - // Create metadata for this chunk - const metadata: ChunkMetadata = { - block_ids: chunk.blockIds, - chunk_overlap: CHUNK_OVERLAP, - chunk_size: chunk.text.length, - total_blocks_in_page: blocksWithText.length, - }; - - return { - chunk_index: index, - chunk_text: chunk.text, - embedding, - metadata, - page_id: pageId, - page_text_hash: pageTextHash, - user_id: userId, - }; - }); - - // Wait for all embeddings to be generated - const embeddingData = await Promise.all(embeddingPromises); - - // Insert all embeddings in a single transaction - await db.insert(PageEmbedding).values(embeddingData); - - console.debug( - `Page embeddings stored for page ${pageId}: ${textChunks.length} chunks, ${allText.length} characters`, - ); - } - +export const POST = handler(zPage, async () => { + // TODO: Refactor the PageEmbedding logic. return NextResponse.json({ success: true }); }); diff --git a/apps/web/src/app/not-found.tsx b/apps/web/src/app/not-found.tsx index 6428dbb..d4f0184 100644 --- a/apps/web/src/app/not-found.tsx +++ b/apps/web/src/app/not-found.tsx @@ -1,60 +1,26 @@ "use client"; -import { BlockNoteView } from "@blocknote/mantine"; -import { useCreateBlockNote } from "@blocknote/react"; import Link from "next/link"; import { Button } from "~/components/ui/button"; import "./styles/blocknote.css"; +import { Suspense } from "react"; +import { DynamicNotFoundEditor } from "./_components/not-found-editor.dynamic"; export default function NotFound() { - // Creates a new editor instance. - const editor = useCreateBlockNote({ - initialContent: [ - { - content: "Journl #404", - type: "heading", - }, - { - content: "Page not found.", - type: "paragraph", - }, - { - content: "The path was linked, but the note was not.", - type: "bulletListItem", - }, - { - content: "Will investigate later... or probably forget.", - type: "bulletListItem", - }, - { - children: [ - { - content: "window.location.href = '/';", - props: { language: "json" }, - type: "codeBlock", - }, - ], - content: "This should help users find their way back...", - type: "bulletListItem", - }, - { - type: "paragraph", - }, - ], - }); - return (
- - - - -
- No account? - {" "} -
+ + + + + +
+ No account? + {" "} +
+
); } diff --git a/apps/web/src/hooks/block-hooks.ts b/apps/web/src/hooks/block-hooks.ts deleted file mode 100644 index 54a8f0d..0000000 --- a/apps/web/src/hooks/block-hooks.ts +++ /dev/null @@ -1,161 +0,0 @@ -import type { Block, BlockWithChildren } from "@acme/db/schema"; -import { useMemo, useRef } from "react"; - -/** - * Hook to convert flat blocks from the database into a nested structure with optimized incremental processing - * @param combinedBlocks - Flat array of blocks from the database - * @returns Nested array of blocks with proper parent-child relationships - */ -export function useNestedBlocks(combinedBlocks: Block[]): BlockWithChildren[] { - // Refs to store previous computation results for memoization - const previousBlocksRef = useRef([]); - const previousResultRef = useRef([]); - const blockMapRef = useRef>(new Map()); - const childBlockIdsRef = useRef>(new Set()); - - return useMemo(() => { - if (combinedBlocks.length === 0) { - // Reset refs when no blocks - previousBlocksRef.current = []; - previousResultRef.current = []; - blockMapRef.current.clear(); - childBlockIdsRef.current.clear(); - return []; - } - - const previousBlocks = previousBlocksRef.current; - const previousBlockMap = blockMapRef.current; - - // Check if this is just adding new blocks to existing ones (incremental loading) - const isIncrementalUpdate = - previousBlocks.length > 0 && - combinedBlocks.length > previousBlocks.length && - combinedBlocks - .slice(0, previousBlocks.length) - .every((block, index) => previousBlocks[index]?.id === block.id); - - if (isIncrementalUpdate) { - // Incremental processing: only add new blocks, then rebuild all relationships - const newBlocks = combinedBlocks.slice(previousBlocks.length); - - // Add new blocks to the existing block map (reset their children arrays) - for (const block of newBlocks) { - previousBlockMap.set(block.id, { - ...block, - children: [] as BlockWithChildren[], - }); - } - - // Now rebuild ALL parent-child relationships from scratch using all blocks - // This ensures children from new chunks connect to parents from previous chunks - - // First, reset all children arrays - for (const [_, blockWithChildren] of previousBlockMap.entries()) { - blockWithChildren.children = []; - } - - // Rebuild child block IDs set from all blocks - const allChildBlockIds = new Set(); - for (const block of combinedBlocks) { - if (Array.isArray(block.children)) { - for (const childId of block.children) { - if (typeof childId === "string") { - allChildBlockIds.add(childId); - } - } - } - } - - // Rebuild all parent-child relationships - for (const block of combinedBlocks) { - const blockWithChildren = previousBlockMap.get(block.id); - if (!blockWithChildren) continue; - - // If this block has children, find them and nest them - if (Array.isArray(block.children) && block.children.length > 0) { - const childrenIds = block.children.filter( - (id): id is string => typeof id === "string" && id.length > 0, - ); - - for (const childId of childrenIds) { - const childBlock = previousBlockMap.get(childId); - if (childBlock) { - blockWithChildren.children.push(childBlock); - } - } - } - } - - // Find root blocks (blocks that are not children of others) - const rootBlocks: BlockWithChildren[] = []; - for (const [blockId, blockWithChildren] of previousBlockMap.entries()) { - if (!allChildBlockIds.has(blockId)) { - rootBlocks.push(blockWithChildren); - } - } - - // Update refs for next iteration - previousBlocksRef.current = combinedBlocks; - previousResultRef.current = rootBlocks; - childBlockIdsRef.current = allChildBlockIds; - - return rootBlocks; - } else { - // Full recalculation: either first load or blocks changed significantly - const blockMap = new Map( - combinedBlocks.map((block) => [ - block.id, - { ...block, children: [] as BlockWithChildren[] }, - ]), - ); - - // Build the nested structure - const rootBlocks: BlockWithChildren[] = []; - - // First, identify which blocks are referenced as children by other blocks - const childBlockIds = new Set(); - for (const block of combinedBlocks) { - if (Array.isArray(block.children)) { - for (const childId of block.children) { - if (typeof childId === "string") { - childBlockIds.add(childId); - } - } - } - } - - // Process each block to build parent-child relationships - for (const block of combinedBlocks) { - const blockWithChildren = blockMap.get(block.id); - if (!blockWithChildren) continue; - - // If this block has children, find them and nest them (only if child blocks are available) - if (Array.isArray(block.children) && block.children.length > 0) { - const childrenIds = block.children.filter( - (id): id is string => typeof id === "string" && id.length > 0, - ); - - for (const childId of childrenIds) { - const childBlock = blockMap.get(childId); - if (childBlock) { - blockWithChildren.children.push(childBlock); - } - } - } - - // If this block is not a child of any other block, it's a root block - if (!childBlockIds.has(block.id)) { - rootBlocks.push(blockWithChildren); - } - } - - // Update refs for next iteration - previousBlocksRef.current = combinedBlocks; - previousResultRef.current = rootBlocks; - blockMapRef.current = blockMap; - childBlockIdsRef.current = childBlockIds; - - return rootBlocks; - } - }, [combinedBlocks]); -} diff --git a/package.json b/package.json index 5232ff7..3a77215 100644 --- a/package.json +++ b/package.json @@ -1,30 +1,31 @@ { - "name": "acme", - "version": "0.0.0", - "private": true, - "type": "module", - "license": "MIT", - "engines": { - "node": ">=22.14.0", - "pnpm": ">=9.6.0" - }, - "packageManager": "pnpm@10.12.4", - "scripts": { - "auth:generate": "pnpm -F @acme/auth generate", - "build": "turbo run build", - "check": "turbo run check --continue -- --write --unsafe", - "clean": "turbo run clean && git clean -xdf .turbo node_modules", - "db:push": "turbo -F @acme/db push", - "db:studio": "turbo -F @acme/db studio", - "dependencies": "pnpm dlx sherif@latest --fix", - "dev:next": "turbo watch dev -F @acme/nextjs...", - "dev": "turbo watch dev --continue", - "postinstall": "pnpm dependencies", - "shadcn": "turbo run shadcn", - "typecheck": "turbo run typecheck" - }, - "devDependencies": { - "@turbo/gen": "^2.5.4", - "turbo": "^2.5.4" - } + "name": "acme", + "version": "0.0.0", + "private": true, + "type": "module", + "license": "MIT", + "engines": { + "node": ">=22.14.0", + "pnpm": ">=9.6.0" + }, + "packageManager": "pnpm@10.12.4", + "scripts": { + "auth:generate": "pnpm -F @acme/auth generate", + "build": "turbo run build", + "check": "turbo run check --continue -- --write --unsafe", + "clean": "turbo run clean && git clean -xdf .turbo node_modules", + "db:push": "turbo -F @acme/db push", + "db:studio": "turbo -F @acme/db studio", + "dependencies": "pnpm dlx sherif@latest --fix", + "dependencies:check": "pnpm dlx sherif@latest", + "dev:next": "turbo watch dev -F @acme/nextjs...", + "dev": "turbo watch dev --continue", + "postinstall": "pnpm dependencies:check", + "shadcn": "turbo run shadcn", + "typecheck": "turbo run typecheck" + }, + "devDependencies": { + "@turbo/gen": "^2.5.4", + "turbo": "^2.5.4" + } } diff --git a/turbo.json b/turbo.json index 436da6c..6c2ff97 100644 --- a/turbo.json +++ b/turbo.json @@ -47,8 +47,7 @@ "dependsOn": ["^topo"] }, "typecheck": { - "dependsOn": ["^topo", "^build"], - "outputs": [".cache/tsbuildinfo.json"] + "dependsOn": ["^topo", "^build"] } }, "ui": "tui"