import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio';
import { Document } from '@langchain/core/documents';
import { AIMessage, BaseMessage, HumanMessage } from '@langchain/core/messages';
import { ChatPromptTemplate, PromptTemplate } from '@langchain/core/prompts';
import { Annotation, END, START, StateGraph } from '@langchain/langgraph';
import { ToolNode } from '@langchain/langgraph/prebuilt';
import { ChatOpenAI, OpenAIClient, OpenAIEmbeddings } from '@langchain/openai';
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import { pull } from 'langchain/hub';
import { createRetrieverTool } from 'langchain/tools/retriever';
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
import { z } from 'zod';
| 13 | + |
| 14 | +// 🧑🏫 [Build a RAG App: Part 1](https://js.langchain.com/docs/tutorials/rag/) |
| 15 | + |
| 16 | +// const model = new ChatOpenAI({ |
| 17 | +// model: 'gpt-4o', |
| 18 | +// temperature: 0, |
| 19 | +// }); |
| 20 | +const model = new ChatOpenAI({ |
| 21 | + // model: 'qwen/qwen3-4b-2507', |
| 22 | + model: 'google/gemma-3-12b', |
| 23 | + configuration: { |
| 24 | + baseURL: 'http://localhost:1234/v1', |
| 25 | + apiKey: 'not-needed', |
| 26 | + }, |
| 27 | + temperature: 0, |
| 28 | +}); |
| 29 | + |
| 30 | +// const embeddings = new OpenAIEmbeddings({ |
| 31 | +// model: "text-embedding-qwen3-embedding-0.6b", |
| 32 | +// // model: 'text-embedding-embeddinggemma-300m', |
| 33 | +// configuration: { |
| 34 | +// baseURL: 'http://localhost:1234/v1', |
| 35 | +// // check: false, |
| 36 | +// apiKey: 'not-needed', |
| 37 | +// }, |
| 38 | +// }); |
| 39 | + |
| 40 | +const urls = [ |
| 41 | + 'https://dev.to/nyxtom/introduction-to-crdts-for-realtime-collaboration-2eb1', |
| 42 | + 'https://dev.to/foxgem/crdts-achieving-eventual-consistency-in-distributed-systems-296g', |
| 43 | + // "https://lilianweng.github.io/posts/2023-06-23-agent/", |
| 44 | +]; |
| 45 | + |
| 46 | +const docs = await Promise.all( |
| 47 | + urls.map((url) => |
| 48 | + new CheerioWebBaseLoader(url, { |
| 49 | + selector: '.crayons-layout__content', |
| 50 | + // selector: 'p' |
| 51 | + }).load(), |
| 52 | + ), |
| 53 | +); |
| 54 | +const docsList = docs.flat(); |
| 55 | + |
| 56 | +// const cheerioLoader = new CheerioWebBaseLoader( |
| 57 | +// "https://lilianweng.github.io/posts/2023-06-23-agent/", |
| 58 | +// { |
| 59 | +// selector: 'p' |
| 60 | +// } |
| 61 | +// ); |
| 62 | +// const docsList = await cheerioLoader.load(); |
| 63 | + |
| 64 | +console.log(';; docsList ', docsList[0].pageContent.length); |
| 65 | +// console.log(';; docsList ', docsList[0].pageContent.slice(0, 2200)) |
| 66 | + |
| 67 | +const textSplitter = new RecursiveCharacterTextSplitter({ |
| 68 | + chunkSize: 500, |
| 69 | + chunkOverlap: 50, |
| 70 | +}); |
| 71 | +const docSplits = await textSplitter.splitDocuments(docsList); |
| 72 | +console.log(';; docSplits ', docSplits.length); |
| 73 | +// console.log(';; docSplits ', docSplits.slice(0, 6)) |
| 74 | + |
| 75 | +// 🛢️ save embeddings to vectorDB |
| 76 | +// const vectorStore = new MemoryVectorStore(embeddings); |
| 77 | +// await vectorStore.addDocuments(docSplits) |
| 78 | +// const vectorStore = await MemoryVectorStore.fromDocuments( |
| 79 | +// docSplits, |
| 80 | +// embeddings |
| 81 | +// ); |
| 82 | +const openAiClient = new OpenAIClient({ |
| 83 | + apiKey: 'not-needed', |
| 84 | + baseURL: 'http://localhost:1234/v1', |
| 85 | +}); |
| 86 | + |
| 87 | +// Create a proper embeddings interface for OpenAIClient |
| 88 | +class OpenAIClientEmbeddings { |
| 89 | + constructor( |
| 90 | + private client: OpenAIClient, |
| 91 | + private model: string, |
| 92 | + ) {} |
| 93 | + |
| 94 | + async embedDocuments(texts: string[]): Promise<number[][]> { |
| 95 | + const response = await this.client.embeddings.create({ |
| 96 | + model: this.model, |
| 97 | + input: texts, |
| 98 | + encoding_format: 'float', |
| 99 | + }); |
| 100 | + return response.data.map((item) => item.embedding); |
| 101 | + } |
| 102 | + |
| 103 | + async embedQuery(text: string): Promise<number[]> { |
| 104 | + const embeddings = await this.embedDocuments([text]); |
| 105 | + return embeddings[0]; |
| 106 | + } |
| 107 | +} |
| 108 | + |
| 109 | +// Create embeddings instance and use fromDocuments |
| 110 | +const embeddingsInstance = new OpenAIClientEmbeddings( |
| 111 | + openAiClient, |
| 112 | + 'text-embedding-qwen3-embedding-0.6b', |
| 113 | +); |
| 114 | +// const embeddingsInstance = new OpenAIClientEmbeddings(openAiClient, 'text-embedding-embeddinggemma-300m'); |
| 115 | +// const embeddingsInstance = new OpenAIClientEmbeddings(openAiClient, 'text-embedding-granite-embedding-278m-multilingual'); |
| 116 | +const vectorStore = await MemoryVectorStore.fromDocuments( |
| 117 | + docSplits, |
| 118 | + embeddingsInstance, |
| 119 | +); |
| 120 | + |
| 121 | +// const retrievedDocs = await vectorStore.similaritySearch('yjs'); |
| 122 | +// console.log(';; retrievedDocs ', retrievedDocs.length) |
| 123 | +// console.log(';; retrievedDocs ', retrievedDocs) |
| 124 | + |
| 125 | +// Define state for application |
| 126 | +const StateAnnotation = Annotation.Root({ |
| 127 | + question: Annotation<string>, |
| 128 | + context: Annotation<Document[]>, |
| 129 | + answer: Annotation<string>, |
| 130 | +}); |
| 131 | + |
| 132 | +// only used for types |
| 133 | +const InputStateAnnotation = Annotation.Root({ |
| 134 | + question: Annotation<string>, |
| 135 | +}); |
| 136 | + |
| 137 | +// retrieve node |
| 138 | +const retrieve = async (state: typeof InputStateAnnotation.State) => { |
| 139 | + const retrievedDocs = await vectorStore.similaritySearch(state.question); |
| 140 | + console.log(';; retrievedDocs ', retrievedDocs.length); |
| 141 | + // console.log(';; retrievedDocs ', retrievedDocs) |
| 142 | + return { context: retrievedDocs }; |
| 143 | +}; |
| 144 | + |
| 145 | +const generate = async (state: typeof StateAnnotation.State) => { |
| 146 | + const docsContent = state.context.map((doc) => doc.pageContent).join('\n'); |
| 147 | + |
| 148 | + // Define prompt for question-answering |
| 149 | + // const promptTemplate = await pull<ChatPromptTemplate>("rlm/rag-prompt"); |
| 150 | + const promptTemplate = PromptTemplate.fromTemplate( |
| 151 | + `You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise. |
| 152 | + Question: {question} |
| 153 | + Context: {context} |
| 154 | + Answer: |
| 155 | + `, |
| 156 | + ); |
| 157 | + |
| 158 | + const messages = await promptTemplate.invoke({ |
| 159 | + question: state.question, |
| 160 | + context: docsContent, |
| 161 | + }); |
| 162 | + |
| 163 | + const response = await model.invoke(messages); |
| 164 | + return { answer: response.content }; |
| 165 | +}; |
| 166 | + |
| 167 | +// Compile application and test |
| 168 | +const graph = new StateGraph(StateAnnotation) |
| 169 | + .addNode('retrieve', retrieve) |
| 170 | + .addNode('generate', generate) |
| 171 | + .addEdge('__start__', 'retrieve') |
| 172 | + .addEdge('retrieve', 'generate') |
| 173 | + .addEdge('generate', '__end__') |
| 174 | + .compile(); |
| 175 | + |
| 176 | +// ------- |
| 177 | + |
| 178 | +let inputs = { question: 'What is CmRDTs ?' }; |
| 179 | +// let inputs = { question: "What is yjs ?" }; |
| 180 | + |
| 181 | +const result = await graph.invoke(inputs); |
| 182 | + |
| 183 | +console.log('\n👾'); |
| 184 | +console.log(result.answer); |