
Commit 7c0a03c

chore: langgraphjs + local model
1 parent 02d9435 commit 7c0a03c

12 files changed: +882 −64 lines changed

README.md

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ npm i
 # Option 1: use local llm, configure the `baseURL` in code then run
 npx tsx ./langchain/chain-groq1-chat-local-mini.ts

-# Option 2: use groq api, configure the `groq_api_key` first
+# Option 2: use groq api, configure the `GROQ_API_KEY` first
 cp .env.example .env
 npx tsx ./server/chain-groq1-starter.ts
 ```
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import { ChatOpenAI } from '@langchain/openai';
+
+const llm = new ChatOpenAI({
+  model: 'qwen/qwen3-4b-2507',
+  configuration: {
+    baseURL: 'http://localhost:1234/v1',
+    apiKey: 'not-needed',
+  },
+  temperature: 0,
+});
+
+const res = await llm.invoke([{ role: 'user', content: "hello, I'm York." }]);
+console.log(res);
+
+// 👀 The model on its own does not have any concept of state. response: I don't know who you are
+// const res2 = await llm.invoke([{ role: "user", content: "What's my name?" }]);
+// console.log(res2);
+
+// ✅ To get around this, we need to pass the entire conversation history into the model.
+const res3 = await llm.invoke([
+  { role: 'user', content: "Hello, I'm York" },
+  { role: 'assistant', content: 'Hello York! How can I assist you today?' },
+  { role: 'user', content: "What's my name?" },
+]);
+console.log(res3);
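One note worth adding here (not part of the commit): `invoke` returns an `AIMessage`, so if you only want the reply text rather than the full message object, log its `content` field. A minimal sketch, reusing the `llm` defined above:

```ts
// Hypothetical extra check, not in the diff: print only the reply text.
const reply = await llm.invoke([
  { role: 'user', content: "Hello, I'm York" },
  { role: 'assistant', content: 'Hello York! How can I assist you today?' },
  { role: 'user', content: "What's my name?" },
]);
console.log(reply.content); // expected to be something like "Your name is York."
```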
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
+import { ChatOpenAI } from '@langchain/openai';
+
+import {
+  START,
+  END,
+  MessagesAnnotation,
+  StateGraph,
+  MemorySaver,
+} from '@langchain/langgraph';
+
+import { ulid } from 'ulid';
+
+const llm = new ChatOpenAI({
+  model: 'qwen/qwen3-4b-2507',
+  configuration: {
+    baseURL: 'http://localhost:1234/v1',
+    apiKey: 'not-needed',
+  },
+  temperature: 0,
+});
+
+// Define the function that calls the model
+const callModel = async (state: typeof MessagesAnnotation.State) => {
+  const response = await llm.invoke(state.messages);
+  return { messages: response };
+};
+
+// Define a new graph
+const workflow = new StateGraph(MessagesAnnotation)
+  // Define the node and edge
+  .addNode('model', callModel)
+  .addEdge(START, 'model')
+  .addEdge('model', END);
+
+// Add memory
+const memory = new MemorySaver();
+const app = workflow.compile({ checkpointer: memory });
+
+const config = { configurable: { thread_id: ulid() } };
+
+const input = [
+  {
+    role: 'user',
+    content: "Hi! I'm York.",
+  },
+];
+// The `output` contains all messages in the state.
+const output = await app.invoke({ messages: input }, config);
+
+console.log('\n👾');
+console.log(output.messages[output.messages.length - 1]);
+
+const input2 = [
+  {
+    role: 'user',
+    content: "What's my name?",
+  },
+];
+const output2 = await app.invoke({ messages: input2 }, config);
+console.log('\n👾');
+console.log(output2.messages[output2.messages.length - 1]);
+
+const config2 = { configurable: { thread_id: ulid() } };
+const input3 = [
+  {
+    role: 'user',
+    content: "What's my name?",
+  },
+];
+const output3 = await app.invoke({ messages: input3 }, config2);
+console.log('\n👾');
+console.log(output3.messages[output3.messages.length - 1]);
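Not part of this commit, but since the compiled graph is also a streaming runnable, the same `app` and `config` could emit intermediate state instead of a single final result. A rough sketch, assuming LangGraph.js's `streamMode: 'values'` option behaves as in its docs (treat the exact options as an assumption):

```ts
// Hypothetical follow-up: stream full state snapshots as the graph runs.
const stream = await app.stream(
  { messages: [{ role: 'user', content: 'And what was my name again?' }] },
  { ...config, streamMode: 'values' },
);
for await (const chunk of stream) {
  // With streamMode 'values', each chunk is the whole state; log the latest message.
  console.log(chunk.messages[chunk.messages.length - 1]);
}
```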

langchain/chain-groq1-chat-local-test.ts

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ import { HumanMessage } from '@langchain/core/messages';

 import { initChatModel } from 'langchain/chat_models/universal';

-// ❌ not working
+// ❌ not working with local model
 const model = await initChatModel('qwen/qwen3-4b-2507', {
   modelProvider: 'openai',
   baseUrl: 'http://localhost:1234/v1',
Lines changed: 45 additions & 30 deletions
@@ -1,42 +1,57 @@
-import '@dotenvx/dotenvx/config';
+// import '@dotenvx/dotenvx/config';

 import { z } from 'zod';

-import { HumanMessage, SystemMessage } from '@langchain/core/messages';
 import { ChatGroq } from '@langchain/groq';
+import { ChatOpenAI } from '@langchain/openai';

-const computerTopic = z.object({
-  // syntax: z.string().describe("The syntax"),
-  briefDescription: z.string().describe('Brief description'),
-  usageDetails: z.string().optional().describe('Usage details or examples'),
+// const model = new ChatGroq({
+//   model: 'meta-llama/llama-4-scout-17b-16e-instruct',
+//   temperature: 0,
+// });
+const model = new ChatOpenAI({
+  // model: 'qwen/qwen3-4b-2507',
+  model: 'google/gemma-3-12b',
+  configuration: {
+    baseURL: 'http://localhost:1234/v1',
+    apiKey: 'not-needed',
+  },
+  temperature: 0.5,
 });

-const model = new ChatGroq({
-  model: 'meta-llama/llama-4-scout-17b-16e-instruct',
-  temperature: 0,
+const phoneDevice = z.object({
+  name: z.string().describe('Device name'),
+  description: z.string().describe('Brief description'),
+  details: z.string().describe('Device details or use cases'),
 });

-// 💡 we can pass a name for our schema in order to give the model additional context as to what our schema represents
-// const structuredLlm = model.withStructuredOutput(computerTopic, { name: 'computerTopic' });
-// const res = await structuredLlm.invoke("introduce sort algorithms");
-
-// 💡 We can also pass in an OpenAI-style JSON schema dict if you prefer not to use Zod
-const structuredLlm = model.withStructuredOutput({
-  name: 'computerTopic',
-  descripttion: 'knowledge about computer',
-  parameters: {
-    title: 'computerTopic',
-    type: 'object',
-    properties: {
-      briefDescription: { type: 'string', description: 'Brief description' },
-      details: { type: 'string', description: 'Usage details or examples' },
-    },
-    required: ['briefDescription', 'details'],
-  },
-});
-const res = await structuredLlm.invoke('introduce sort algorithms', {
-  // @ts-expect-error llm-topic
-  name: 'computerTopic',
+// 💡 Option 1: we can pass a name for our schema in order to give the model additional context as to what our schema represents
+const structuredLlm = model.withStructuredOutput(phoneDevice, {
+  name: 'phoneDevice',
 });
+const res = await structuredLlm.invoke(
+  'give a brief intro to a popular mobile phone',
+);
+
+// 💡 Option 2: we can also pass in an OpenAI-style JSON schema dict if you prefer not to use Zod
+// 👀 Most local models do not support the JSON-schema style below, but hosted models do
+// const structuredLlm = model.withStructuredOutput({
+//   name: 'phoneDevice',
+//   description: 'cellphone device intro',
+//   parameters: {
+//     name: 'phoneDevice',
+//     type: 'object',
+//     properties: {
+//       name: { type: 'string', description: 'Device name' },
+//       description: { type: 'string', description: 'Brief description of the device' },
+//       details: { type: 'string', description: 'Device details or use cases' },
+//     },
+//     required: ['name', 'description'],
+//   },
+// });
+// const res = await structuredLlm.invoke('give a brief intro to a popular mobile phone', {
+//   // @ts-expect-error llm-topic
+//   name: 'phoneDevice',
+// });

 console.log(res);
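Worth noting for readers of this diff: with a Zod schema, `withStructuredOutput` resolves to the parsed object itself rather than an `AIMessage`, so `res` can be consumed as a typed value. A small sketch of what that might look like (field names come from the `phoneDevice` schema above):

```ts
// `res` is shaped by the zod schema: { name: string; description: string; details: string }
console.log(res.name);        // device name chosen by the model
console.log(res.description); // brief description
console.log(res.details);     // details or use cases
```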

langchain/chain-groq3-tool-call.ts

Lines changed: 14 additions & 4 deletions
@@ -1,8 +1,9 @@
-import '@dotenvx/dotenvx/config';
+// import '@dotenvx/dotenvx/config';

 import { HumanMessage, SystemMessage } from '@langchain/core/messages';
 import { tool } from '@langchain/core/tools';
 import { ChatGroq } from '@langchain/groq';
+import { ChatOpenAI } from '@langchain/openai';
 import { z } from 'zod';

 /**
@@ -39,11 +40,20 @@ const calculatorTool = tool(
   },
 );

-const llm = new ChatGroq({
-  model: 'meta-llama/llama-4-scout-17b-16e-instruct',
-  temperature: 0,
+// const llm = new ChatGroq({
+//   model: 'meta-llama/llama-4-scout-17b-16e-instruct',
+//   temperature: 0,
+// });
+const llm = new ChatOpenAI({
+  model: 'qwen/qwen3-4b-2507',
+  configuration: {
+    baseURL: 'http://localhost:1234/v1',
+    apiKey: 'not-needed',
+  },
+  temperature: 0.5,
 });

+// conversion from LangChain tool to our model provider's specific format
 const llmWithTools = llm.bindTools([calculatorTool]);

 // const res = await llmWithTools.invoke("What is 11 * 22");
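The commented-out invoke above stops at the model's tool call; here is a hedged sketch of how the loop might be closed manually, reusing `llmWithTools`, `calculatorTool`, and `HumanMessage` from this file (the exact flow is an assumption, mirroring the usual LangChain JS tool-calling pattern):

```ts
// Hypothetical follow-up: run the tool call and feed the result back for a final answer.
const question = new HumanMessage('What is 11 * 22 ?');
const aiMsg = await llmWithTools.invoke([question]);

// Each tool call is executed by the tool itself, which returns a ToolMessage with the result.
const toolMessages = await Promise.all(
  (aiMsg.tool_calls ?? []).map((toolCall) => calculatorTool.invoke(toolCall)),
);

// Pass the whole exchange back so the model can phrase the final answer.
const finalMsg = await llmWithTools.invoke([question, aiMsg, ...toolMessages]);
console.log(finalMsg.content);
```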

langgraph/graph-groq1-starter.ts

Lines changed: 14 additions & 4 deletions
@@ -1,8 +1,9 @@
-import '@dotenvx/dotenvx/config';
+// import '@dotenvx/dotenvx/config';

 import { tool } from '@langchain/core/tools';
 import { ChatGroq } from '@langchain/groq';
 import { createReactAgent } from '@langchain/langgraph/prebuilt';
+import { ChatOpenAI } from '@langchain/openai';

 import { z } from 'zod';

@@ -25,9 +26,18 @@ const search = tool(
   },
 );

-const model = new ChatGroq({
-  model: 'meta-llama/llama-4-scout-17b-16e-instruct',
-  temperature: 0,
+// const model = new ChatGroq({
+//   model: 'meta-llama/llama-4-scout-17b-16e-instruct',
+//   temperature: 0,
+// });
+
+const model = new ChatOpenAI({
+  model: 'qwen/qwen3-4b-2507',
+  configuration: {
+    baseURL: 'http://localhost:1234/v1',
+    apiKey: 'not-needed',
+  },
+  temperature: 0.5,
 });

 const agent = createReactAgent({

langgraph/graph-guide1-create-react-agent.ts

Lines changed: 18 additions & 9 deletions
@@ -4,28 +4,37 @@ import { HumanMessage } from '@langchain/core/messages';
 import { ChatGroq } from '@langchain/groq';
 import { MemorySaver } from '@langchain/langgraph';
 import { createReactAgent } from '@langchain/langgraph/prebuilt';
+import { ChatOpenAI } from '@langchain/openai';
 import { TavilySearch } from '@langchain/tavily';

-// import { TavilySearchResults } from "@langchain/community/tools/tavily_search";
-// const agentTools = [new TavilySearchResults({ maxResults: 2 })];
-// const agentModel = new ChatOpenAI({ temperature: 0 });
-
 const agentTools = [new TavilySearch({ maxResults: 2 })];
-const agentModel = new ChatGroq({
-  model: 'meta-llama/llama-4-scout-17b-16e-instruct',
+// const model = new ChatGroq({
+//   model: 'meta-llama/llama-4-scout-17b-16e-instruct',
+//   temperature: 0,
+// });
+// 👷👀 local qwen3-4b failed tool-call, local gemma3-12b succeeded
+const model = new ChatOpenAI({
+  // model: 'qwen/qwen3-4b-2507',
+  model: 'google/gemma-3-12b',
+  configuration: {
+    baseURL: 'http://localhost:1234/v1',
+    apiKey: 'not-needed',
+  },
   temperature: 0,
 });

 // Initialize memory to persist state between graph runs
 const agentCheckpointer = new MemorySaver();
 const agent = createReactAgent({
-  llm: agentModel,
+  llm: model,
   tools: agentTools,
   checkpointSaver: agentCheckpointer,
 });

 const agentFinalState = await agent.invoke(
-  { messages: [new HumanMessage('what is the current weather in guangzhou')] },
+  {
+    messages: [new HumanMessage('what is the current weather in guangzhou ?')],
+  },
   { configurable: { thread_id: '42' } },
 );

@@ -34,7 +43,7 @@ console.log(
 );

 const agentNextState = await agent.invoke(
-  { messages: [new HumanMessage('what about Beijing')] },
+  { messages: [new HumanMessage('what about Beijing ?')] },
   { configurable: { thread_id: '42' } },
 );

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+import '@dotenvx/dotenvx/config';
+
+import { AIMessage, HumanMessage } from '@langchain/core/messages';
+import {
+  MemorySaver,
+  MessagesAnnotation,
+  StateGraph,
+} from '@langchain/langgraph';
+import { createReactAgent, ToolNode } from '@langchain/langgraph/prebuilt';
+import { ChatOpenAI } from '@langchain/openai';
+import { TavilySearch } from '@langchain/tavily';
+
+const tools = [new TavilySearch({ maxResults: 2 })];
+// 👷👀 local qwen3-4b failed tool-call, local gemma3-12b succeeded
+const llm = new ChatOpenAI({
+  // model: 'qwen/qwen3-4b-2507',
+  model: 'google/gemma-3-12b',
+  configuration: {
+    baseURL: 'http://localhost:1234/v1',
+    apiKey: 'not-needed',
+  },
+  temperature: 0,
+});
+const model = llm.bindTools(tools);
+
+// Define the function that determines whether to call tools or not
+function shouldContinue({ messages }: typeof MessagesAnnotation.State) {
+  const lastMessage = messages[messages.length - 1] as AIMessage;
+
+  // If the LLM makes a tool call, then we route to the "tools" node
+  if (lastMessage.tool_calls?.length) {
+    return 'tools';
+  }
+  // Otherwise, we stop (reply to the user) using the special "__end__" node
+  return '__end__';
+}
+
+// Define the function that calls the model
+async function callModel(state: typeof MessagesAnnotation.State) {
+  const response = await model.invoke(state.messages);
+
+  // We return a list, because this will get added to the existing list
+  return { messages: [response] };
+}
+
+const toolNode = new ToolNode(tools);
+
+const workflow = new StateGraph(MessagesAnnotation)
+  .addNode('agent', callModel)
+  .addNode('tools', toolNode)
+  .addEdge('__start__', 'agent') // __start__ is a special name for the entrypoint
+  .addEdge('tools', 'agent')
+  .addConditionalEdges('agent', shouldContinue);
+
+const agentCheckpointer = new MemorySaver();
+const graph = workflow.compile();
+// 👀 Even without memory, the AI can still look up the Beijing weather via a tool call
+// const graph = workflow.compile({checkpointer: agentCheckpointer});
+
+const finalState = await graph.invoke({
+  messages: [new HumanMessage('what is the weather in guangzhou ?')],
+});
+console.log(finalState.messages[finalState.messages.length - 1].content);
+
+const nextState = await graph.invoke({
+  // Including the messages from the previous run gives the LLM context.
+  // This way it knows we're asking about the weather in Beijing.
+  messages: [...finalState.messages, new HumanMessage('what about Beijing ?')],
+});
+console.log(nextState.messages[nextState.messages.length - 1].content);
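A small sketch of the memory-backed alternative the 👀 comment above points at: compile with the checkpointer and reuse a `thread_id`, so the follow-up question no longer needs the earlier messages replayed by hand (the thread id value is just illustrative):

```ts
// Hypothetical variant: persist conversation state with the checkpointer instead of
// passing previous messages back in manually.
const graphWithMemory = workflow.compile({ checkpointer: agentCheckpointer });
const threadConfig = { configurable: { thread_id: 'weather-demo' } };

await graphWithMemory.invoke(
  { messages: [new HumanMessage('what is the weather in guangzhou ?')] },
  threadConfig,
);

// Same thread_id, so the checkpointed messages supply the context for the follow-up.
const followUp = await graphWithMemory.invoke(
  { messages: [new HumanMessage('what about Beijing ?')] },
  threadConfig,
);
console.log(followUp.messages[followUp.messages.length - 1].content);
```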
