Commit 6b075d9

Merge pull request #371 from harmony-one/message-sliding-window

add sliding window logic + add timestamp field

2 parents 96eb425 + 36a20e7
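
In short: every ChatConversation entry and every provider completion now carries a timestamp field (epoch milliseconds via Date.now()), and onChatRequestHandler in llmsBase.ts passes the session history through conversationManager.manageConversationWindow before draining the request queue, so stale messages can be aged out of the conversation. A ChatConversationWithoutTimestamp variant preserves the old timestamp-free shape for payloads that never send one upstream (llmApi.ts, vertex.ts). Of the 12 changed files, 8 diffs appear below; conversationManager.ts and the types module, both referenced by these changes, are among those not shown.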

File tree: 12 files changed, +224 −28 lines

src/bot.ts

Lines changed: 13 additions & 0 deletions
@@ -509,6 +509,19 @@ const logErrorHandler = (ex: any): void => {
   logger.error(ex)
 }
 
+// bot.command('testcleanup', async (ctx) => {
+//   await openAiBot.testCleanup(ctx as OnMessageContext)
+// })
+
+bot.command('new', async (ctx) => {
+  writeCommandLog(ctx as OnMessageContext).catch(logErrorHandler)
+  await openAiBot.onStop(ctx as OnMessageContext)
+  return await ctx.reply('Chat history reseted', {
+    parse_mode: 'Markdown',
+    message_thread_id: ctx.message?.message_thread_id
+  })
+})
+
 bot.command('more', async (ctx) => {
   writeCommandLog(ctx as OnMessageContext).catch(logErrorHandler)
   return await ctx.reply(commandsHelpText.more, {
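
Two additions here: a /new command that resets the session's chat history via openAiBot.onStop, and a commented-out testcleanup command that pairs with the testCleanup helper added to llmsBase.ts below, apparently left disabled after manual testing.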

src/helpers.ts

Lines changed: 5 additions & 2 deletions
@@ -1,4 +1,5 @@
 import config from './config'
+import { conversationManager } from './modules/llms/utils/conversationManager'
 import { LlmModelsEnum } from './modules/llms/utils/llmModelsManager'
 import { type DalleImageSize } from './modules/llms/utils/types'
 import { type BotSessionData } from './modules/types'
@@ -26,7 +27,8 @@ export function createInitialSessionData (): BotSessionData {
       price: 0,
       usage: 0,
       isProcessingQueue: false,
-      requestQueue: []
+      requestQueue: [],
+      cleanupState: conversationManager.initializeCleanupTimes()
     },
     chatGpt: {
       model: config.llms.model,
@@ -36,7 +38,8 @@ export function createInitialSessionData (): BotSessionData {
       price: 0,
       usage: 0,
       isProcessingQueue: false,
-      requestQueue: []
+      requestQueue: [],
+      cleanupState: conversationManager.initializeCleanupTimes()
     },
     dalle: {
       numImages: config.openAi.dalle.sessionDefault.numImages,
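
conversationManager itself is one of the changed files not shown on this page. From the way testCleanup in llmsBase.ts (further down) writes session.cleanupState, the state it initializes plausibly looks like the sketch below; treat the shape and the daily 3:00 AM boundary as assumptions read off the test code, not the module's confirmed API.

// Assumed shape of cleanupState, inferred from testCleanup() in llmsBase.ts.
interface ConversationManagerState {
  nextCleanupTime: number // epoch ms of the next scheduled cleanup
  lastCleanupTime: number // epoch ms of the previous cleanup
}

// A plausible initializeCleanupTimes(), assuming a daily 3:00 AM boundary
// (testCleanup forces nextCleanupTime to 2:59 AM + 60s, i.e. 3:00 AM).
function initializeCleanupTimes (): ConversationManagerState {
  const next = new Date()
  next.setHours(3, 0, 0, 0) // today at 3:00 AM local time
  if (next.getTime() <= Date.now()) {
    next.setDate(next.getDate() + 1) // already past 3 AM, schedule tomorrow
  }
  return {
    nextCleanupTime: next.getTime(),
    lastCleanupTime: next.getTime() - 24 * 60 * 60 * 1000
  }
}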

src/modules/llms/api/athropic.ts

Lines changed: 10 additions & 5 deletions
@@ -49,7 +49,8 @@ export const anthropicCompletion = async (
     completion: {
       content: completion[0].text,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0,
@@ -92,7 +93,8 @@ export const xaiCompletion = async (
     completion: {
       content: completion[0].text,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0,
@@ -202,7 +204,8 @@ export const anthropicStreamCompletion = async (
     completion: {
       content: completion,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: parseInt(totalOutputTokens, 10) + parseInt(totalInputTokens, 10),
     price: 0,
@@ -252,7 +255,8 @@ export const toolsChatCompletion = async (
     completion: {
       content: completion[0].text,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0,
@@ -264,7 +268,8 @@ export const toolsChatCompletion = async (
     completion: {
       content: 'Timeout error',
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: 0,
     price: 0
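
The same one-line change repeats at every return site in this file (anthropicCompletion, xaiCompletion, anthropicStreamCompletion, and both branches of toolsChatCompletion, including the timeout fallback): each completion object is stamped with timestamp: Date.now() when it is built, which is what later lets the sliding window compare messages against a cleanup boundary. The provider files that follow apply the identical pattern.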

src/modules/llms/api/llmApi.ts

Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,6 @@
 import axios from 'axios'
 import config from '../../../config'
-import { type ChatConversation } from '../../types'
+import { type ChatConversationWithoutTimestamp, type ChatConversation } from '../../types'
 import pino from 'pino'
 import { type ChatModel } from '../utils/types'
 import { headers } from './helper'
@@ -36,7 +36,7 @@ interface LlmAddUrlDocument {
 interface QueryUrlDocument {
   collectioName: string
   prompt: string
-  conversation?: ChatConversation[]
+  conversation?: ChatConversationWithoutTimestamp[]
 }
 
 export const getChatModel = (modelName: string): ChatModel | undefined => {
@@ -130,7 +130,8 @@ export const llmCompletion = async (
     completion: {
       content: completion[0].message?.content,
       role: 'system',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0
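
ChatConversationWithoutTimestamp comes from src/modules/types, whose diff is not shown here. Given the fields that openai.ts's ConversationOutput omits from ChatConversation (next file), a reasonable guess is that it is simply the old shape with timestamp excluded. A sketch under that assumption, not the committed definition:

// Assumed definitions; the real ones live in src/modules/types (diff not shown).
interface ChatConversation {
  role?: string
  author?: string
  content: string
  model?: string
  id?: number
  numSubAgents?: number
  timestamp: number // new in this commit, epoch ms
}

type ChatConversationWithoutTimestamp = Omit<ChatConversation, 'timestamp'>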

src/modules/llms/api/openai.ts

Lines changed: 10 additions & 5 deletions
@@ -79,7 +79,9 @@ export async function alterGeneratedImg (
   }
 }
 
-const prepareConversation = (conversation: ChatConversation[], model: string): ChatConversation[] => {
+type ConversationOutput = Omit<ChatConversation, 'timestamp' | 'model' | 'id' | 'author' | 'numSubAgents'>
+
+const prepareConversation = (conversation: ChatConversation[], model: string): ConversationOutput[] => {
   const messages = conversation.filter(c => c.model === model).map(m => { return { content: m.content, role: m.role } })
   if (messages.length !== 1 || model === LlmModelsEnum.O1) {
     return messages
@@ -125,7 +127,8 @@ export async function chatCompletion (
   return {
     completion: {
       content: response.choices[0].message?.content ?? 'Error - no completion available',
-      role: 'assistant'
+      role: 'assistant',
+      timestamp: Date.now()
     },
     usage: response.usage?.total_tokens, // 2010
     price: price * config.openAi.chatGpt.priceAdjustment,
@@ -215,7 +218,8 @@ export const streamChatCompletion = async (
   return {
     completion: {
       content: completion,
-      role: 'assistant'
+      role: 'assistant',
+      timestamp: Date.now()
     },
     usage: outputTokens + inputTokens,
     price: 0,
@@ -308,7 +312,8 @@ export const streamChatVisionCompletion = async (
   return {
     completion: {
       content: completion,
-      role: 'assistant'
+      role: 'assistant',
+      timestamp: Date.now()
    },
     usage: outputTokens + inputTokens,
     price: 0,
@@ -319,7 +324,7 @@ export const streamChatVisionCompletion = async (
 
 export async function improvePrompt (promptText: string, model: string): Promise<string> {
   const prompt = `Improve this picture description using max 100 words and don't add additional text to the image: ${promptText} `
-  const conversation = [{ role: 'user', content: prompt }]
+  const conversation = [{ role: 'user', content: prompt, timestamp: Date.now() }]
   const response = await chatCompletion(conversation, model)
   return response.completion?.content as string ?? ''
 }
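
ConversationOutput makes the existing behaviour of prepareConversation explicit in the signature: it already reduced history entries to { content, role }, so the new timestamp (like model, id, author and numSubAgents) never reaches the OpenAI API. A quick illustration, with hypothetical model names and timestamps:

const history: ChatConversation[] = [
  { role: 'user', content: 'hi', model: 'gpt-4', timestamp: 1714000000000 },
  { role: 'assistant', content: 'hello', model: 'gpt-4', timestamp: 1714000001000 },
  { role: 'user', content: 'other model', model: 'claude-3', timestamp: 1714000002000 }
]
// Only the gpt-4 messages survive, stripped to content + role:
prepareConversation(history, 'gpt-4')
// → [ { content: 'hi', role: 'user' }, { content: 'hello', role: 'assistant' } ]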

src/modules/llms/api/pdfHandler.ts

Lines changed: 2 additions & 1 deletion
@@ -19,7 +19,8 @@ export const handlePdf = async (prompt: string): Promise<PdfCompletion> => {
   return {
     completion: {
       content: response.data.response,
-      role: 'system'
+      role: 'system',
+      timestamp: Date.now()
     },
     prompt,
     price: response.data.cost

src/modules/llms/api/vertex.ts

Lines changed: 6 additions & 4 deletions
@@ -1,6 +1,6 @@
 import axios, { type AxiosResponse } from 'axios'
 import config from '../../../config'
-import { type OnMessageContext, type ChatConversation, type OnCallBackQueryData } from '../../types'
+import { type OnMessageContext, type ChatConversation, type OnCallBackQueryData, type ChatConversationWithoutTimestamp } from '../../types'
 import { type LlmCompletion } from './llmApi'
 import { type Readable } from 'stream'
 import { GrammyError } from 'grammy'
@@ -29,7 +29,7 @@ export const vertexCompletion = async (
     stream: false,
     messages: conversation.filter(c => c.model === model)
       .map((msg) => {
-        const msgFiltered: ChatConversation = { content: msg.content, model: msg.model }
+        const msgFiltered: ChatConversationWithoutTimestamp = { content: msg.content, model: msg.model }
         if (model === LlmModelsEnum.CHAT_BISON) {
           msgFiltered.author = msg.role
         } else {
@@ -48,7 +48,8 @@ export const vertexCompletion = async (
     completion: {
       content: response.data._prediction_response[0][0].candidates[0].content,
       role: 'bot', // role replace to author attribute will be done later
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: totalOutputTokens + totalInputTokens,
     price: 0
@@ -145,7 +146,8 @@ export const vertexStreamCompletion = async (
     completion: {
       content: completion,
       role: 'assistant',
-      model
+      model,
+      timestamp: Date.now()
     },
     usage: parseInt(totalOutputTokens, 10) + parseInt(totalInputTokens, 10),
     price: 0,

src/modules/llms/llmsBase.ts

Lines changed: 55 additions & 5 deletions
@@ -37,6 +37,7 @@ import {
   type LLMModelsManager,
   type ModelVersion
 } from './utils/llmModelsManager'
+import { conversationManager } from './utils/conversationManager'
 
 export abstract class LlmsBase implements PayableBot {
   public module: string
@@ -205,7 +206,8 @@
         id: ctx.message?.message_id,
         model,
         content: await preparePrompt(ctx, prompt as string),
-        numSubAgents: 0
+        numSubAgents: 0,
+        timestamp: Date.now()
       })
       if (!session.isProcessingQueue) {
         session.isProcessingQueue = true
@@ -218,7 +220,8 @@
        id: ctx.message?.message_id ?? ctx.message?.message_thread_id ?? 0,
         model,
         content: prompt as string ?? '', // await preparePrompt(ctx, prompt as string),
-        numSubAgents: supportedAgents
+        numSubAgents: supportedAgents,
+        timestamp: Date.now()
       }
       await this.runSubagents(ctx, msg, stream, usesTools) // prompt as string)
     }
@@ -230,6 +233,7 @@
 
   async onChatRequestHandler (ctx: OnMessageContext | OnCallBackQueryData, stream: boolean, usesTools: boolean): Promise<void> {
     const session = this.getSession(ctx)
+    session.chatConversation = conversationManager.manageConversationWindow(session.chatConversation, ctx, this.sessionDataKey)
     while (session.requestQueue.length > 0) {
       try {
         const msg = session.requestQueue.shift()
@@ -272,7 +276,8 @@
       const chat: ChatConversation = {
         content: enhancedPrompt || prompt,
         role: 'user',
-        model: modelVersion
+        model: modelVersion,
+        timestamp: Date.now()
       }
       chatConversation.push(chat)
       const payload = {
@@ -358,7 +363,8 @@
       conversation.push({
         role: 'assistant',
         content: completion.completion?.content ?? '',
-        model
+        model,
+        timestamp: Date.now()
       })
       return {
         price: price.price,
@@ -371,7 +377,8 @@
       conversation.push({
         role: 'assistant',
         content: response.completion?.content ?? '',
-        model
+        model,
+        timestamp: Date.now()
       })
       return {
         price: response.price,
@@ -470,6 +477,49 @@
     session.price = 0
   }
 
+  async testCleanup (ctx: OnMessageContext | OnCallBackQueryData): Promise<void> {
+    const session = this.getSession(ctx)
+    // Force cleanup times for testing
+    const now = new Date()
+    const forcedCleanupTime = new Date(now)
+    forcedCleanupTime.setHours(2, 59, 0, 0) // Set to 2:59 AM
+    session.cleanupState = {
+      nextCleanupTime: forcedCleanupTime.getTime() + (60 * 1000), // 3 AM
+      lastCleanupTime: forcedCleanupTime.getTime() - (24 * 60 * 60 * 1000) // Yesterday 2:59 AM
+    }
+    console.log('Testing cleanup with forced times:', {
+      nextCleanup: new Date(session.cleanupState.nextCleanupTime).toLocaleString(),
+      lastCleanup: new Date(session.cleanupState.lastCleanupTime).toLocaleString(),
+      currentTime: now.toLocaleString()
+    })
+    // Add some test messages with various timestamps
+    if (session.chatConversation.length === 0) {
+      const yesterday = new Date(now)
+      yesterday.setDate(yesterday.getDate() - 1)
+      session.chatConversation = [
+        {
+          role: 'user',
+          content: 'Message from 2 days ago',
+          model: 'test',
+          timestamp: yesterday.getTime() - (24 * 60 * 60 * 1000)
+        },
+        {
+          role: 'assistant',
+          content: 'Message from yesterday',
+          model: 'test',
+          timestamp: yesterday.getTime()
+        },
+        {
+          role: 'user',
+          content: 'Message from today',
+          model: 'test',
+          timestamp: now.getTime()
+        }
+      ]
+    }
+    await this.onChatRequestHandler(ctx, false, false)
+  }
+
   async onError (
     ctx: OnMessageContext | OnCallBackQueryData,
     e: any,
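
manageConversationWindow is the heart of the sliding window, and its implementation also lives in the unshown conversationManager.ts. Judging by testCleanup, which forces nextCleanupTime to 3:00 AM, backdates lastCleanupTime by a day, and seeds messages from two days ago, yesterday and today, the logic plausibly drops messages older than a cleanup boundary once that boundary has passed, then advances the window by 24 hours. A minimal sketch under those assumptions (the real method also takes ctx and the session data key, presumably to read and write cleanupState on the session):

const DAY_MS = 24 * 60 * 60 * 1000

// Hypothetical standalone version of the sliding-window step.
// ChatConversation and ConversationManagerState are the shapes
// sketched earlier under llmApi.ts and helpers.ts.
function manageConversationWindow (
  conversation: ChatConversation[],
  state: ConversationManagerState
): ChatConversation[] {
  const now = Date.now()
  if (now < state.nextCleanupTime) {
    return conversation // boundary not reached yet; keep everything
  }
  const boundary = state.nextCleanupTime
  state.lastCleanupTime = boundary
  state.nextCleanupTime = boundary + DAY_MS
  // Drop everything older than the boundary that just passed.
  return conversation.filter(m => m.timestamp >= boundary)
}

With the seeded test data and a current time after 3 AM, the two older messages would be dropped and only 'Message from today' kept, which is consistent with what testCleanup appears designed to verify.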
