+import axios, { type AxiosResponse } from 'axios'
+import { type Readable } from 'stream'
+import { GrammyError } from 'grammy'
+import { pino } from 'pino'
+
+import config from '../../../config'
+import { type OnCallBackQueryData, type OnMessageContext, type ChatConversation } from '../../types'
+import { type LlmCompletion } from './llmApi'
+import { headersStream } from './helper'
+import { LlmModelsEnum } from '../utils/llmModelsManager'
+import { type ModelParameters } from '../utils/types'
+import { prepareConversation } from './openai'
+
+const logger = pino({
+  name: 'deepSeek - llmsBot',
+  transport: {
+    target: 'pino-pretty',
+    options: { colorize: true }
+  }
+})
+
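+// Base URL of the LLM proxy service; the DeepSeek route is appended when the request is built.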
+const API_ENDPOINT = config.llms.apiEndpoint
+
+export const deepSeekStreamCompletion = async (
+  conversation: ChatConversation[],
+  model = LlmModelsEnum.GPT_35_TURBO,
+  ctx: OnMessageContext | OnCallBackQueryData,
+  msgId: number,
+  limitTokens = true,
+  parameters?: ModelParameters
+): Promise<LlmCompletion> => {
+  logger.info(`Handling ${model} stream completion`)
+  parameters = parameters ?? {
+    system: ctx.session.currentPrompt,
+    max_tokens: +config.openAi.chatGpt.maxTokens
+  }
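+  // Request payload for the DeepSeek route; stream: true makes the service emit incremental chunks.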
+  const data = {
+    model,
+    stream: true,
+    system: parameters.system,
+    max_tokens: limitTokens ? parameters.max_tokens : undefined,
+    messages: prepareConversation(conversation, model, ctx)
+  }
+  let wordCount = 0
+  let wordCountMinimum = 2
+  const url = `${API_ENDPOINT}/deepseek/completions`
+  if (!ctx.chat?.id) {
+    throw new Error('Context chat id should not be empty before starting the stream request')
+  }
+
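+  // headersStream (from ./helper) is assumed to set responseType: 'stream',
+  // so response.data below is a Node Readable rather than a parsed body.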
+  const response: AxiosResponse = await axios.post(url, data, headersStream)
+
+  const completionStream: Readable = response.data
+  let completion = ''
+  let outputTokens = ''
+  let inputTokens = ''
+  let message = ''
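+  // The service streams plain-text chunks; usage markers of the form
+  // "Input Tokens: <n>" and "Output Tokens: <m>" arrive inline and are parsed out below.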
+  for await (const chunk of completionStream) {
+    const msg = chunk.toString()
+    if (msg) {
+      if (msg.includes('Input Tokens:')) {
+        const tokenMsg = msg.split('Input Tokens: ')[1]
+        inputTokens = tokenMsg.split('Output Tokens: ')[0]
+        outputTokens = tokenMsg.split('Output Tokens: ')[1]
+        completion += msg.split('Input Tokens: ')[0]
+        completion = completion.split('Input Tokens: ')[0]
+      } else if (msg.includes('Output Tokens: ')) {
+        outputTokens = msg.split('Output Tokens: ')[1]
+        completion = completion.split('Output Tokens: ')[0]
+      } else {
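+        // Regular content chunk: accumulate it and refresh the Telegram message
+        // every wordCountMinimum chunks, doubling the threshold (up to 64) so
+        // edits become less frequent as the reply grows.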
+        wordCount++
+        completion += msg
+        if (wordCount > wordCountMinimum) {
+          if (wordCountMinimum < 64) {
+            wordCountMinimum *= 2
+          }
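+          // Strip any previous ellipsis and re-append one as a "still typing" hint.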
+          completion = completion.replaceAll('...', '')
+          completion += '...'
+          wordCount = 0
+          if (ctx.chat?.id && message !== completion) {
+            message = completion
+            await ctx.api
+              .editMessageText(ctx.chat?.id, msgId, completion)
+              .catch(async (e: any) => {
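+                // Log and swallow Telegram 400s (e.g. "message is not modified"); rethrow anything else.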
+                if (e instanceof GrammyError) {
+                  if (e.error_code !== 400) {
+                    throw e
+                  } else {
+                    logger.error(e.message)
+                  }
+                } else {
+                  throw e
+                }
+              })
+          }
+        }
+      }
+    }
+  }
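+  // Final flush: drop the in-progress ellipsis and post the complete reply.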
+  completion = completion.replaceAll('...', '')
+  await ctx.api
+    .editMessageText(ctx.chat?.id, msgId, completion)
+    .catch((e: any) => {
+      if (e instanceof GrammyError) {
+        if (e.error_code !== 400) {
+          throw e
+        } else {
+          logger.error(e)
+        }
+      } else {
+        throw e
+      }
+    })
+  // Fall back to 0 if the usage markers never arrived in the stream, so the
+  // totals below are numbers rather than NaN.
+  const totalOutputTokens = parseInt(outputTokens, 10) || 0
+  const totalInputTokens = parseInt(inputTokens, 10) || 0
+  return {
+    completion: {
+      content: completion,
+      role: 'assistant',
+      model,
+      timestamp: Date.now()
+    },
+    usage: totalOutputTokens + totalInputTokens,
+    price: 0,
+    inputTokens: totalInputTokens,
+    outputTokens: totalOutputTokens
+  }
+}
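
For context, a minimal sketch of how this helper might be invoked from a command handler. The names `conversation`, `msgId`, and `result` are assumptions standing in for whatever the calling bot code already has on hand; everything else comes from the file above:

```ts
// Hypothetical call site (not part of this diff): ctx is an OnMessageContext,
// msgId points at a placeholder message already sent so it can be edited in place,
// and conversation holds the prior ChatConversation[] turns.
const result = await deepSeekStreamCompletion(
  conversation,
  LlmModelsEnum.GPT_35_TURBO, // default model from the signature above
  ctx,
  msgId,
  true // limitTokens: cap output at the configured max_tokens
)
logger.info(`usage: ${result.usage} tokens (in: ${result.inputTokens}, out: ${result.outputTokens})`)
```

Editing one placeholder message in place, rather than sending new messages per chunk, is why the helper takes `msgId` up front.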