-import { OpenAIPayload, OpenAIStream, generateText, huggingFaceStream } from '@/lib/utils'
+import { OpenAIPayload, OpenAIStream, generateText, huggingFaceStream, openaiCompletion } from '@/lib/utils'
 import cors from '@/utils/cors'
 import * as Sentry from '@sentry/nextjs'
 import { defaultChatSystem } from '../../utils/constants'
@@ -31,11 +31,12 @@ const track = async (userId: string, model: string) => {
 }
 type LLM = 'openai/gpt-4' | 'openai/gpt-3.5-turbo' | 'openai/gpt-3.5-turbo-16k' | 'tiiuae/falcon-7b' | 'google/bison' | 'bigscience/bloomz-7b1'

+
 interface RequestPayload {
   prompt: string
   history: Chat[]
   system?: string
-  model: LLM
+  model: LLM | string
   stream: boolean
   max_new_tokens?: number;
   stop?: string[];
@@ -103,7 +104,7 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
     })
   }

-  console.log('streaming chat with model', model)
+  console.log('generating text with model', model, 'stream', stream, 'max_new_tokens', max_new_tokens)

   const messages: Chat[] = [
     {
@@ -129,37 +130,7 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
   let readableStream: ReadableStream


-  // TODO: not supported atm
-  if (model === 'tiiuae/falcon-7b') {
-    const url = 'http://34.127.99.191:9090'
-    if (!stream) {
-      const res = await generateText(url, {
-        inputs: prompt,
-        stream: false,
-        parameters: {
-          max_new_tokens: max_new_tokens || 1000,
-          return_full_text: false,
-          stop: stop || [],
-        },
-      })
-      console.log('res', res)
-      return new Response(JSON.stringify({
-        generated_text: res.generated_text
-      }), {
-        status: 200,
-      })
-    }
-    readableStream = await huggingFaceStream(url, {
-      inputs: prompt,
-      stream: true,
-      parameters: {
-        // { model_id: "tiiuae/falcon-7b", revision: None, sharded: None, num_shard: Some(1), quantize: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_input_length: 1000, max_total_tokens: 1512, max_batch_size: None, waiting_served_ratio: 1.2, max_batch_total_tokens: 32000, max_waiting_tokens: 20, port: 80, shard_uds_path: "/tmp/text-generation-server", master_addr: "localhost", master_port: 29500, huggingface_hub_cache: Some("/data"), weights_cache_override: None, disable_custom_kernels: false, json_output: false, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, env: false }
-        max_new_tokens: max_new_tokens || 1000,
-        return_full_text: false,
-        stop: stop || [],
-      }
-    })
-  } else if (model === 'bigscience/bloomz-7b1') {
+  if (model === 'bigscience/bloomz-7b1') {
     const url = 'https://api.differentai.xyz'
     if (!stream) {
       const res = await generateText(url, {
@@ -249,6 +220,38 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
       })
     }
     readableStream = await OpenAIStream(payload)
+  } else if (model === 'NousResearch/Nous-Hermes-13b') {
+    const text = await openaiCompletion(
+      'https://6976-35-203-131-148.ngrok-free.app', 'NousResearch/Nous-Hermes-13b', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
+  } else if (model === 'TheBloke/mpt-7b-chat-GGML') {
+    const text = await openaiCompletion(
+      'https://3e85-34-139-159-248.ngrok-free.app', 'TheBloke/mpt-7b-chat-GGML', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
+  } else if (model === 'TheBloke/Nous-Hermes-13B-GGML') {
+    const text = await openaiCompletion(
+      'https://28b6-2a01-e0a-3ee-1cb0-505a-5158-140c-80f8.ngrok-free.app', 'TheBloke/Nous-Hermes-13B-GGML', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
+  } else if (model === 'nomic-ai/ggml-replit-code-v1-3b') {
+    const text = await openaiCompletion(
+      'https://430699a51145-11712225068814657101.ngrok-free.app', 'nomic-ai/ggml-replit-code-v1-3b', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
   } else {
     if (!stream) {
       payload.stream = stream
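
For reference, `openaiCompletion` is imported from `@/lib/utils` in this diff but its implementation is not shown. A minimal sketch of what such a helper could look like, assuming the ngrok-hosted backends expose an OpenAI-compatible `/v1/completions` endpoint (the URL path, request fields, and response shape below are assumptions, not the actual `@/lib/utils` code):

```ts
// Hypothetical sketch — not the real '@/lib/utils' implementation.
// Assumes each backend serves an OpenAI-compatible completions API.
export const openaiCompletion = async (
  baseUrl: string,
  model: string,
  prompt: string,
  maxNewTokens: number,
): Promise<string | undefined> => {
  const response = await fetch(`${baseUrl}/v1/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, prompt, max_tokens: maxNewTokens }),
  })
  if (!response.ok) {
    throw new Error(`openaiCompletion failed: ${response.status}`)
  }
  const json = await response.json()
  // OpenAI-style completion responses carry the text in choices[0].text
  return json?.choices?.[0]?.text
}
```

The signature mirrors the four new call sites above: a base URL, a model id, the raw prompt, and a token budget, resolving to the generated text (or `undefined`), which the handler then wraps as `generated_text`.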