-import { OpenAIPayload, OpenAIStream, generateText, huggingFaceStream } from '@/lib/utils'
+import { OpenAIPayload, OpenAIStream, generateText, huggingFaceStream, openaiCompletion } from '@/lib/utils'
 import cors from '@/utils/cors'
 import * as Sentry from '@sentry/nextjs'
 import { defaultChatSystem } from '../../utils/constants'
@@ -31,11 +31,12 @@ const track = async (userId: string, model: string) => {
 }
 type LLM = 'openai/gpt-4' | 'openai/gpt-3.5-turbo' | 'openai/gpt-3.5-turbo-16k' | 'tiiuae/falcon-7b' | 'google/bison' | 'bigscience/bloomz-7b1'

+
 interface RequestPayload {
   prompt: string
   history: Chat[]
   system?: string
-  model: LLM
+  model: LLM | string
   stream: boolean
   max_new_tokens?: number;
   stop?: string[];
@@ -103,7 +104,7 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
     })
   }

-  console.log('streaming chat with model', model)
+  console.log('generating text with model', model, 'stream', stream, 'max_new_tokens', max_new_tokens)

   const messages: Chat[] = [
     {
@@ -129,37 +130,7 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
   let readableStream: ReadableStream


-  // TODO: not supported atm
-  if (model === 'tiiuae/falcon-7b') {
-    const url = 'http://34.127.99.191:9090'
-    if (!stream) {
-      const res = await generateText(url, {
-        inputs: prompt,
-        stream: false,
-        parameters: {
-          max_new_tokens: max_new_tokens || 1000,
-          return_full_text: false,
-          stop: stop || [],
-        },
-      })
-      console.log('res', res)
-      return new Response(JSON.stringify({
-        generated_text: res.generated_text
-      }), {
-        status: 200,
-      })
-    }
-    readableStream = await huggingFaceStream(url, {
-      inputs: prompt,
-      stream: true,
-      parameters: {
-        // { model_id: "tiiuae/falcon-7b", revision: None, sharded: None, num_shard: Some(1), quantize: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_input_length: 1000, max_total_tokens: 1512, max_batch_size: None, waiting_served_ratio: 1.2, max_batch_total_tokens: 32000, max_waiting_tokens: 20, port: 80, shard_uds_path: "/tmp/text-generation-server", master_addr: "localhost", master_port: 29500, huggingface_hub_cache: Some("/data"), weights_cache_override: None, disable_custom_kernels: false, json_output: false, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, env: false }
-        max_new_tokens: max_new_tokens || 1000,
-        return_full_text: false,
-        stop: stop || [],
-      }
-    })
-  } else if (model === 'bigscience/bloomz-7b1') {
+  if (model === 'bigscience/bloomz-7b1') {
     const url = 'https://api.differentai.xyz'
     if (!stream) {
       const res = await generateText(url, {
@@ -249,6 +220,38 @@ const handler = async (req: Request, res: Response): Promise<Response> => {
       })
     }
     readableStream = await OpenAIStream(payload)
+  } else if (model === 'NousResearch/Nous-Hermes-13b') {
+    const text = await openaiCompletion(
+      'https://6976-35-203-131-148.ngrok-free.app', 'NousResearch/Nous-Hermes-13b', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
+  } else if (model === 'TheBloke/mpt-7b-chat-GGML') {
+    const text = await openaiCompletion(
+      'https://3e85-34-139-159-248.ngrok-free.app', 'TheBloke/mpt-7b-chat-GGML', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
+  } else if (model === 'TheBloke/Nous-Hermes-13B-GGML') {
+    const text = await openaiCompletion(
+      'https://28b6-2a01-e0a-3ee-1cb0-505a-5158-140c-80f8.ngrok-free.app', 'TheBloke/Nous-Hermes-13B-GGML', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
+  } else if (model === 'nomic-ai/ggml-replit-code-v1-3b') {
+    const text = await openaiCompletion(
+      'https://430699a51145-11712225068814657101.ngrok-free.app', 'nomic-ai/ggml-replit-code-v1-3b', prompt, max_new_tokens || 100)
+    return new Response(JSON.stringify({
+      generated_text: text || ''
+    }), {
+      status: 200,
+    })
   } else {
     if (!stream) {
       payload.stream = stream
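
For reference, `openaiCompletion` is imported from `@/lib/utils` in this diff but its implementation is not shown. A minimal sketch of what such a helper could look like, assuming the ngrok-hosted backends expose an OpenAI-compatible `/v1/completions` endpoint (the URL path, request fields, and response shape below are assumptions, not the actual `@/lib/utils` code):

```ts
// Hypothetical sketch — not the real '@/lib/utils' implementation.
// Assumes each backend serves an OpenAI-compatible completions API.
export const openaiCompletion = async (
  baseUrl: string,
  model: string,
  prompt: string,
  maxNewTokens: number,
): Promise<string | undefined> => {
  const response = await fetch(`${baseUrl}/v1/completions`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, prompt, max_tokens: maxNewTokens }),
  })
  if (!response.ok) {
    throw new Error(`openaiCompletion failed: ${response.status}`)
  }
  const json = await response.json()
  // OpenAI-style completion responses carry the text in choices[0].text
  return json?.choices?.[0]?.text
}
```

The signature mirrors the four new call sites above: a base URL, a model id, the raw prompt, and a token budget, resolving to the generated text (or `undefined`), which the handler then wraps as `generated_text`.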