Skip to content

WIP: More bedrock models #117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/proxy/schema/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,10 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = {
"anthropic.claude-3-haiku-20240307-v1:0": ["bedrock"],
"anthropic.claude-3-sonnet-20240229-v1:0": ["bedrock"],
"anthropic.claude-3-5-sonnet-20240620-v1:0": ["bedrock"],
"us.meta.llama3-2-1b-instruct-v1:0": ["bedrock"],
"us.meta.llama3-2-3b-instruct-v1:0": ["bedrock"],
"us.meta.llama3-2-11b-instruct-v1:0": ["bedrock"],
"us.meta.llama3-2-90b-instruct-v1:0": ["bedrock"],
"grok-beta": ["xAI"],
};

Expand Down
88 changes: 61 additions & 27 deletions packages/proxy/schema/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,43 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
output_cost_per_mil_tokens: 75,
displayName: "Claude 3 Opus",
},
"claude-instant-1.2": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 2.4,
displayName: "Claude Instant 1.2",
},
"claude-instant-1": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 2.4,
displayName: "Claude Instant 1",
},
"claude-2.1": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2.1",
},
"claude-2.0": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2.0",
},
"claude-2": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2",
},

// BEDROCK MODELS
"anthropic.claude-3-5-sonnet-20241022-v2:0": {
format: "anthropic",
flavor: "chat",
Expand Down Expand Up @@ -376,40 +413,37 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
output_cost_per_mil_tokens: 75,
displayName: "Claude 3 Opus v1.0",
},
"claude-instant-1.2": {
format: "anthropic",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 2.4,
displayName: "Claude Instant 1.2",
},
"claude-instant-1": {
format: "anthropic",

"us.meta.llama3-2-1b-instruct-v1:0": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 0.8,
output_cost_per_mil_tokens: 2.4,
displayName: "Claude Instant 1",
input_cost_per_mil_tokens: 0.1,
output_cost_per_mil_tokens: 0.1,
displayName: "LLaMA 3.2 1B Instruct v1.0",
},
"claude-2.1": {
format: "anthropic",

"us.meta.llama3-2-3b-instruct-v1:0": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2.1",
input_cost_per_mil_tokens: 0.15,
output_cost_per_mil_tokens: 0.15,
displayName: "LLaMA 3.2 3B Instruct v1.0",
},
"claude-2.0": {
format: "anthropic",

"us.meta.llama3-2-11b-instruct-v1:0": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2.0",
input_cost_per_mil_tokens: 0.35,
output_cost_per_mil_tokens: 0.35,
displayName: "LLaMA 3.2 11B Instruct v1.0",
},
"claude-2": {
format: "anthropic",

"us.meta.llama3-2-90b-instruct-v1:0": {
format: "openai",
flavor: "chat",
input_cost_per_mil_tokens: 8,
output_cost_per_mil_tokens: 24,
displayName: "Claude 2",
input_cost_per_mil_tokens: 2,
output_cost_per_mil_tokens: 2,
displayName: "LLaMA 3.2 90B Instruct v1.0",
},

// REPLICATE MODELS
Expand Down
106 changes: 106 additions & 0 deletions packages/proxy/src/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
import { CompletionUsage } from "openai/resources";

const brt = new BedrockRuntimeClient({});

export async function fetchBedrockAnthropic({
secret,
body,
Expand Down Expand Up @@ -127,3 +128,108 @@ export async function fetchBedrockAnthropic({
response: httpResponse,
};
}

/**
 * Invoke a non-Anthropic Bedrock model (e.g. the Meta LLaMA family) and
 * adapt the result to the proxy's { stream, response } shape.
 *
 * The request body is forwarded to Bedrock's InvokeModel API with the
 * OpenAI-style `messages`/`max_tokens` fields remapped to Bedrock's
 * `inputs`/`max_new_tokens`. When `stream` is set, the Bedrock response
 * stream is re-emitted as server-sent events ("data: …\n\n", terminated
 * by "data: [DONE]").
 *
 * @param secret Bedrock credentials; `secret.metadata` must carry the
 *   region/access key (see BedrockMetadata), `secret.secret` the secret key.
 * @param body   The caller's request body; must include a string `model`.
 * @returns `{ stream, response }` — a byte stream of the model output and a
 *   Response carrying status/headers.
 * @throws Error if the secret is not a Bedrock secret or `model` is missing.
 */
export async function fetchBedrockOpenAI({
  secret,
  body,
}: {
  secret: APISecret;
  body: Record<string, unknown>;
}) {
  if (secret.type !== "bedrock") {
    throw new Error("Bedrock: expected secret");
  }

  const { model, stream, messages, max_tokens, ...rest } = body;
  if (!model || typeof model !== "string") {
    throw new Error("Bedrock: expected model");
  }

  const metadata = secret.metadata as BedrockMetadata;

  // Build a per-request client so region and credentials come from the
  // secret's metadata rather than the ambient environment.
  const brt = new BedrockRuntimeClient({
    region: metadata.region,
    credentials: {
      accessKeyId: metadata.access_key,
      secretAccessKey: secret.secret,
      ...(metadata.session_token
        ? { sessionToken: metadata.session_token }
        : {}),
    },
  });

  // Remap OpenAI-style fields to Bedrock's InvokeModel request schema.
  const input = {
    body: new TextEncoder().encode(
      JSON.stringify({
        inputs: messages,
        max_new_tokens: max_tokens,
        ...rest,
      }),
    ),
    contentType: "application/json",
    modelId: model,
  };

  const httpResponse = new Response(null, {
    status: 200,
  });

  let responseStream;
  if (stream) {
    const command = new InvokeModelWithResponseStreamCommand(input);
    const response = await brt.send(command);
    if (!response.body) {
      throw new Error("Bedrock: empty response body");
    }
    const bodyStream = response.body;
    const iterator = bodyStream[Symbol.asyncIterator]();
    // Hoisted: reused for every chunk instead of re-allocating per pull.
    const encoder = new TextEncoder();
    responseStream = new ReadableStream<Uint8Array>({
      async pull(controller) {
        const { value, done } = await iterator.next();
        if (done) {
          // Terminate the SSE stream the way OpenAI-compatible clients expect.
          controller.enqueue(encoder.encode("data: [DONE]\n\n"));
          controller.close();
        } else if (value.chunk?.bytes) {
          // Re-emit each Bedrock chunk as a server-sent event.
          const valueData = JSON.parse(
            new TextDecoder().decode(value.chunk.bytes),
          );
          controller.enqueue(
            encoder.encode("data: " + JSON.stringify(valueData) + "\n\n"),
          );
        }
      },
      async cancel() {
        // Release the underlying Bedrock stream if the consumer goes away.
        if (typeof iterator.return === "function") {
          await iterator.return();
        }
      },
    });
    httpResponse.headers.set("Content-Type", "text/event-stream");
  } else {
    const command = new InvokeModelCommand(input);
    const response = await brt.send(command);
    responseStream = new ReadableStream<Uint8Array>({
      start(controller) {
        const valueData = JSON.parse(new TextDecoder().decode(response.body));
        controller.enqueue(new TextEncoder().encode(JSON.stringify(valueData)));
        controller.close();
      },
    });
    httpResponse.headers.set("Content-Type", "application/json");
  }

  return {
    stream: responseStream,
    response: httpResponse,
  };
}
9 changes: 8 additions & 1 deletion packages/proxy/src/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ import {
CompletionUsage,
CreateEmbeddingResponse,
} from "openai/resources";
import { fetchBedrockAnthropic } from "./providers/bedrock";
import { fetchBedrockAnthropic, fetchBedrockOpenAI } from "./providers/bedrock";
import { Buffer } from "node:buffer";
import { ExperimentLogPartialArgs } from "@braintrust/core";
import { MessageParam } from "@anthropic-ai/sdk/resources";
Expand Down Expand Up @@ -967,6 +967,13 @@ async function fetchOpenAI(
secret: APISecret,
setHeader: (name: string, value: string) => void,
): Promise<ModelResponse> {
if (secret.type === "bedrock") {
return await fetchBedrockOpenAI({
secret,
body: bodyData,
});
}

let baseURL =
(secret.metadata &&
"api_base" in secret.metadata &&
Expand Down