diff --git a/packages/proxy/schema/index.ts b/packages/proxy/schema/index.ts index 7786a113..3990058c 100644 --- a/packages/proxy/schema/index.ts +++ b/packages/proxy/schema/index.ts @@ -213,6 +213,10 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = { "anthropic.claude-3-haiku-20240307-v1:0": ["bedrock"], "anthropic.claude-3-sonnet-20240229-v1:0": ["bedrock"], "anthropic.claude-3-5-sonnet-20240620-v1:0": ["bedrock"], + "us.meta.llama3-2-1b-instruct-v1:0": ["bedrock"], + "us.meta.llama3-2-3b-instruct-v1:0": ["bedrock"], + "us.meta.llama3-2-11b-instruct-v1:0": ["bedrock"], + "us.meta.llama3-2-90b-instruct-v1:0": ["bedrock"], "grok-beta": ["xAI"], }; diff --git a/packages/proxy/schema/models.ts b/packages/proxy/schema/models.ts index b81376fd..f615b0b7 100644 --- a/packages/proxy/schema/models.ts +++ b/packages/proxy/schema/models.ts @@ -336,6 +336,43 @@ export const AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 75, displayName: "Claude 3 Opus", }, + "claude-instant-1.2": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 0.8, + output_cost_per_mil_tokens: 2.4, + displayName: "Claude Instant 1.2", + }, + "claude-instant-1": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 0.8, + output_cost_per_mil_tokens: 2.4, + displayName: "Claude Instant 1", + }, + "claude-2.1": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 8, + output_cost_per_mil_tokens: 24, + displayName: "Claude 2.1", + }, + "claude-2.0": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 8, + output_cost_per_mil_tokens: 24, + displayName: "Claude 2.0", + }, + "claude-2": { + format: "anthropic", + flavor: "chat", + input_cost_per_mil_tokens: 8, + output_cost_per_mil_tokens: 24, + displayName: "Claude 2", + }, + + // BEDROCK MODELS "anthropic.claude-3-5-sonnet-20241022-v2:0": { format: "anthropic", flavor: "chat", @@ -376,40 +413,37 @@ export const 
AvailableModels: { [name: string]: ModelSpec } = { output_cost_per_mil_tokens: 75, displayName: "Claude 3 Opus v1.0", }, - "claude-instant-1.2": { - format: "anthropic", - flavor: "chat", - input_cost_per_mil_tokens: 0.8, - output_cost_per_mil_tokens: 2.4, - displayName: "Claude Instant 1.2", - }, - "claude-instant-1": { - format: "anthropic", + + "us.meta.llama3-2-1b-instruct-v1:0": { + format: "openai", flavor: "chat", - input_cost_per_mil_tokens: 0.8, - output_cost_per_mil_tokens: 2.4, - displayName: "Claude Instant 1", + input_cost_per_mil_tokens: 0.1, + output_cost_per_mil_tokens: 0.1, + displayName: "LLaMA 3.2 1B Instruct v1.0", }, - "claude-2.1": { - format: "anthropic", + + "us.meta.llama3-2-3b-instruct-v1:0": { + format: "openai", flavor: "chat", - input_cost_per_mil_tokens: 8, - output_cost_per_mil_tokens: 24, - displayName: "Claude 2.1", + input_cost_per_mil_tokens: 0.15, + output_cost_per_mil_tokens: 0.15, + displayName: "LLaMA 3.2 3B Instruct v1.0", }, - "claude-2.0": { - format: "anthropic", + + "us.meta.llama3-2-11b-instruct-v1:0": { + format: "openai", flavor: "chat", - input_cost_per_mil_tokens: 8, - output_cost_per_mil_tokens: 24, - displayName: "Claude 2.0", + input_cost_per_mil_tokens: 0.35, + output_cost_per_mil_tokens: 0.35, + displayName: "LLaMA 3.2 11B Instruct v1.0", }, - "claude-2": { - format: "anthropic", + + "us.meta.llama3-2-90b-instruct-v1:0": { + format: "openai", flavor: "chat", - input_cost_per_mil_tokens: 8, - output_cost_per_mil_tokens: 24, - displayName: "Claude 2", + input_cost_per_mil_tokens: 2, + output_cost_per_mil_tokens: 2, + displayName: "LLaMA 3.2 90B Instruct v1.0", }, // REPLICATE MODELS diff --git a/packages/proxy/src/providers/bedrock.ts b/packages/proxy/src/providers/bedrock.ts index 289c7cf0..b6710e22 100644 --- a/packages/proxy/src/providers/bedrock.ts +++ b/packages/proxy/src/providers/bedrock.ts @@ -11,6 +11,7 @@ import { import { CompletionUsage } from "openai/resources"; const brt = new 
/**
 * Invokes a Bedrock-hosted model for a request that arrived in OpenAI chat
 * format and exposes the result as a web `ReadableStream`.
 *
 * - When `body.stream` is truthy, every Bedrock event that carries
 *   `chunk.bytes` is re-emitted as a server-sent event
 *   (`data: <json>\n\n`), followed by a final `data: [DONE]\n\n`.
 * - Otherwise the whole response body is emitted as one JSON chunk and the
 *   returned response's `Content-Type` is set to `application/json`.
 *
 * @param secret - API secret; must have `type === "bedrock"`. Its metadata
 *   supplies the region, access key and optional session token.
 * @param body - Request body. `model` is required and is used as the Bedrock
 *   `modelId`; `messages` and `max_tokens` are renamed (see below) and every
 *   remaining key is forwarded unchanged.
 * @returns The byte stream plus a `Response` (status 200) carrying headers.
 * @throws Error if the secret is not a Bedrock secret, if `model` is missing
 *   or not a string, or if a streaming call returns no body.
 */
export async function fetchBedrockOpenAI({
  secret,
  body,
}: {
  secret: APISecret;
  body: Record<string, unknown>;
}) {
  if (secret.type !== "bedrock") {
    throw new Error("Bedrock: expected secret");
  }

  const { model, stream, messages, max_tokens, ...rest } = body;
  if (!model || typeof model !== "string") {
    throw new Error("Bedrock: expected model");
  }

  const metadata = secret.metadata as BedrockMetadata;

  // A client is built per call because the credentials come from the secret.
  const brt = new BedrockRuntimeClient({
    region: metadata.region,
    credentials: {
      accessKeyId: metadata.access_key,
      secretAccessKey: secret.secret,
      ...(metadata.session_token
        ? { sessionToken: metadata.session_token }
        : {}),
    },
  });

  // One encoder/decoder pair is enough for the whole request.
  const encoder = new TextEncoder();
  const decoder = new TextDecoder();

  // NOTE(review): the payload field names `inputs` / `max_new_tokens` are kept
  // exactly as originally written. Confirm them against the Bedrock request
  // schema for the targeted Meta Llama models before relying on this path.
  const input = {
    body: encoder.encode(
      JSON.stringify({
        inputs: messages,
        max_new_tokens: max_tokens,
        ...rest,
      }),
    ),
    contentType: "application/json",
    modelId: model,
  };

  const httpResponse = new Response(null, {
    status: 200,
  });

  let responseStream;
  if (stream) {
    const command = new InvokeModelWithResponseStreamCommand(input);
    const response = await brt.send(command);
    if (!response.body) {
      throw new Error("Bedrock: empty response body");
    }
    const iterator = response.body[Symbol.asyncIterator]();
    responseStream = new ReadableStream({
      async pull(controller) {
        // Keep reading until something is enqueued or the source ends. If
        // pull() resolves without enqueuing, the stream is not guaranteed to
        // call it again, which can leave a pending read waiting forever.
        for (;;) {
          const { value, done } = await iterator.next();
          if (done) {
            // Terminate the SSE stream the way OpenAI clients expect.
            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
            controller.close();
            return;
          }
          const bytes = value.chunk?.bytes;
          if (!bytes) {
            // Event without a payload chunk: skip it and read the next one.
            continue;
          }
          // Round-trip through JSON so each event is a single compact line.
          const valueData = JSON.parse(decoder.decode(bytes));
          controller.enqueue(
            encoder.encode("data: " + JSON.stringify(valueData) + "\n\n"),
          );
          return;
        }
      },
      async cancel() {
        // Let the underlying Bedrock iterator release its resources.
        if (typeof iterator.return === "function") {
          await iterator.return();
        }
      },
    });
  } else {
    const command = new InvokeModelCommand(input);
    const response = await brt.send(command);
    responseStream = new ReadableStream({
      start(controller) {
        // Parse and re-serialize so malformed JSON errors the stream instead
        // of being passed through to the client.
        const valueData = JSON.parse(decoder.decode(response.body));
        controller.enqueue(encoder.encode(JSON.stringify(valueData)));
        controller.close();
      },
    });
    httpResponse.headers.set("Content-Type", "application/json");
  }

  return {
    stream: responseStream,
    response: httpResponse,
  };
}