Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion packages/sdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ try {
// Load a model into memory
const modelId = await loadModel({
modelSrc: LLAMA_3_2_1B_INST_Q4_0,
modelType: "llm",
onProgress: (progress) => {
console.log(progress);
},
Expand Down
19 changes: 11 additions & 8 deletions packages/sdk/client/api/load-model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ export function loadModel<S extends ModelDescriptor>(
* @overloadLabel "Load new model"
* @param options - An object that defines all configuration parameters required for loading the model, including:
* - modelSrc: The location from which the model weights are fetched (local path, remote URL, or Hyperdrive URL)
* - modelType: The type of model ("llm", "whisper", "embeddings", "nmt", or "tts")
* - modelType: The canonical type of model ("llamacpp-completion",
* "whispercpp-transcription", "llamacpp-embedding", "nmtcpp-translation",
* "tts-ggml", ...). May be omitted when `modelSrc` is a registry descriptor
* that already carries the engine.
* - modelConfig: Model-specific configuration options (companion sources, model parameters, etc.)
* - onProgress: Callback for download progress updates
* - logger: Logger instance for model operation logs
Expand All @@ -95,27 +98,27 @@ export function loadModel<S extends ModelDescriptor>(
* // Local file path - absolute path
* const localModelId = await loadModel({
* modelSrc: "/home/user/models/llama-7b.gguf",
* modelType: "llm",
* modelType: "llamacpp-completion",
* modelConfig: { ctx_size: 2048 }
* });
*
* // Local file path - relative path
* const relativeModelId = await loadModel({
* modelSrc: "./models/whisper-base.gguf",
* modelType: "whisper"
* modelType: "whispercpp-transcription"
* });
*
* // Hyperdrive URL with key and path
* const hyperdriveId = await loadModel({
* modelSrc: "pear://<hyperdrive-key>/llama-7b.gguf",
* modelType: "llm",
* modelType: "llamacpp-completion",
* modelConfig: { ctx_size: 2048 }
* });
*
* // Remote HTTP/HTTPS URL with progress tracking
* const remoteId = await loadModel({
* modelSrc: "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
* modelType: "llm",
* modelType: "llamacpp-completion",
* onProgress: (progress) => {
* console.log(`Downloaded: ${progress.percentage}%`);
* }
Expand All @@ -124,7 +127,7 @@ export function loadModel<S extends ModelDescriptor>(
* // Multimodal model with projection
* const multimodalId = await loadModel({
* modelSrc: "https://huggingface.co/.../main-model.gguf",
* modelType: "llm",
* modelType: "llamacpp-completion",
* modelConfig: {
* ctx_size: 512,
* projectionModelSrc: "https://huggingface.co/.../projection-model.gguf"
Expand All @@ -137,7 +140,7 @@ export function loadModel<S extends ModelDescriptor>(
* // Whisper with VAD model
* const whisperId = await loadModel({
* modelSrc: "https://huggingface.co/.../whisper-model.gguf",
* modelType: "whisper",
* modelType: "whispercpp-transcription",
* modelConfig: {
* mode: "caption",
* output_format: "plaintext",
Expand All @@ -153,7 +156,7 @@ export function loadModel<S extends ModelDescriptor>(
*
* const modelId = await loadModel({
* modelSrc: "/path/to/model.gguf",
* modelType: "llm",
* modelType: "llamacpp-completion",
* logger // Pass logger in options
* });
* ```
Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/examples/diffusion-img2vid.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ try {
console.log("Loading Wan 2.1 I2V model (diffusion + UMT5-XXL + VAE + CLIP vision)...");
const modelId = await loadModel({
modelSrc: diffusionModelSrc,
modelType: "diffusion",
modelType: "sdcpp-generation",
modelConfig: {
mode: "video",
device: "gpu",
Expand Down
9 changes: 4 additions & 5 deletions packages/sdk/schemas/model-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,11 @@ const canonicalValuesSet = new Set<string>(Object.values(ModelType));
*
* @example
* ```typescript
* // Using alias (backward compatible)
* loadModel({ modelSrc: "...", modelType: MODEL_TYPES.nmt });
* // MODEL_TYPES.nmt resolves to "nmtcpp-translation"
*
* // Using canonical name directly
* // Preferred: canonical name
* loadModel({ modelSrc: "...", modelType: MODEL_TYPES.nmtcppTranslation });
*
* // Deprecated: alias (still resolves to "nmtcpp-translation")
* loadModel({ modelSrc: "...", modelType: MODEL_TYPES.nmt });
* ```
*/
export const PUBLIC_MODEL_TYPES = {
Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/test/bare/tts-resolve-config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ test(
await ttsPlugin.resolveConfig!(legacyConfig, {
resolveModelPath: async () => "",
modelSrc: "s3:///legacy/model",
modelType: "tts",
modelType: "tts-ggml",
});
t.ok(false, "expected LegacyTtsModelDeprecatedError");
} catch (err) {
Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/test/mocks/pr-body-bc-valid.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ const model = await loadModel("model-path");
**AFTER:**

```typescript
const modelId = await loadModel("model-path", { modelType: "llm" });
const modelId = await loadModel("model-path", { modelType: "llamacpp-completion" });
```

2 changes: 1 addition & 1 deletion packages/sdk/test/unit/bci-schemas.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ test("loadModelOptionsToRequestSchema: resolves the 'bci' alias to the canonical

test("loadModelOptionsToRequestSchema: rejects unknown modelConfig keys for BCI (strict)", (t) => {
const result = loadModelOptionsToRequestSchema.safeParse({
modelType: "bci",
modelType: "bci-whispercpp-transcription",
modelSrc: "ggml-bci-windowed.bin",
modelConfig: { notABciField: true },
});
Expand Down
4 changes: 2 additions & 2 deletions packages/sdk/test/unit/classification-schemas.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,15 @@ test("loadModelOptionsBaseSchema: accepts classification alias", (t) => {
test("loadModelOptionsBaseSchema: accepts classification with custom modelSrc", (t) => {
const result = loadModelOptionsBaseSchema.safeParse({
modelSrc: "/abs/path/to/my-classifier.gguf",
modelType: "classification",
modelType: "ggml-classification",
modelConfig: { topK: 3 },
});
t.is(result.success, true);
});

test("loadModelOptionsBaseSchema: rejects classification config with unknown key (strict)", (t) => {
const result = loadModelOptionsBaseSchema.safeParse({
modelType: "classification",
modelType: "ggml-classification",
modelConfig: { topK: 3, unknownKey: true },
});
t.is(result.success, false);
Expand Down
6 changes: 3 additions & 3 deletions packages/sdk/test/unit/inference-handler-migrations.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ test("translateRequestSchema (NMT): accepts an optional requestId", (t) => {
modelId: "m1",
text: "hello",
stream: true,
modelType: "nmt",
modelType: "nmtcpp-translation",
requestId: "req-nmt",
});
t.is(result.success, true);
Expand All @@ -66,7 +66,7 @@ test("translateRequestSchema (LLM): accepts an optional requestId", (t) => {
modelId: "m1",
text: "hello",
stream: true,
modelType: "llm",
modelType: "llamacpp-completion",
from: "en",
to: "fr",
requestId: "req-llm",
Expand All @@ -80,7 +80,7 @@ test("translateRequestSchema: rejects empty-string requestId", (t) => {
modelId: "m1",
text: "hello",
stream: true,
modelType: "nmt",
modelType: "nmtcpp-translation",
requestId: "",
});
t.is(result.success, false);
Expand Down
10 changes: 5 additions & 5 deletions packages/sdk/test/unit/profiler-operation-transport.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ test("operation metrics: loadModel extracts gauges and tags", (t) => {
"profile-1",
100,
500,
{ modelType: "llm" },
{ modelType: "llamacpp-completion" },
{
__profilingMeta: {
sourceType: "registry",
Expand All @@ -37,7 +37,7 @@ test("operation metrics: loadModel extracts gauges and tags", (t) => {
);

t.ok(event, "event is built");
t.alike(event!.tags, { modelType: "llm", sourceType: "registry" });
t.alike(event!.tags, { modelType: "llamacpp-completion", sourceType: "registry" });
t.is(event!.gauges?.downloadTime, 220);
t.is(event!.gauges?.totalBytesDownloaded, 4096);
t.is(event!.gauges?.downloadSpeedBps, 18618);
Expand All @@ -51,7 +51,7 @@ test("operation metrics: omits unavailable gauges (no fabrication)", (t) => {
"profile-2",
100,
90,
{ modelType: "llm" },
{ modelType: "llamacpp-completion" },
{
__profilingMeta: {
sourceType: "filesystem",
Expand Down Expand Up @@ -83,7 +83,7 @@ test("transport: operation event survives injection/extraction round-trip", (t)
ms: 500,
profileId: "round-trip-test",
gauges: { totalLoadTime: 500, downloadTime: 200 },
tags: { modelType: "llm", sourceType: "registry", cacheHit: "true" },
tags: { modelType: "llamacpp-completion", sourceType: "registry", cacheHit: "true" },
};

const baseJson = '{"type":"loadModel","success":true}';
Expand All @@ -99,7 +99,7 @@ test("transport: operation event survives injection/extraction round-trip", (t)
t.is(extracted!.operation!.profileId, "round-trip-test");
t.alike(extracted!.operation!.gauges, { totalLoadTime: 500, downloadTime: 200 });
t.alike(extracted!.operation!.tags, {
modelType: "llm",
modelType: "llamacpp-completion",
sourceType: "registry",
cacheHit: "true",
});
Expand Down
4 changes: 2 additions & 2 deletions packages/sdk/test/unit/sdcpp-plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,7 @@ test("loadModelOptionsBaseSchema: accepts diffusion with alias", (t) => {
test("loadModelOptionsBaseSchema: rejects diffusion with unknown config key (strict)", (t) => {
const result = loadModelOptionsBaseSchema.safeParse({
modelSrc: "model.safetensors",
modelType: "diffusion",
modelType: "sdcpp-generation",
modelConfig: { device: "gpu", notAField: true },
});
t.is(result.success, false);
Expand All @@ -848,7 +848,7 @@ test("loadModelOptionsBaseSchema: rejects diffusion with unknown config key (str
test("loadModelOptionsBaseSchema: accepts diffusion with mode: 'upscale' (standalone ESRGAN)", (t) => {
const result = loadModelOptionsBaseSchema.safeParse({
modelSrc: "RealESRGAN_x4plus_anime_6B.pth",
modelType: "diffusion",
modelType: "sdcpp-generation",
modelConfig: {
mode: "upscale",
upscaler: { tile_size: 128 },
Expand Down
2 changes: 1 addition & 1 deletion packages/sdk/test/unit/vla-schemas.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ test("loadModelOptionsBaseSchema: accepts vla alias", (t) => {
test("loadModelOptionsBaseSchema: rejects vla config with unknown key (strict)", (t) => {
const result = loadModelOptionsBaseSchema.safeParse({
modelSrc: "smolvla.gguf",
modelType: "vla",
modelType: "ggml-vla",
modelConfig: { backend: "cpu", unknownKey: true },
});
t.is(result.success, false);
Expand Down
Loading