Skip to content
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions packages/lms-client/src/llm/.test-snapshots/LLM.heavy.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,78 @@ exports[`LLM can get model info 1`] = `
"vision": false,
}
`;

exports[`LLM with default model fixture Can tokenize correctly 1`] = `
[
1143,
64866,
374,
264,
35765,
13,
]
`;

exports[`LLM with default model fixture Can tokenize multiple strings correctly 1`] = `
[
[
34,
261,
72768,
30769,
279,
15917,
315,
1059,
19265,
],
[
437,
1340,
374,
27211,
13657,
],
]
`;

exports[`LLM with default model fixture can apply prompt template to a regular chat 1`] = `
"<|im_start|>system
This is the system prompt.<|im_end|>
<|im_start|>user
User message 1<|im_end|>
<|im_start|>assistant
Assistant message 1<|im_end|>
<|im_start|>user
User message 2<|im_end|>
<|im_start|>assistant
"
`;

exports[`LLM with default model fixture can get model info 1`] = `
{
"architecture": "qwen2",
"contextLength": 4096,
"deviceIdentifier": null,
"displayName": "Qwen2.5 0.5B Instruct",
"format": "gguf",
"identifier": Any<String>,
"indexedModelIdentifier": "lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf",
"instanceReference": Any<String>,
"lastUsedTime": 1773374807159,
"maxContextLength": 32768,
"modelKey": Any<String>,
"paramsString": "0.5B",
"path": "lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf",
"publisher": "lmstudio-community",
"quantization": {
"bits": 4,
"name": "Q4_K_M",
},
"sizeBytes": 397807936,
"trainedForToolUse": true,
"ttlMs": 3600000,
"type": "llm",
"vision": false,
}
`;
170 changes: 124 additions & 46 deletions packages/lms-client/src/llm/LLM.heavy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,64 +3,142 @@ import { ensureHeavyTestsEnvironment, llmTestingQwen05B } from "../shared.heavy.

describe("LLM", () => {
let client: LMStudioClient;
let model: LLM;
const chat = Chat.from([
{ role: "system", content: "This is the system prompt." },
{ role: "user", content: "User message 1" },
{ role: "assistant", content: "Assistant message 1" },
{ role: "user", content: "User message 2" },
]);
const defaultLoadConfig = {
llamaKCacheQuantizationType: "f32" as const,
llamaVCacheQuantizationType: "f32" as const,
};
beforeAll(async () => {
client = new LMStudioClient();
await ensureHeavyTestsEnvironment(client);
});
beforeEach(async () => {
model = await client.llm.model(llmTestingQwen05B, {
verbose: false,
config: {
llamaKCacheQuantizationType: "f32",
llamaVCacheQuantizationType: "f32",
},
describe("with default model fixture", () => {
let model: LLM;

beforeEach(async () => {
model = await client.llm.model(llmTestingQwen05B, {
verbose: false,
config: defaultLoadConfig,
});
}, 60_000);
it("can apply prompt template to a regular chat", async () => {
const formatted = await model.applyPromptTemplate(chat);
expect(formatted).toMatchSnapshot();
});
}, 60_000);
it("can apply prompt template to a regular chat", async () => {
const formatted = await model.applyPromptTemplate(chat);
expect(formatted).toMatchSnapshot();
});
it("can get model context length", async () => {
const contextLength = await model.getContextLength();
expect(contextLength).toMatchInlineSnapshot(`4096`);
});
it("can get model info", async () => {
const modelInfo = await model.getModelInfo();
expect(modelInfo).toMatchSnapshot({
identifier: expect.any(String),
instanceReference: expect.any(String),
modelKey: expect.any(String),
it("can get model context length", async () => {
const contextLength = await model.getContextLength();
expect(contextLength).toMatchInlineSnapshot(`4096`);
});
it("can get model info", async () => {
const modelInfo = await model.getModelInfo();
expect(modelInfo).toMatchSnapshot({
identifier: expect.any(String),
instanceReference: expect.any(String),
modelKey: expect.any(String),
});
});
it("Can tokenize correctly", async () => {
const tokens = await model.tokenize("Chaos is a ladder.");
expect(tokens).toMatchSnapshot();
});
it("Can tokenize multiple strings correctly", async () => {
const tokens = await model.tokenize([
"Cersei understands the consequences of her absence",
"and she is absent anyway",
]);
expect(tokens).toMatchSnapshot();
});
it("Can count tokens correctly", async () => {
const count = await model.countTokens("Chaos is a ladder.");
expect(count).toMatchInlineSnapshot(`6`);
});
it("Has correct properties", async () => {
expect(model.displayName).toMatchInlineSnapshot(`"Qwen2.5 0.5B Instruct"`);
expect(model.format).toMatchInlineSnapshot(`"gguf"`);
expect(model.identifier).toEqual(llmTestingQwen05B);
expect(model.path).toEqual(llmTestingQwen05B);
expect(model.sizeBytes).toMatchInlineSnapshot(`397807936`);
expect(model.trainedForToolUse).toMatchInlineSnapshot(`true`);
expect(model.vision).toMatchInlineSnapshot(`false`);
});
});
it("Can tokenize correctly", async () => {
const tokens = await model.tokenize("Chaos is a ladder.");
expect(tokens).toMatchSnapshot();
});
it("Can tokenize multiple strings correctly", async () => {
const tokens = await model.tokenize([
"Cersei understands the consequences of her absence",
"and she is absent anyway",
]);
expect(tokens).toMatchSnapshot();
});
it("Can count tokens correctly", async () => {
const count = await model.countTokens("Chaos is a ladder.");
expect(count).toMatchInlineSnapshot(`6`);
});
it("Has correct properties", async () => {
expect(model.displayName).toMatchInlineSnapshot(`"Qwen2.5 0.5B Instruct"`);
expect(model.format).toMatchInlineSnapshot(`"gguf"`);
expect(model.identifier).toEqual(llmTestingQwen05B);
expect(model.path).toEqual(llmTestingQwen05B);
expect(model.sizeBytes).toMatchInlineSnapshot(`397807936`);
expect(model.trainedForToolUse).toMatchInlineSnapshot(`true`);
expect(model.vision).toMatchInlineSnapshot(`false`);

// End-to-end round-trip against a live server: load a model, read its
// LoadConfig back, load a second instance from that config, and verify the
// `fit` flag (and the GPU setting it controls) survived unchanged.
describe("load config round-trips", () => {
it("preserves fit=true through getLoadConfig() to load() round-trip", async () => {
let firstModel: LLM | undefined;
let roundTripModel: LLM | undefined;
try {
firstModel = await client.llm.model(llmTestingQwen05B, {
verbose: false,
config: defaultLoadConfig,
});

const firstLoadConfig = await firstModel.getLoadConfig();

// assumes the default fixture loads with fit=true and an even GPU split —
// TODO confirm against server defaults if this flakes on other machines.
expect(firstLoadConfig.fit).toBe(true);
expect(firstLoadConfig.gpu?.splitStrategy).toBe("evenly");

roundTripModel = await client.llm.load(llmTestingQwen05B, {
// unique identifier so parallel runs cannot collide on the instance name
identifier: `fit-roundtrip-${Date.now()}-${Math.round(Math.random() * 1_000_000)}`,
verbose: false,
config: firstLoadConfig,
});

const secondLoadConfig = await roundTripModel.getLoadConfig();

expect(secondLoadConfig.fit).toBe(true);
expect(secondLoadConfig.gpu?.splitStrategy).toBe("evenly");
} finally {
// Always unload both instances (whichever were created) even on assertion
// failure, so later tests don't inherit stray model instances.
if (roundTripModel !== undefined) {
await roundTripModel.unload();
}
if (firstModel !== undefined) {
await firstModel.unload();
}
}
}, 60_000);
it("preserves fit=false through getLoadConfig() to load() round-trip", async () => {
let firstManualModel: LLM | undefined;
let secondManualModel: LLM | undefined;
try {
firstManualModel = await client.llm.load(llmTestingQwen05B, {
identifier: `fit-disabled-${Date.now()}-${Math.round(Math.random() * 1_000_000)}`,
verbose: false,
// fit disabled and GPU off: a fully manual placement that the round-trip
// must not silently re-enable. defaultLoadConfig only carries cache
// quantization keys, so the spread cannot clobber fit/gpu.
config: {
fit: false,
gpu: { ratio: "off" },
...defaultLoadConfig,
},
});

const firstLoadConfig = await firstManualModel.getLoadConfig();

expect(firstLoadConfig.fit).toBe(false);
expect(firstLoadConfig.gpu?.ratio).toBe("off");

secondManualModel = await client.llm.load(llmTestingQwen05B, {
identifier: `fit-disabled-roundtrip-${Date.now()}-${Math.round(Math.random() * 1_000_000)}`,
verbose: false,
config: firstLoadConfig,
});

const secondLoadConfig = await secondManualModel.getLoadConfig();

expect(secondLoadConfig.fit).toBe(false);
expect(secondLoadConfig.gpu?.ratio).toBe("off");
} finally {
if (secondManualModel !== undefined) {
await secondManualModel.unload();
}
if (firstManualModel !== undefined) {
await firstManualModel.unload();
}
}
}, 60_000);
});
});
100 changes: 100 additions & 0 deletions packages/lms-kv-config/src/conversion/llmLoadModelConfig.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import { type LLMLoadModelConfig } from "@lmstudio/lms-shared-types";
import { kvConfigField, makeKVConfigFromFields } from "../KVConfig.js";
import { llmLlamaMoeLoadConfigSchematics } from "../schema.js";
import {
kvConfigToLLMLoadModelConfig,
llmLoadModelConfigToKVConfig,
} from "./llmLoadModelConfig.js";

/**
 * Runs an LLMLoadModelConfig through llmLoadModelConfigToKVConfig and reads the
 * resulting `llama.fit` field back out via the llama-MoE load schematics.
 *
 * @param config - The load config to convert.
 * @returns The `llama.fit` value in the produced KVConfig, or `undefined` when
 *   the conversion emitted no such field.
 */
function fitFieldAfterConversion(config: LLMLoadModelConfig): boolean | undefined {
  return llmLlamaMoeLoadConfigSchematics
    .parsePartial(llmLoadModelConfigToKVConfig(config))
    .get("llama.fit");
}

// Covers the `fit` inference rule of llmLoadModelConfigToKVConfig: an explicit
// `fit` value always wins; otherwise a manual GPU placement field (ratio,
// numCpuExpertLayersRatio, mainGpu, splitStrategy) implies fit=false, while
// disabledGpus alone does not imply anything.
describe("llmLoadModelConfigToKVConfig — fit inference", () => {
it("preserves explicit fit: true", () => {
expect(fitFieldAfterConversion({ fit: true })).toBe(true);
});

it("preserves explicit fit: false", () => {
expect(fitFieldAfterConversion({ fit: false })).toBe(false);
});

it("infers fit=false when ratio is set without fit", () => {
expect(fitFieldAfterConversion({ gpu: { ratio: 0.5 } })).toBe(false);
});

it("infers fit=false when numCpuExpertLayersRatio is set without fit", () => {
expect(fitFieldAfterConversion({ gpu: { numCpuExpertLayersRatio: 0.5 } })).toBe(false);
});

// mainGpu: 0 is falsy but still a deliberate device choice — the inference
// must treat it as "set" (guards against a `if (gpu.mainGpu)` truthiness bug).
it("infers fit=false when mainGpu is set without fit (including mainGpu: 0)", () => {
expect(fitFieldAfterConversion({ gpu: { mainGpu: 0 } })).toBe(false);
expect(fitFieldAfterConversion({ gpu: { mainGpu: 1 } })).toBe(false);
});

it("infers fit=false when splitStrategy is set without fit", () => {
expect(fitFieldAfterConversion({ gpu: { splitStrategy: "evenly" } })).toBe(false);
});

// disabledGpus restricts which devices may be used but is not a manual
// placement, so it must not flip fit off.
it("does NOT infer fit=false when only disabledGpus is set", () => {
expect(fitFieldAfterConversion({ gpu: { disabledGpus: [1] } })).toBeUndefined();
});

it("does NOT infer fit when no GPU config is provided", () => {
expect(fitFieldAfterConversion({})).toBeUndefined();
});

it("explicit fit: true wins even when ratio is also set", () => {
expect(fitFieldAfterConversion({ fit: true, gpu: { ratio: 0.5 } })).toBe(true);
});

it("explicit fit: false is preserved even with no other GPU params", () => {
expect(fitFieldAfterConversion({ fit: false })).toBe(false);
});
});

// Covers the reverse direction: reading `fit` out of a raw KVConfig.
describe("kvConfigToLLMLoadModelConfig — fit field read-back", () => {
// KVConfig field keys use the full global path (llm.load.* prefix) because
// the schematics preserve the original fullKey even after scoping.
it("reads fit=true from KVConfig", () => {
const kvConfig = makeKVConfigFromFields([kvConfigField("llm.load.llama.fit", true)]);
const result = kvConfigToLLMLoadModelConfig(kvConfig);
expect(result.fit).toBe(true);
});

it("reads fit=false from KVConfig", () => {
const kvConfig = makeKVConfigFromFields([kvConfigField("llm.load.llama.fit", false)]);
const result = kvConfigToLLMLoadModelConfig(kvConfig);
expect(result.fit).toBe(false);
});

// Absence must map to `undefined` (no default is invented on read-back).
it("fit is undefined when absent from KVConfig", () => {
const kvConfig = makeKVConfigFromFields([]);
const result = kvConfigToLLMLoadModelConfig(kvConfig);
expect(result.fit).toBeUndefined();
});
});

// Full round-trip: LLMLoadModelConfig → KVConfig → LLMLoadModelConfig must be
// lossless for both explicit and inferred `fit` values.
describe("round-trip", () => {
it("preserves fit and ratio through config → KVConfig → config", () => {
const original: LLMLoadModelConfig = { fit: true, gpu: { ratio: 0.5 } };
const kvConfig = llmLoadModelConfigToKVConfig(original);
const result = kvConfigToLLMLoadModelConfig(kvConfig);
expect(result.fit).toBe(true);
expect(result.gpu?.ratio).toBe(0.5);
});

// Here `fit` was never set by the caller; the false value is the one inferred
// from `ratio` during the forward conversion and must survive the read-back.
it("preserves inferred fit=false through round-trip when ratio is set", () => {
const original: LLMLoadModelConfig = { gpu: { ratio: 0.75 } };
const kvConfig = llmLoadModelConfigToKVConfig(original);
const result = kvConfigToLLMLoadModelConfig(kvConfig);
expect(result.fit).toBe(false);
expect(result.gpu?.ratio).toBe(0.75);
});
});
Loading
Loading