-
Notifications
You must be signed in to change notification settings - Fork 236
Add llama.acceleration.fit
#560
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
will-lms
wants to merge
23
commits into
main
Choose a base branch
from
will/llama-fit
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
46e33ab
llama fit
will-lms 906f1f1
nest llama.acceleration.fitMinContext
will-lms 3d095af
remove fitMinContext
will-lms 73decb1
New LLMLlamaLayerRatio type
will-lms 26b24fe
Use llamaLayerRatio for embeddings offloadRatio
will-lms 424f25a
Merge remote-tracking branch 'origin/main' into will/llama-fit
will-lms 93d3f17
Separate `fit` boolean
will-lms c37fc39
[temp] lms-cli to my branch
will-lms 88e7441
schema level default false
will-lms 0c7ff32
remove "unknown"
will-lms 51c026f
Don't ignore explicit params
will-lms 5be33bf
stray
will-lms 2a25f1e
comments
will-lms 837e032
comment
will-lms 79dab19
improve check
will-lms 680b610
restore lms-cli to main
will-lms 5e639b1
add test
will-lms 94c8074
heavy test for round trip
will-lms a4a6d85
clean
will-lms 0df603d
move `fit` to LLMLoadModelConfig
will-lms 1222ccd
llama.fit
will-lms 45725f5
helper
will-lms b48e74f
snapshot
will-lms File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
packages/lms-kv-config/src/conversion/llmLoadModelConfig.test.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| import { type LLMLoadModelConfig } from "@lmstudio/lms-shared-types"; | ||
| import { kvConfigField, makeKVConfigFromFields } from "../KVConfig.js"; | ||
| import { llmLlamaMoeLoadConfigSchematics } from "../schema.js"; | ||
| import { | ||
| kvConfigToLLMLoadModelConfig, | ||
| llmLoadModelConfigToKVConfig, | ||
| } from "./llmLoadModelConfig.js"; | ||
|
|
||
| /** | ||
| * Test helper: runs `config` through `llmLoadModelConfigToKVConfig`, partially parses | ||
| * the resulting KVConfig with the llama MoE load schematics, and reads `llama.fit` | ||
| * back out of it. | ||
| * | ||
| * @param config - The load config to convert. | ||
| * @returns The parsed `llama.fit` value, or `undefined` when the conversion did not | ||
| * emit that field. | ||
| */ | ||
| function fitFieldAfterConversion(config: LLMLoadModelConfig): boolean | undefined { | ||
| return llmLlamaMoeLoadConfigSchematics | ||
| .parsePartial(llmLoadModelConfigToKVConfig(config)) | ||
| .get("llama.fit"); | ||
| } | ||
|
|
||
| // Covers how `llama.fit` comes out of llmLoadModelConfigToKVConfig: an explicit `fit` | ||
| // is kept as-is, and when `fit` is omitted the expected value depends on which GPU | ||
| // fields the caller supplied. | ||
| describe("llmLoadModelConfigToKVConfig — fit inference", () => { | ||
| it("preserves explicit fit: true", () => { | ||
| expect(fitFieldAfterConversion({ fit: true })).toBe(true); | ||
| }); | ||
|
|
||
| // Also covers the "explicit false with no other GPU params" case. | ||
| it("preserves explicit fit: false", () => { | ||
| expect(fitFieldAfterConversion({ fit: false })).toBe(false); | ||
| }); | ||
|
|
||
| it("infers fit=false when ratio is set without fit", () => { | ||
| expect(fitFieldAfterConversion({ gpu: { ratio: 0.5 } })).toBe(false); | ||
| }); | ||
|
|
||
| it("infers fit=false when numCpuExpertLayersRatio is set without fit", () => { | ||
| expect(fitFieldAfterConversion({ gpu: { numCpuExpertLayersRatio: 0.5 } })).toBe(false); | ||
| }); | ||
|
|
||
| it("infers fit=false when mainGpu is set without fit (including mainGpu: 0)", () => { | ||
| // 0 is a valid GPU index; it must count as "set" even though it is falsy. | ||
| expect(fitFieldAfterConversion({ gpu: { mainGpu: 0 } })).toBe(false); | ||
| expect(fitFieldAfterConversion({ gpu: { mainGpu: 1 } })).toBe(false); | ||
| }); | ||
|
|
||
| it("infers fit=false when splitStrategy is set without fit", () => { | ||
| expect(fitFieldAfterConversion({ gpu: { splitStrategy: "evenly" } })).toBe(false); | ||
| }); | ||
|
|
||
| it("does NOT infer fit=false when only disabledGpus is set", () => { | ||
| expect(fitFieldAfterConversion({ gpu: { disabledGpus: [1] } })).toBeUndefined(); | ||
| }); | ||
|
|
||
| it("does NOT infer fit when no GPU config is provided", () => { | ||
| expect(fitFieldAfterConversion({})).toBeUndefined(); | ||
| }); | ||
|
|
||
| it("explicit fit: true wins even when ratio is also set", () => { | ||
| expect(fitFieldAfterConversion({ fit: true, gpu: { ratio: 0.5 } })).toBe(true); | ||
| }); | ||
| }); | ||
|
|
||
| // Checks that kvConfigToLLMLoadModelConfig surfaces the KVConfig `llama.fit` entry as | ||
| // the `fit` property of the resulting load config. | ||
| describe("kvConfigToLLMLoadModelConfig — fit field read-back", () => { | ||
| // Fields are keyed by their full global path (llm.load.* prefix): the schematics | ||
| // keep the original fullKey even after scoping. | ||
| it("reads fit=true from KVConfig", () => { | ||
| const { fit } = kvConfigToLLMLoadModelConfig( | ||
| makeKVConfigFromFields([kvConfigField("llm.load.llama.fit", true)]), | ||
| ); | ||
| expect(fit).toBe(true); | ||
| }); | ||
|
|
||
| it("reads fit=false from KVConfig", () => { | ||
| const { fit } = kvConfigToLLMLoadModelConfig( | ||
| makeKVConfigFromFields([kvConfigField("llm.load.llama.fit", false)]), | ||
| ); | ||
| expect(fit).toBe(false); | ||
| }); | ||
|
|
||
| it("fit is undefined when absent from KVConfig", () => { | ||
| const { fit } = kvConfigToLLMLoadModelConfig(makeKVConfigFromFields([])); | ||
| expect(fit).toBeUndefined(); | ||
| }); | ||
| }); | ||
|
|
||
| // End-to-end check: config → KVConfig → config must keep `fit` and `gpu.ratio`. | ||
| describe("round-trip", () => { | ||
| // Composes both conversion directions so each test only states input and expectation. | ||
| const roundTrip = (config: LLMLoadModelConfig) => | ||
| kvConfigToLLMLoadModelConfig(llmLoadModelConfigToKVConfig(config)); | ||
|
|
||
| it("preserves fit and ratio through config → KVConfig → config", () => { | ||
| const restored = roundTrip({ fit: true, gpu: { ratio: 0.5 } }); | ||
| expect(restored.fit).toBe(true); | ||
| expect(restored.gpu?.ratio).toBe(0.5); | ||
| }); | ||
|
|
||
| it("preserves inferred fit=false through round-trip when ratio is set", () => { | ||
| const restored = roundTrip({ gpu: { ratio: 0.75 } }); | ||
| expect(restored.fit).toBe(false); | ||
| expect(restored.gpu?.ratio).toBe(0.75); | ||
| }); | ||
| }); |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.