diff --git a/packages/lms-client/src/llm/.test-snapshots/LLM.heavy.test.ts.snap b/packages/lms-client/src/llm/.test-snapshots/LLM.heavy.test.ts.snap
index 3119f543..5cd42950 100644
--- a/packages/lms-client/src/llm/.test-snapshots/LLM.heavy.test.ts.snap
+++ b/packages/lms-client/src/llm/.test-snapshots/LLM.heavy.test.ts.snap
@@ -1,6 +1,6 @@
 // Jest Snapshot v1, https://goo.gl/fbAQLP
 
-exports[`LLM Can tokenize correctly 1`] = `
+exports[`LLM with default model fixture Can tokenize correctly 1`] = `
 [
   1143,
   64866,
@@ -11,7 +11,7 @@ exports[`LLM Can tokenize correctly 1`] = `
 ]
 `;
 
-exports[`LLM Can tokenize multiple strings correctly 1`] = `
+exports[`LLM with default model fixture Can tokenize multiple strings correctly 1`] = `
 [
   [
     34,
@@ -34,7 +34,7 @@ exports[`LLM Can tokenize multiple strings correctly 1`] = `
 ]
 `;
 
-exports[`LLM can apply prompt template to a regular chat 1`] = `
+exports[`LLM with default model fixture can apply prompt template to a regular chat 1`] = `
 "<|im_start|>system
 This is the system prompt.<|im_end|>
 <|im_start|>user
@@ -47,24 +47,29 @@ User message 2<|im_end|>
 "
 `;
 
-exports[`LLM can get model info 1`] = `
+exports[`LLM with default model fixture can get model info 1`] = `
 {
   "architecture": "qwen2",
   "contextLength": 4096,
+  "deviceIdentifier": null,
   "displayName": "Qwen2.5 0.5B Instruct",
   "format": "gguf",
   "identifier": Any<String>,
+  "indexedModelIdentifier": "lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf",
   "instanceReference": Any<String>,
+  "lastUsedTime": Any<Number>,
   "maxContextLength": 32768,
   "modelKey": Any<String>,
   "paramsString": "0.5B",
   "path": "lmstudio-community/Qwen2.5-0.5B-Instruct-GGUF/Qwen2.5-0.5B-Instruct-Q4_K_M.gguf",
+  "publisher": "lmstudio-community",
   "quantization": {
     "bits": 4,
     "name": "Q4_K_M",
   },
   "sizeBytes": 397807936,
   "trainedForToolUse": true,
+  "ttlMs": 3600000,
   "type": "llm",
   "vision": false,
 }
diff --git a/packages/lms-client/src/llm/LLM.heavy.test.ts b/packages/lms-client/src/llm/LLM.heavy.test.ts
index a6e4a11c..d669bf70 100644
--- a/packages/lms-client/src/llm/LLM.heavy.test.ts
+++ b/packages/lms-client/src/llm/LLM.heavy.test.ts
@@ -3,64 +3,144 @@ import { ensureHeavyTestsEnvironment, llmTestingQwen05B } from "../shared.heavy.
 
 describe("LLM", () => {
   let client: LMStudioClient;
-  let model: LLM;
   const chat = Chat.from([
     { role: "system", content: "This is the system prompt." },
     { role: "user", content: "User message 1" },
     { role: "assistant", content: "Assistant message 1" },
     { role: "user", content: "User message 2" },
   ]);
+  const defaultLoadConfig = {
+    llamaKCacheQuantizationType: "f32" as const,
+    llamaVCacheQuantizationType: "f32" as const,
+  };
   beforeAll(async () => {
     client = new LMStudioClient();
     await ensureHeavyTestsEnvironment(client);
   });
-  beforeEach(async () => {
-    model = await client.llm.model(llmTestingQwen05B, {
-      verbose: false,
-      config: {
-        llamaKCacheQuantizationType: "f32",
-        llamaVCacheQuantizationType: "f32",
-      },
+  describe("with default model fixture", () => {
+    let model: LLM;
+
+    beforeEach(async () => {
+      model = await client.llm.model(llmTestingQwen05B, {
+        verbose: false,
+        config: defaultLoadConfig,
+      });
+    }, 60_000);
+    it("can apply prompt template to a regular chat", async () => {
+      const formatted = await model.applyPromptTemplate(chat);
+      expect(formatted).toMatchSnapshot();
     });
-  }, 60_000);
-  it("can apply prompt template to a regular chat", async () => {
-    const formatted = await model.applyPromptTemplate(chat);
-    expect(formatted).toMatchSnapshot();
-  });
-  it("can get model context length", async () => {
-    const contextLength = await model.getContextLength();
-    expect(contextLength).toMatchInlineSnapshot(`4096`);
-  });
-  it("can get model info", async () => {
-    const modelInfo = await model.getModelInfo();
-    expect(modelInfo).toMatchSnapshot({
-      identifier: expect.any(String),
-      instanceReference: expect.any(String),
-      modelKey: expect.any(String),
+    it("can get model context length", async () => {
+      const contextLength = await model.getContextLength();
+      expect(contextLength).toMatchInlineSnapshot(`4096`);
+    });
+    it("can get model info", async () => {
+      const modelInfo = await model.getModelInfo();
+      expect(modelInfo).toMatchSnapshot({
+        identifier: expect.any(String),
+        instanceReference: expect.any(String),
+        modelKey: expect.any(String),
+        // lastUsedTime is a timestamp that changes between runs, so only its type is pinned.
+        lastUsedTime: expect.any(Number),
+      });
+    });
+    it("Can tokenize correctly", async () => {
+      const tokens = await model.tokenize("Chaos is a ladder.");
+      expect(tokens).toMatchSnapshot();
+    });
+    it("Can tokenize multiple strings correctly", async () => {
+      const tokens = await model.tokenize([
+        "Cersei understands the consequences of her absence",
+        "and she is absent anyway",
+      ]);
+      expect(tokens).toMatchSnapshot();
+    });
+    it("Can count tokens correctly", async () => {
+      const count = await model.countTokens("Chaos is a ladder.");
+      expect(count).toMatchInlineSnapshot(`6`);
+    });
+    it("Has correct properties", async () => {
+      expect(model.displayName).toMatchInlineSnapshot(`"Qwen2.5 0.5B Instruct"`);
+      expect(model.format).toMatchInlineSnapshot(`"gguf"`);
+      expect(model.identifier).toEqual(llmTestingQwen05B);
+      expect(model.path).toEqual(llmTestingQwen05B);
+      expect(model.sizeBytes).toMatchInlineSnapshot(`397807936`);
+      expect(model.trainedForToolUse).toMatchInlineSnapshot(`true`);
+      expect(model.vision).toMatchInlineSnapshot(`false`);
     });
   });
-  it("Can tokenize correctly", async () => {
-    const tokens = await model.tokenize("Chaos is a ladder.");
-    expect(tokens).toMatchSnapshot();
-  });
-  it("Can tokenize multiple strings correctly", async () => {
-    const tokens = await model.tokenize([
-      "Cersei understands the consequences of her absence",
-      "and she is absent anyway",
-    ]);
-    expect(tokens).toMatchSnapshot();
-  });
-  it("Can count tokens correctly", async () => {
-    const count = await model.countTokens("Chaos is a ladder.");
-    expect(count).toMatchInlineSnapshot(`6`);
-  });
-  it("Has correct properties", async () => {
-    expect(model.displayName).toMatchInlineSnapshot(`"Qwen2.5 0.5B Instruct"`);
-    expect(model.format).toMatchInlineSnapshot(`"gguf"`);
-    expect(model.identifier).toEqual(llmTestingQwen05B);
-    expect(model.path).toEqual(llmTestingQwen05B);
-    expect(model.sizeBytes).toMatchInlineSnapshot(`397807936`);
-    expect(model.trainedForToolUse).toMatchInlineSnapshot(`true`);
-    expect(model.vision).toMatchInlineSnapshot(`false`);
+
+  describe("load config round-trips", () => {
+    it("preserves fit=true through getLoadConfig() to load() round-trip", async () => {
+      let firstModel: LLM | undefined;
+      let roundTripModel: LLM | undefined;
+      try {
+        firstModel = await client.llm.model(llmTestingQwen05B, {
+          verbose: false,
+          config: defaultLoadConfig,
+        });
+
+        const firstLoadConfig = await firstModel.getLoadConfig();
+
+        expect(firstLoadConfig.fit).toBe(true);
+        expect(firstLoadConfig.gpu?.splitStrategy).toBe("evenly");
+
+        roundTripModel = await client.llm.load(llmTestingQwen05B, {
+          identifier: `fit-roundtrip-${Date.now()}-${Math.round(Math.random() * 1_000_000)}`,
+          verbose: false,
+          config: firstLoadConfig,
+        });
+
+        const secondLoadConfig = await roundTripModel.getLoadConfig();
+
+        expect(secondLoadConfig.fit).toBe(true);
+        expect(secondLoadConfig.gpu?.splitStrategy).toBe("evenly");
+      } finally {
+        try {
+          await roundTripModel?.unload();
+        } finally {
+          // Still unload the first model if unloading the round-trip one throws.
+          await firstModel?.unload();
+        }
+      }
+    }, 60_000);
+    it("preserves fit=false through getLoadConfig() to load() round-trip", async () => {
+      let firstManualModel: LLM | undefined;
+      let secondManualModel: LLM | undefined;
+      try {
+        firstManualModel = await client.llm.load(llmTestingQwen05B, {
+          identifier: `fit-disabled-${Date.now()}-${Math.round(Math.random() * 1_000_000)}`,
+          verbose: false,
+          config: {
+            fit: false,
+            gpu: { ratio: "off" },
+            ...defaultLoadConfig,
+          },
+        });
+
+        const firstLoadConfig = await firstManualModel.getLoadConfig();
+
+        expect(firstLoadConfig.fit).toBe(false);
+        expect(firstLoadConfig.gpu?.ratio).toBe("off");
+
+        secondManualModel = await client.llm.load(llmTestingQwen05B, {
+          identifier: `fit-disabled-roundtrip-${Date.now()}-${Math.round(Math.random() * 1_000_000)}`,
+          verbose: false,
+          config: firstLoadConfig,
+        });
+
+        const secondLoadConfig = await secondManualModel.getLoadConfig();
+
+        expect(secondLoadConfig.fit).toBe(false);
+        expect(secondLoadConfig.gpu?.ratio).toBe("off");
+      } finally {
+        try {
+          await secondManualModel?.unload();
+        } finally {
+          // Still unload the first model if unloading the second one throws.
+          await firstManualModel?.unload();
+        }
+      }
+    }, 60_000);
   });
 });
diff --git a/packages/lms-kv-config/src/conversion/llmLoadModelConfig.test.ts b/packages/lms-kv-config/src/conversion/llmLoadModelConfig.test.ts
new file mode 100644
index 00000000..9f4799fb
--- /dev/null
+++ b/packages/lms-kv-config/src/conversion/llmLoadModelConfig.test.ts
@@ -0,0 +1,100 @@
+import { type LLMLoadModelConfig } from "@lmstudio/lms-shared-types";
+import { kvConfigField, makeKVConfigFromFields } from "../KVConfig.js";
+import { llmLlamaMoeLoadConfigSchematics } from "../schema.js";
+import {
+  kvConfigToLLMLoadModelConfig,
+  llmLoadModelConfigToKVConfig,
+} from "./llmLoadModelConfig.js";
+
+/**
+ * Test helper: runs `config` through `llmLoadModelConfigToKVConfig` and reads the `llama.fit`
+ * field back out of the result. Yields `undefined` if the conversion did not emit that field.
+ */
+function fitFieldAfterConversion(config: LLMLoadModelConfig): boolean | undefined {
+  const converted = llmLoadModelConfigToKVConfig(config);
+  const fields = llmLlamaMoeLoadConfigSchematics.parsePartial(converted);
+  return fields.get("llama.fit");
+}
+
+describe("llmLoadModelConfigToKVConfig — fit inference", () => {
+  it("preserves explicit fit: true", () => {
+    expect(fitFieldAfterConversion({ fit: true })).toBe(true);
+  });
+
+  it("preserves explicit fit: false", () => {
+    expect(fitFieldAfterConversion({ fit: false })).toBe(false);
+  });
+
+  it("infers fit=false when ratio is set without fit", () => {
+    expect(fitFieldAfterConversion({ gpu: { ratio: 0.5 } })).toBe(false);
+  });
+
+  it("infers fit=false when numCpuExpertLayersRatio is set without fit", () => {
+    expect(fitFieldAfterConversion({ gpu: { numCpuExpertLayersRatio: 0.5 } })).toBe(false);
+  });
+
+  it("infers fit=false when mainGpu is set without fit (including mainGpu: 0)", () => {
+    expect(fitFieldAfterConversion({ gpu: { mainGpu: 0 } })).toBe(false);
+    expect(fitFieldAfterConversion({ gpu: { mainGpu: 1 } })).toBe(false);
+  });
+
+  it("infers fit=false when splitStrategy is set without fit", () => {
+    expect(fitFieldAfterConversion({ gpu: { splitStrategy: "evenly" } })).toBe(false);
+  });
+
+  it("does NOT infer fit=false when only disabledGpus is set", () => {
+    expect(fitFieldAfterConversion({ gpu: { disabledGpus: [1] } })).toBeUndefined();
+  });
+
+  it("does NOT infer fit when no GPU config is provided", () => {
+    expect(fitFieldAfterConversion({})).toBeUndefined();
+  });
+
+  it("explicit fit: true wins even when ratio is also set", () => {
+    expect(fitFieldAfterConversion({ fit: true, gpu: { ratio: 0.5 } })).toBe(true);
+  });
+
+  it("explicit fit: false is preserved when only disabledGpus is set", () => {
+    expect(fitFieldAfterConversion({ fit: false, gpu: { disabledGpus: [1] } })).toBe(false);
+  });
+});
+
+describe("kvConfigToLLMLoadModelConfig — fit field read-back", () => {
+  // KVConfig field keys use the full global path (llm.load.* prefix) because
+  // the schematics preserve the original fullKey even after scoping.
+  it("reads fit=true from KVConfig", () => {
+    const kvConfig = makeKVConfigFromFields([kvConfigField("llm.load.llama.fit", true)]);
+    const result = kvConfigToLLMLoadModelConfig(kvConfig);
+    expect(result.fit).toBe(true);
+  });
+
+  it("reads fit=false from KVConfig", () => {
+    const kvConfig = makeKVConfigFromFields([kvConfigField("llm.load.llama.fit", false)]);
+    const result = kvConfigToLLMLoadModelConfig(kvConfig);
+    expect(result.fit).toBe(false);
+  });
+
+  it("fit is undefined when absent from KVConfig", () => {
+    const kvConfig = makeKVConfigFromFields([]);
+    const result = kvConfigToLLMLoadModelConfig(kvConfig);
+    expect(result.fit).toBeUndefined();
+  });
+});
+
+describe("round-trip", () => {
+  it("preserves fit and ratio through config → KVConfig → config", () => {
+    const original: LLMLoadModelConfig = { fit: true, gpu: { ratio: 0.5 } };
+    const kvConfig = llmLoadModelConfigToKVConfig(original);
+    const result = kvConfigToLLMLoadModelConfig(kvConfig);
+    expect(result.fit).toBe(true);
+    expect(result.gpu?.ratio).toBe(0.5);
+  });
+
+  it("preserves inferred fit=false through round-trip when ratio is set", () => {
+    const original: LLMLoadModelConfig = { gpu: { ratio: 0.75 } };
+    const kvConfig = llmLoadModelConfigToKVConfig(original);
+    const result = kvConfigToLLMLoadModelConfig(kvConfig);
+    expect(result.fit).toBe(false);
+    expect(result.gpu?.ratio).toBe(0.75);
+  });
+});
diff --git a/packages/lms-kv-config/src/conversion/llmLoadModelConfig.ts b/packages/lms-kv-config/src/conversion/llmLoadModelConfig.ts
index a1c0eab3..4136c749 100644
--- a/packages/lms-kv-config/src/conversion/llmLoadModelConfig.ts
+++ b/packages/lms-kv-config/src/conversion/llmLoadModelConfig.ts
@@ -22,6 +22,20 @@ interface KvConfigToLLMLoadModelConfigOpts {
   modelFormat?: ModelCompatibilityType;
 }
 
+function resolveLlamaFit(config: Pick<LLMLoadModelConfig, "fit" | "gpu">): boolean | undefined {
+  // GPU params that fit mode would ignore: ratio, MoE expert offload, mainGpu and splitStrategy.
+  // disabledGpus is deliberately not listed because fit still respects it.
+  const { gpu } = config;
+  const placement = [gpu?.ratio, gpu?.numCpuExpertLayersRatio, gpu?.mainGpu, gpu?.splitStrategy];
+  const hasGpuParamIgnoredByFit = placement.some(param => param !== undefined);
+
+  // An explicit root-level fit is passed through as-is. Without one, a caller who set any of the
+  // params above gets fit=false inferred, so the modelDefault layer's fit=true doesn't silently
+  // override their intent; otherwise fit stays unset.
+  const inferredFit = hasGpuParamIgnoredByFit ? false : undefined;
+  return config.fit ?? inferredFit;
+}
+
 function kvConfigToLLMLlamaLoadModelConfig(
   config: KVConfig,
   { useDefaultsForMissingKeys }: Omit<KvConfigToLLMLoadModelConfigOpts, "modelFormat"> = {},
@@ -51,6 +65,11 @@ function kvConfigToLLMLlamaLoadModelConfig(
     result.gpuStrictVramCap = gpuStrictVramCap;
   }
 
+  const llamaFit = parsed.get("llama.fit");
+  if (llamaFit !== undefined) {
+    result.fit = llamaFit;
+  }
+
   const llamaAccelerationOffloadRatio = parsed.get("llama.acceleration.offloadRatio");
   if (llamaAccelerationOffloadRatio !== undefined) {
     gpuFields = {
@@ -213,6 +232,7 @@ export function llmLoadModelConfigToKVConfig(config: LLMLoadModelConfig): KVConf
   const top = llmLoadSchematics.buildPartialConfig({
     "gpuSplitConfig": convertGPUSettingToGPUSplitConfig(config.gpu),
     "gpuStrictVramCap": config.gpuStrictVramCap,
+    "llama.fit": resolveLlamaFit(config),
     "llama.acceleration.offloadRatio": config.gpu?.ratio,
     "numCpuExpertLayersRatio": config.gpu?.numCpuExpertLayersRatio,
     "numParallelSessions": config.maxParallelPredictions,
diff --git a/packages/lms-kv-config/src/schema.ts b/packages/lms-kv-config/src/schema.ts
index dc2c28d3..2fa0d828 100644
--- a/packages/lms-kv-config/src/schema.ts
+++ b/packages/lms-kv-config/src/schema.ts
@@ -267,6 +267,7 @@ export const globalConfigSchematics = new KVConfigSchematicsBuilder(kvValueTypes
       .field("useUnifiedKvCache", "boolean", { isExperimental: true }, true)
       .scope("llama", builder =>
         builder
+          .field("fit", "boolean", { machineDependent: true }, false)
           .scope("acceleration", builder =>
             builder.field(
               "offloadRatio",
diff --git a/packages/lms-shared-types/src/GPUSplitStrategy.ts b/packages/lms-shared-types/src/GPUSplitStrategy.ts
index fed9f909..e11f5a31 100644
--- a/packages/lms-shared-types/src/GPUSplitStrategy.ts
+++ b/packages/lms-shared-types/src/GPUSplitStrategy.ts
@@ -62,7 +62,7 @@ export function convertGPUSettingToGPUSplitConfig(
         ? "priorityOrder"
         : gpuSetting?.splitStrategy ?? "evenly",
     disabledGpus: gpuSetting?.disabledGpus ?? [],
-    priority: gpuSetting?.mainGpu ? [gpuSetting.mainGpu] : [],
+    priority: gpuSetting?.mainGpu !== undefined ? [gpuSetting.mainGpu] : [],
     customRatio: [],
   };
 }
diff --git a/packages/lms-shared-types/src/llm/LLMLoadModelConfig.ts b/packages/lms-shared-types/src/llm/LLMLoadModelConfig.ts
index 1661dbf3..ccde7a88 100644
--- a/packages/lms-shared-types/src/llm/LLMLoadModelConfig.ts
+++ b/packages/lms-shared-types/src/llm/LLMLoadModelConfig.ts
@@ -38,6 +38,8 @@ export type GPUSetting = {
    * A number between 0 to 1 representing the ratio of the work should be distributed to the GPU,
    * where 0 means no work is distributed and 1 means all work is distributed. Can also specify the
    * string "off" to mean 0 and the string "max" to mean 1.
+   *
+   * When the root-level `fit` setting is true, this field is ignored.
    */
   ratio?: LLMLlamaAccelerationOffloadRatio;
   /**
@@ -45,14 +47,20 @@ export type GPUSetting = {
    * forced into CPU memory, where 1 means all expert layers will be in CPU memory regardless of
    * GPU offload configuration and 0 means the expert offload will be determined by GPU offload.
    * Can also specify the string "off" to mean 0 and the string "max" to mean 1.
+   *
+   * When the root-level `fit` setting is true, this field is ignored.
    */
   numCpuExpertLayersRatio?: LLMLlamaAccelerationOffloadRatio;
   /**
    * The index of the GPU to use as the main GPU.
+   *
+   * When the root-level `fit` setting is true, this field is ignored.
    */
   mainGpu?: number;
   /**
    * How to split computation across multiple GPUs.
+   *
+   * When the root-level `fit` setting is true, this field is ignored.
    */
   splitStrategy?: LLMSplitStrategy;
   /**
@@ -141,6 +149,15 @@ export const llmMlxKvCacheQuantizationSchema = z.object({
 
 /** @public */
 export interface LLMLoadModelConfig {
+  /**
+   * When true, enables a fit algorithm that determines optimal layer placement across available
+   * GPUs automatically; manual GPU placement settings under `gpu` are then ignored, except
+   * `disabledGpus`.
+   *
+   * When omitted while any of `gpu.ratio`, `gpu.numCpuExpertLayersRatio`, `gpu.mainGpu` or
+   * `gpu.splitStrategy` is set, converting this config to a KVConfig infers `fit: false`.
+   */
+  fit?: boolean;
+
   /**
    * How to distribute the work to your GPUs. See {@link GPUSetting} for more information.
    *
@@ -333,6 +350,7 @@ export interface LLMLoadModelConfig {
   mlxKvCacheQuantization?: LLMMlxKvCacheQuantization | false;
 }
 export const llmLoadModelConfigSchema = z.object({
+  fit: z.boolean().optional(),
   gpu: gpuSettingSchema.optional(),
   maxParallelPredictions: z.number().int().min(1).optional(),
   useUnifiedKvCache: z.boolean().optional(),