-
Notifications
You must be signed in to change notification settings - Fork 236
Expand file tree
/
Copy pathLLM.heavy.test.ts
More file actions
144 lines (131 loc) · 5.16 KB
/
LLM.heavy.test.ts
File metadata and controls
144 lines (131 loc) · 5.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import { Chat, type LLM, LMStudioClient } from "../index.js";
import { ensureHeavyTestsEnvironment, llmTestingQwen05B } from "../shared.heavy.test.js";
describe("LLM", () => {
  let client: LMStudioClient;
  // Small fixed conversation reused by the prompt-template snapshot test below.
  const chat = Chat.from([
    { role: "system", content: "This is the system prompt." },
    { role: "user", content: "User message 1" },
    { role: "assistant", content: "Assistant message 1" },
    { role: "user", content: "User message 2" },
  ]);
  // NOTE(review): f32 KV-cache quantization appears to be pinned so loads behave
  // identically across runs/machines — confirm against the fixture's defaults.
  const defaultLoadConfig = {
    llamaKCacheQuantizationType: "f32" as const,
    llamaVCacheQuantizationType: "f32" as const,
  };
  /**
   * Builds a collision-resistant model instance identifier so repeated or
   * concurrent test runs never clash on the same identifier.
   */
  const uniqueIdentifier = (prefix: string): string =>
    `${prefix}-${Date.now()}-${Math.round(Math.random() * 1_000_000)}`;
  /**
   * Unloads every given model (skipping `undefined`) in order, attempting ALL
   * unloads even when an earlier one throws; the first failure is rethrown at
   * the end so the test still reports it. This fixes a cleanup leak in the
   * previous `finally` blocks, where a throwing `unload()` on one model
   * skipped the unload of the other.
   */
  const unloadAll = async (...models: Array<LLM | undefined>): Promise<void> => {
    let failure: { reason: unknown } | undefined;
    for (const model of models) {
      if (model === undefined) {
        continue;
      }
      try {
        await model.unload();
      } catch (reason) {
        // Remember only the first failure; keep unloading the rest.
        failure ??= { reason };
      }
    }
    if (failure !== undefined) {
      throw failure.reason;
    }
  };
  beforeAll(async () => {
    client = new LMStudioClient();
    await ensureHeavyTestsEnvironment(client);
  });
  describe("with default model fixture", () => {
    let model: LLM;
    // Re-acquire the fixture model before each test; generous timeout because a
    // cold load of the model can be slow.
    beforeEach(async () => {
      model = await client.llm.model(llmTestingQwen05B, {
        verbose: false,
        config: defaultLoadConfig,
      });
    }, 60_000);
    it("can apply prompt template to a regular chat", async () => {
      const formatted = await model.applyPromptTemplate(chat);
      expect(formatted).toMatchSnapshot();
    });
    it("can get model context length", async () => {
      const contextLength = await model.getContextLength();
      expect(contextLength).toMatchInlineSnapshot(`4096`);
    });
    it("can get model info", async () => {
      const modelInfo = await model.getModelInfo();
      // Per-load volatile fields are matched loosely; everything else is
      // pinned by the snapshot.
      expect(modelInfo).toMatchSnapshot({
        identifier: expect.any(String),
        instanceReference: expect.any(String),
        modelKey: expect.any(String),
      });
    });
    it("Can tokenize correctly", async () => {
      const tokens = await model.tokenize("Chaos is a ladder.");
      expect(tokens).toMatchSnapshot();
    });
    it("Can tokenize multiple strings correctly", async () => {
      const tokens = await model.tokenize([
        "Cersei understands the consequences of her absence",
        "and she is absent anyway",
      ]);
      expect(tokens).toMatchSnapshot();
    });
    it("Can count tokens correctly", async () => {
      const count = await model.countTokens("Chaos is a ladder.");
      expect(count).toMatchInlineSnapshot(`6`);
    });
    it("Has correct properties", async () => {
      expect(model.displayName).toMatchInlineSnapshot(`"Qwen2.5 0.5B Instruct"`);
      expect(model.format).toMatchInlineSnapshot(`"gguf"`);
      expect(model.identifier).toEqual(llmTestingQwen05B);
      expect(model.path).toEqual(llmTestingQwen05B);
      expect(model.sizeBytes).toMatchInlineSnapshot(`397807936`);
      expect(model.trainedForToolUse).toMatchInlineSnapshot(`true`);
      expect(model.vision).toMatchInlineSnapshot(`false`);
    });
  });
  describe("load config round-trips", () => {
    it("preserves fit=true through getLoadConfig() to load() round-trip", async () => {
      let firstModel: LLM | undefined;
      let roundTripModel: LLM | undefined;
      try {
        firstModel = await client.llm.model(llmTestingQwen05B, {
          verbose: false,
          config: defaultLoadConfig,
        });
        const firstLoadConfig = await firstModel.getLoadConfig();
        expect(firstLoadConfig.fit).toBe(true);
        expect(firstLoadConfig.gpu?.splitStrategy).toBe("evenly");
        // Feed the retrieved config straight back into load() and verify the
        // fit/gpu settings survive the round-trip unchanged.
        roundTripModel = await client.llm.load(llmTestingQwen05B, {
          identifier: uniqueIdentifier("fit-roundtrip"),
          verbose: false,
          config: firstLoadConfig,
        });
        const secondLoadConfig = await roundTripModel.getLoadConfig();
        expect(secondLoadConfig.fit).toBe(true);
        expect(secondLoadConfig.gpu?.splitStrategy).toBe("evenly");
      } finally {
        await unloadAll(roundTripModel, firstModel);
      }
    }, 60_000);
    it("preserves fit=false through getLoadConfig() to load() round-trip", async () => {
      let firstManualModel: LLM | undefined;
      let secondManualModel: LLM | undefined;
      try {
        firstManualModel = await client.llm.load(llmTestingQwen05B, {
          identifier: uniqueIdentifier("fit-disabled"),
          verbose: false,
          config: {
            fit: false,
            gpu: { ratio: "off" },
            ...defaultLoadConfig,
          },
        });
        const firstLoadConfig = await firstManualModel.getLoadConfig();
        expect(firstLoadConfig.fit).toBe(false);
        expect(firstLoadConfig.gpu?.ratio).toBe("off");
        secondManualModel = await client.llm.load(llmTestingQwen05B, {
          identifier: uniqueIdentifier("fit-disabled-roundtrip"),
          verbose: false,
          config: firstLoadConfig,
        });
        const secondLoadConfig = await secondManualModel.getLoadConfig();
        expect(secondLoadConfig.fit).toBe(false);
        expect(secondLoadConfig.gpu?.ratio).toBe("off");
      } finally {
        await unloadAll(secondManualModel, firstManualModel);
      }
    }, 60_000);
  });
});