
Commit a10a0b5 (parent: 7adaa80)

chore: update llama-cpp readme (#15)

* feat: impl llama cpp tokenization/embedding
* chore: readme

File tree: 8 files changed, +229 −15 lines

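For quick orientation, here are the two llama.cpp-backend calls this commit documents, `tokenize` and `getEmbedding`, condensed into one sketch. The model path, `LoadConfig` fields, and sampling parameters are copied from the README additions below; only the sequential chaining of the two calls is added here.

```typescript
import { LLama } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

// Same quantized model file used by the README examples in this commit.
const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");

const llama = new LLama(LLamaCpp);

const config: LoadConfig = {
    path: model,
    enableLogging: true,
    nCtx: 1024,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
};

llama.load(config);

// New in this commit: tokenization, then an embedding, on the llama.cpp backend.
llama
    .tokenize({ content: "how are you?", nCtx: 2048 })
    .then(console.log)
    .then(() =>
        llama.getEmbedding({
            nThreads: 4,
            nTokPredict: 2048,
            topK: 40,
            topP: 0.1,
            temp: 0.2,
            repeatPenalty: 1,
            prompt: "Who is the president of the United States?",
        })
    )
    .then(console.log);
```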

README-zh-CN.md

Lines changed: 82 additions & 6 deletions

@@ -23,10 +23,13 @@ Node.js运行的大语言模型LLaMA。
 - [模型获取](#模型获取)
 - [模型版本](#模型版本)
 - [使用(llama.cpp后端)](#使用llamacpp后端)
-- [使用(llama-rs后端)](#使用llama-rs后端)
 - [推理](#推理)
 - [分词](#分词)
 - [嵌入](#嵌入)
+- [使用(llama-rs后端)](#使用llama-rs后端)
+- [推理](#推理-1)
+- [分词](#分词-1)
+- [嵌入](#嵌入-1)
 - [关于性能](#关于性能)
 - [手动编译 (from node\_modules)](#手动编译-from-node_modules)
 - [手动编译 (from source)](#手动编译-from-source)

@@ -96,11 +99,11 @@ llama-rs后端现在只支持GGML / GGMF模型。llama.cpp后端仅支持GGJT模
 
 如果您希望同时进行多个推理会话,则需要创建多个LLama实例。
 
-llama.cpp后端现仅支持推理. 嵌入和分词功能请等待后期更新。
+### 推理
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp";
+import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
 import path from "path";
 
 const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");

@@ -148,6 +151,79 @@ llama.createCompletion(
 
 ```
 
+### 分词
+
+```typescript
+import { LLama } from "llama-node";
+import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
+import path from "path";
+
+const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+
+const llama = new LLama(LLamaCpp);
+
+const config: LoadConfig = {
+    path: model,
+    enableLogging: true,
+    nCtx: 1024,
+    nParts: -1,
+    seed: 0,
+    f16Kv: false,
+    logitsAll: false,
+    vocabOnly: false,
+    useMlock: false,
+    embedding: false,
+};
+
+llama.load(config);
+
+const content = "how are you?";
+
+llama.tokenize({ content, nCtx: 2048 }).then(console.log);
+
+```
+
+### 嵌入
+```typescript
+import { LLama } from "llama-node";
+import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
+import path from "path";
+
+const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+
+const llama = new LLama(LLamaCpp);
+
+const config: LoadConfig = {
+    path: model,
+    enableLogging: true,
+    nCtx: 1024,
+    nParts: -1,
+    seed: 0,
+    f16Kv: false,
+    logitsAll: false,
+    vocabOnly: false,
+    useMlock: false,
+    embedding: false,
+};
+
+llama.load(config);
+
+const prompt = `Who is the president of the United States?`;
+
+const params = {
+    nThreads: 4,
+    nTokPredict: 2048,
+    topK: 40,
+    topP: 0.1,
+    temp: 0.2,
+    repeatPenalty: 1,
+    prompt,
+};
+
+llama.getEmbedding(params).then(console.log);
+
+```
+
 ---
 
 ## 使用(llama-rs后端)

@@ -160,7 +236,7 @@ llama.createCompletion(
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
 import path from "path";
 
 const model = path.resolve(process.cwd(), "./ggml-alpaca-7b-q4.bin");

@@ -203,7 +279,7 @@ llama.createCompletion(
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
 import path from "path";
 
 const model = path.resolve(process.cwd(), "./ggml-alpaca-7b-q4.bin");

@@ -223,7 +299,7 @@ llama.tokenize(content).then(console.log);
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
 import path from "path";
 import fs from "fs";
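
The hunk above keeps the note that one LLama instance serves one inference session at a time ("如果您希望同时进行多个推理会话,则需要创建多个LLama实例"), but neither README shows what that looks like. Below is a minimal sketch under that rule: two instances, two independent requests. The config and call parameters are taken verbatim from this commit's examples; the `Promise.all` arrangement is the only added idea, and note that each instance loads its own copy of the weights, so memory use grows with the number of instances.

```typescript
import { LLama } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");

const config: LoadConfig = {
    path: model,
    enableLogging: true,
    nCtx: 1024,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
};

// One LLama instance handles one session at a time, so create one instance
// per concurrent request rather than sharing a single instance.
const llamaA = new LLama(LLamaCpp);
const llamaB = new LLama(LLamaCpp);
llamaA.load(config);
llamaB.load(config);

// Two independent requests running side by side, one per instance.
Promise.all([
    llamaA.tokenize({ content: "how are you?", nCtx: 2048 }),
    llamaB.getEmbedding({
        nThreads: 4,
        nTokPredict: 2048,
        topK: 40,
        topP: 0.1,
        temp: 0.2,
        repeatPenalty: 1,
        prompt: "Who is the president of the United States?",
    }),
]).then(([tokens, embedding]) => {
    console.log(tokens, embedding);
});
```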

README.md

Lines changed: 82 additions & 6 deletions

@@ -25,10 +25,13 @@ This project is in an early stage, the API for nodejs may change in the future,
 - [Getting the weights](#getting-the-weights)
 - [Model versioning](#model-versioning)
 - [Usage (llama.cpp backend)](#usage-llamacpp-backend)
-- [Usage (llama-rs backend)](#usage-llama-rs-backend)
 - [Inference](#inference)
 - [Tokenize](#tokenize)
 - [Embedding](#embedding)
+- [Usage (llama-rs backend)](#usage-llama-rs-backend)
+- [Inference](#inference-1)
+- [Tokenize](#tokenize-1)
+- [Embedding](#embedding-1)
 - [Performance related](#performance-related)
 - [Manual compilation (from node\_modules)](#manual-compilation-from-node_modules)
 - [Manual compilation (from source)](#manual-compilation-from-source)

@@ -98,11 +101,11 @@ The current version supports only one inference session on one LLama instance at
 
 If you wish to have multiple inference sessions concurrently, you need to create multiple LLama instances
 
-llama.cpp backend now only supports inferencing. Please wait for embedding and tokenization feature.
+### Inference
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp";
+import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
 import path from "path";
 
 const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");

@@ -150,6 +153,79 @@ llama.createCompletion(
 
 ```
 
+### Tokenize
+
+```typescript
+import { LLama } from "llama-node";
+import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
+import path from "path";
+
+const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+
+const llama = new LLama(LLamaCpp);
+
+const config: LoadConfig = {
+    path: model,
+    enableLogging: true,
+    nCtx: 1024,
+    nParts: -1,
+    seed: 0,
+    f16Kv: false,
+    logitsAll: false,
+    vocabOnly: false,
+    useMlock: false,
+    embedding: false,
+};
+
+llama.load(config);
+
+const content = "how are you?";
+
+llama.tokenize({ content, nCtx: 2048 }).then(console.log);
+
+```
+
+### Embedding
+```typescript
+import { LLama } from "llama-node";
+import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
+import path from "path";
+
+const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+
+const llama = new LLama(LLamaCpp);
+
+const config: LoadConfig = {
+    path: model,
+    enableLogging: true,
+    nCtx: 1024,
+    nParts: -1,
+    seed: 0,
+    f16Kv: false,
+    logitsAll: false,
+    vocabOnly: false,
+    useMlock: false,
+    embedding: false,
+};
+
+llama.load(config);
+
+const prompt = `Who is the president of the United States?`;
+
+const params = {
+    nThreads: 4,
+    nTokPredict: 2048,
+    topK: 40,
+    topP: 0.1,
+    temp: 0.2,
+    repeatPenalty: 1,
+    prompt,
+};
+
+llama.getEmbedding(params).then(console.log);
+
+```
+
 ---
 
 ## Usage (llama-rs backend)

@@ -162,7 +238,7 @@ If you wish to have multiple inference sessions concurrently, you need to create
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
 import path from "path";
 
 const model = path.resolve(process.cwd(), "./ggml-alpaca-7b-q4.bin");

@@ -205,7 +281,7 @@ Get tokenization result from LLaMA
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
 import path from "path";
 
 const model = path.resolve(process.cwd(), "./ggml-alpaca-7b-q4.bin");

@@ -226,7 +302,7 @@ Preview version, embedding end token may change in the future. Do not use it in
 
 ```typescript
 import { LLama } from "llama-node";
-import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
 import path from "path";
 import fs from "fs";
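
One detail worth calling out from the diff above: the two backends now expose `tokenize` with different argument shapes. The llama.cpp backend added in this commit takes an object (`{ content, nCtx }`), while the llama-rs backend keeps taking the string itself. A side-by-side sketch; the llama-rs load options are not part of this diff, so that call is only stubbed out here (see the llama-rs section of the README for the real config).

```typescript
import { LLama } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
import path from "path";

const content = "how are you?";

// llama.cpp backend: tokenize takes an object with the text and a context size.
const cpp = new LLama(LLamaCpp);
const cppConfig: LoadConfig = {
    path: path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin"),
    enableLogging: true,
    nCtx: 1024,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
};
cpp.load(cppConfig);
cpp.tokenize({ content, nCtx: 2048 }).then(console.log);

// llama-rs backend: tokenize takes the raw string. Its load config is not
// shown in this diff, so only a placeholder is given here.
const rs = new LLama(LLamaRS);
rs.load({ path: path.resolve(process.cwd(), "./ggml-alpaca-7b-q4.bin") } as any);
rs.tokenize(content).then(console.log);
```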

example/llama-cpp/embedding.ts

Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+import { LLama } from "../../src";
+import { LLamaCpp, LoadConfig } from "../../src/llm/llama-cpp";
+import path from "path";
+
+const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+
+const llama = new LLama(LLamaCpp);
+
+const config: LoadConfig = {
+    path: model,
+    enableLogging: true,
+    nCtx: 1024,
+    nParts: -1,
+    seed: 0,
+    f16Kv: false,
+    logitsAll: false,
+    vocabOnly: false,
+    useMlock: false,
+    embedding: false,
+};
+
+llama.load(config);
+
+const prompt = `Who is the president of the United States?`;
+
+const params = {
+    nThreads: 4,
+    nTokPredict: 2048,
+    topK: 40,
+    topP: 0.1,
+    temp: 0.2,
+    repeatPenalty: 1,
+    prompt,
+};
+
+llama.getEmbedding(params).then(console.log);
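
The new example only prints the embedding, so here is a hedged sketch of one common follow-up: comparing two prompts by cosine similarity. It assumes `getEmbedding` resolves to a flat `number[]` (the example never inspects the value), reuses the example's config and sampling parameters unchanged, and the `cosine` helper plus the second prompt are purely illustrative.

```typescript
import { LLama } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
const llama = new LLama(LLamaCpp);

const config: LoadConfig = {
    path: model,
    enableLogging: true,
    nCtx: 1024,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
};
llama.load(config);

// Sampling parameters copied from the example above; only `prompt` varies.
const baseParams = {
    nThreads: 4,
    nTokPredict: 2048,
    topK: 40,
    topP: 0.1,
    temp: 0.2,
    repeatPenalty: 1,
};

// Plain cosine similarity between two equal-length vectors.
const cosine = (a: number[], b: number[]): number => {
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};

const main = async () => {
    // Assumption: getEmbedding resolves to number[].
    const e1 = (await llama.getEmbedding({ ...baseParams, prompt: "Who is the president of the United States?" })) as number[];
    const e2 = (await llama.getEmbedding({ ...baseParams, prompt: "Who currently leads the US government?" })) as number[];
    console.log("cosine similarity:", cosine(e1, e2));
};

main();
```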

example/llama-cpp/tokenize.ts

Lines changed: 26 additions & 0 deletions

@@ -0,0 +1,26 @@
+import { LLama } from "../../src";
+import { LLamaCpp, LoadConfig } from "../../src/llm/llama-cpp";
+import path from "path";
+
+const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+
+const llama = new LLama(LLamaCpp);
+
+const config: LoadConfig = {
+    path: model,
+    enableLogging: true,
+    nCtx: 1024,
+    nParts: -1,
+    seed: 0,
+    f16Kv: false,
+    logitsAll: false,
+    vocabOnly: false,
+    useMlock: false,
+    embedding: false,
+};
+
+llama.load(config);
+
+const content = "how are you?";
+
+llama.tokenize({ content, nCtx: 2048 }).then(console.log);
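
As a follow-up to the tokenize example, a small sketch of what the token list is commonly used for: checking that a prompt fits the configured context window before running a completion. It assumes `tokenize` resolves to an array of token ids (the example above only logs it); the `fitsInContext` helper and the `reserve` head-room are illustrative names, not part of the library.

```typescript
import { LLama } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
const llama = new LLama(LLamaCpp);

const config: LoadConfig = {
    path: model,
    enableLogging: true,
    nCtx: 1024,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
};
llama.load(config);

// Check whether a prompt fits into the configured context window, keeping
// `reserve` tokens of head-room for the generated continuation.
const fitsInContext = async (prompt: string, reserve = 256): Promise<boolean> => {
    // Assumption: tokenize resolves to an array of token ids.
    const tokens = (await llama.tokenize({ content: prompt, nCtx: config.nCtx })) as number[];
    console.log(`prompt uses ${tokens.length} of ${config.nCtx} context tokens`);
    return tokens.length + reserve <= config.nCtx;
};

fitsInContext("how are you?").then((ok) => console.log("fits:", ok));
```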

src/index.ts

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 import { CompletionCallback } from "./llm";
-import { LLM } from "./llm";
+import type { LLM } from "./llm";
 
 export class LLama<
     Instance,
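
The only change here is switching `LLM` to a type-only import. A generic TypeScript sketch of why that matters (all names below are hypothetical, not llama-node APIs): `import type` is guaranteed to be erased from the emitted JavaScript, so a module referenced only in type positions never becomes a runtime dependency, and single-file transpilers or `isolatedModules` builds don't have to guess whether the import is a value.

```typescript
// types.ts — an interface that is only ever referenced in type positions.
export interface LLMLike {
    load(config: { path: string }): void;
}

// consumer.ts — with `import type`, the compiled JavaScript contains no
// require/import of "./types" at all; a plain `import` would leave that up
// to how the compiler (or a single-file transpiler) resolves the usage.
import type { LLMLike } from "./types";

export class Wrapper<T extends LLMLike> {
    constructor(private readonly backend: T) {}

    load(modelPath: string): void {
        this.backend.load({ path: modelPath });
    }
}
```
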
File renamed without changes.

src/llm/llama-cpp.ts

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@ import {
     TokenizeResultType,
 } from "@llama-node/llama-cpp";
 
-import { LLM } from "../llm";
+import type { LLM } from "../llm";
 
 export interface LoadConfig extends LlamaContextParams {
     path: string;
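
`LoadConfig` above is the native `LlamaContextParams` plus wrapper-level fields — at least `path`, which is all the diff context shows. Since the same config literal is repeated across the new examples, here is a small hedged sketch that centralizes it in a helper; `makeDefaultConfig` is a made-up name, and the field values are simply the ones used throughout this commit.

```typescript
import { LLama } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

// Hypothetical helper: the exact config literal used by the examples in this
// commit, parameterized only by the model path.
const makeDefaultConfig = (modelPath: string): LoadConfig => ({
    path: path.resolve(process.cwd(), modelPath),
    enableLogging: true,
    nCtx: 1024,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
});

const llama = new LLama(LLamaCpp);
llama.load(makeDefaultConfig("./ggml-vicuna-7b-4bit-rev1.bin"));
```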

src/llm/llama-rs.ts

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 import { LLama, LLamaConfig, LLamaInferenceArguments } from "@llama-node/core";
-import { LLM } from "../llm";
+import type { LLM } from "../llm";
 
 export class LLamaRS
     implements
