@@ -25,10 +25,13 @@ This project is in an early stage, the API for nodejs may change in the future,
- [Getting the weights](#getting-the-weights)
- [Model versioning](#model-versioning)
- [Usage (llama.cpp backend)](#usage-llamacpp-backend)
- - [Usage (llama-rs backend)](#usage-llama-rs-backend)
- [Inference](#inference)
- [Tokenize](#tokenize)
- [Embedding](#embedding)
+ - [Usage (llama-rs backend)](#usage-llama-rs-backend)
+ - [Inference](#inference-1)
+ - [Tokenize](#tokenize-1)
+ - [Embedding](#embedding-1)
- [Performance related](#performance-related)
- [Manual compilation (from node\_modules)](#manual-compilation-from-node_modules)
- [Manual compilation (from source)](#manual-compilation-from-source)
@@ -98,11 +101,11 @@ The current version supports only one inference session on one LLama instance at

If you wish to run multiple inference sessions concurrently, create a separate LLama instance for each session.

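A minimal sketch of that pattern with the llama.cpp backend (editorial illustration, not part of this commit): the `LoadConfig` fields and completion parameters mirror the examples in this README, while the `createCompletion` callback shape is an assumption.

```typescript
// Illustrative sketch: one LLama instance per concurrent session.
// Config fields and parameter names follow the examples in this README;
// the callback shape (a streamed chunk exposing a `token` string) is assumed.
import { LLama } from "llama-node";
import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");

const config: LoadConfig = {
    path: model,
    enableLogging: true,
    nCtx: 1024,
    nParts: -1,
    seed: 0,
    f16Kv: false,
    logitsAll: false,
    vocabOnly: false,
    useMlock: false,
    embedding: false,
};

// Two independent instances allow two inference sessions to run at once.
const llamaA = new LLama(LLamaCpp);
const llamaB = new LLama(LLamaCpp);
llamaA.load(config);
llamaB.load(config);

const makeParams = (prompt: string) => ({
    nThreads: 4,
    nTokPredict: 2048,
    topK: 40,
    topP: 0.1,
    temp: 0.2,
    repeatPenalty: 1,
    prompt,
});

llamaA.createCompletion(makeParams("What is a llama?"), (response) =>
    process.stdout.write(response.token)
);
llamaB.createCompletion(makeParams("What is an alpaca?"), (response) =>
    process.stdout.write(response.token)
);
```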
- llama.cpp backend now only supports inferencing. Please wait for embedding and tokenization feature.
+ ### Inference

```typescript
import { LLama } from "llama-node";
- import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp";
+ import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
import path from "path";

const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
@@ -150,6 +153,79 @@ llama.createCompletion(

```

+ ### Tokenize
+ 
+ ```typescript
+ import { LLama } from "llama-node";
+ import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
+ import path from "path";
+ 
+ const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+ 
+ const llama = new LLama(LLamaCpp);
+ 
+ const config: LoadConfig = {
+     path: model,
+     enableLogging: true,
+     nCtx: 1024,
+     nParts: -1,
+     seed: 0,
+     f16Kv: false,
+     logitsAll: false,
+     vocabOnly: false,
+     useMlock: false,
+     embedding: false,
+ };
+ 
+ llama.load(config);
+ 
+ const content = "how are you?";
+ 
+ llama.tokenize({ content, nCtx: 2048 }).then(console.log);
+ 
+ ```
+ 
+ ### Embedding
+ 
+ ```typescript
+ import { LLama } from "llama-node";
+ import { LLamaCpp, LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
+ import path from "path";
+ 
+ const model = path.resolve(process.cwd(), "./ggml-vicuna-7b-4bit-rev1.bin");
+ 
+ const llama = new LLama(LLamaCpp);
+ 
+ const config: LoadConfig = {
+     path: model,
+     enableLogging: true,
+     nCtx: 1024,
+     nParts: -1,
+     seed: 0,
+     f16Kv: false,
+     logitsAll: false,
+     vocabOnly: false,
+     useMlock: false,
+     embedding: true, // enable embedding mode so getEmbedding can return a vector
+ };
+ 
+ llama.load(config);
+ 
+ const prompt = `Who is the president of the United States?`;
+ 
+ const params = {
+     nThreads: 4,
+     nTokPredict: 2048,
+     topK: 40,
+     topP: 0.1,
+     temp: 0.2,
+     repeatPenalty: 1,
+     prompt,
+ };
+ 
+ llama.getEmbedding(params).then(console.log);
+ 
+ ```
+ 
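Embeddings are mostly useful for similarity comparisons. As an editorial illustration (not part of this commit), the sketch below reuses `llama` and `params` from the example above and compares two prompts with cosine similarity; it assumes `getEmbedding` resolves to a plain `number[]`.

```typescript
// Assumes getEmbedding resolves to a plain number[] (an assumption, not
// confirmed by this README) and reuses `llama` and `params` from above.
const cosineSimilarity = (a: number[], b: number[]): number => {
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
};

const embed = (text: string) => llama.getEmbedding({ ...params, prompt: text });

Promise.all([embed("The cat sat on the mat."), embed("A kitten rests on a rug.")]).then(
    ([a, b]) => console.log("cosine similarity:", cosineSimilarity(a, b))
);
```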
---

## Usage (llama-rs backend)
@@ -162,7 +238,7 @@ If you wish to have multiple inference sessions concurrently, you need to create

```typescript
import { LLama } from "llama-node";
- import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+ import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
import path from "path";

const model = path.resolve(process.cwd(), "./ggml-alpaca-7b-q4.bin");
@@ -205,7 +281,7 @@ Get tokenization result from LLaMA

```typescript
import { LLama } from "llama-node";
- import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+ import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
import path from "path";

const model = path.resolve(process.cwd(), "./ggml-alpaca-7b-q4.bin");
@@ -226,7 +302,7 @@ Preview version, embedding end token may change in the future. Do not use it in

```typescript
import { LLama } from "llama-node";
- import { LLamaRS } from "llama-node/dist/llm/llama-rs";
+ import { LLamaRS } from "llama-node/dist/llm/llama-rs.js";
import path from "path";
import fs from "fs";
