Commit cc86646

Llama cpp version update (#401)
* -1 is not a valid seed for llama_cpp; use its default of 0
* Update llama_cpp API to ~> 0.9.4
* Note breaking change in CHANGELOG.md
1 parent 5936259 commit cc86646

6 files changed: 41 additions & 21 deletions

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -1,5 +1,8 @@
 ## [Unreleased]
 
+## [0.8.0]
+- [BREAKING] Updated llama_cpp.rb to 0.9.4. The model file format used by the underlying llama.cpp library has changed to GGUF. llama.cpp ships with scripts to convert existing files, and GGUF-format models can be downloaded from HuggingFace.
+
 ## [0.7.5] - 2023-11-13
 - Fixes
 
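Since the GGUF switch is the user-visible break, a minimal usage sketch of the updated wrapper follows. The model path is a hypothetical placeholder, and the keyword defaults mirror the new signature in this commit; older ggml files can be converted with llama.cpp's bundled scripts, or a ready-made .gguf file can be downloaded from HuggingFace.

    require "langchain"

    # Hypothetical GGUF model path; substitute your own converted or
    # downloaded .gguf file.
    llm = Langchain::LLM::LlamaCpp.new(
      model_path: "/models/llama-2-7b.Q4_K_M.gguf",
      n_gpu_layers: 1,
      n_ctx: 2048,
      n_threads: 4,
      seed: 0 # llama_cpp's default; -1 is no longer a valid seed
    )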

Gemfile.lock

Lines changed: 2 additions & 2 deletions
@@ -187,7 +187,7 @@ GEM
     jwt (2.7.1)
     language_server-protocol (3.17.0.3)
     lint_roller (1.0.0)
-    llama_cpp (0.3.7)
+    llama_cpp (0.9.4)
     loofah (2.21.1)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
@@ -395,7 +395,7 @@ DEPENDENCIES
   hnswlib (~> 0.8.1)
   hugging-face (~> 0.3.4)
   langchainrb!
-  llama_cpp (~> 0.3.7)
+  llama_cpp (~> 0.9.4)
   milvus (~> 0.9.2)
   nokogiri (~> 1.13)
   open-weather-ruby-client (~> 0.4.0)

langchain.gemspec

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "hnswlib", "~> 0.8.1"
   spec.add_development_dependency "hugging-face", "~> 0.3.4"
   spec.add_development_dependency "milvus", "~> 0.9.2"
-  spec.add_development_dependency "llama_cpp", "~> 0.3.7"
+  spec.add_development_dependency "llama_cpp", "~> 0.9.4"
   spec.add_development_dependency "nokogiri", "~> 1.13"
   spec.add_development_dependency "open-weather-ruby-client", "~> 0.4.0"
   spec.add_development_dependency "pg", "~> 1.5"

lib/langchain/llm/llama_cpp.rb

Lines changed: 18 additions & 16 deletions
@@ -22,7 +22,7 @@ class LlamaCpp < Base
     # @param n_ctx [Integer] The number of context tokens to use
     # @param n_threads [Integer] The CPU number of threads to use
     # @param seed [Integer] The seed to use
-    def initialize(model_path:, n_gpu_layers: 1, n_ctx: 2048, n_threads: 1, seed: -1)
+    def initialize(model_path:, n_gpu_layers: 1, n_ctx: 2048, n_threads: 1, seed: 0)
      depends_on "llama_cpp"
 
      @model_path = model_path
@@ -33,30 +33,25 @@ def initialize(model_path:, n_gpu_layers: 1, n_ctx: 2048, n_threads: 1, seed: -1)
     end
 
     # @param text [String] The text to embed
-    # @param n_threads [Integer] The number of CPU threads to use
     # @return [Array<Float>] The embedding
-    def embed(text:, n_threads: nil)
+    def embed(text:)
       # contexts are kinda stateful when it comes to embeddings, so allocate one each time
       context = embedding_context
 
-      embedding_input = context.tokenize(text: text, add_bos: true)
+      embedding_input = @model.tokenize(text: text, add_bos: true)
       return unless embedding_input.size.positive?
 
-      n_threads ||= self.n_threads
-
-      context.eval(tokens: embedding_input, n_past: 0, n_threads: n_threads)
-      context.embeddings
+      context.eval(tokens: embedding_input, n_past: 0)
+      Langchain::LLM::LlamaCppResponse.new(context, model: context.model.desc)
     end
 
     # @param prompt [String] The prompt to complete
     # @param n_predict [Integer] The number of tokens to predict
-    # @param n_threads [Integer] The number of CPU threads to use
     # @return [String] The completed prompt
-    def complete(prompt:, n_predict: 128, n_threads: nil)
-      n_threads ||= self.n_threads
+    def complete(prompt:, n_predict: 128)
       # contexts do not appear to be stateful when it comes to completion, so re-use the same one
       context = completion_context
-      ::LLaMACpp.generate(context, prompt, n_threads: n_threads, n_predict: n_predict)
+      ::LLaMACpp.generate(context, prompt, n_predict: n_predict)
     end
 
     private
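For callers, the two public methods no longer take n_threads (the thread count is now set once at construction time and applied through the context params below), and embed returns a response object rather than a bare array. A sketch of the new call shape, reusing the hypothetical llm from the earlier example:

    # Completion: the thread count comes from the constructor, not the call.
    completion = llm.complete(prompt: "Hello", n_predict: 128)

    # Embedding: unwrap the vector via the new response object.
    vector = llm.embed(text: "Hello World").embedding # => Array of Floats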
@@ -71,23 +66,30 @@ def build_context_params(embeddings: false)
 
       context_params.seed = seed
       context_params.n_ctx = n_ctx
-      context_params.n_gpu_layers = n_gpu_layers
+      context_params.n_threads = n_threads
       context_params.embedding = embeddings
 
       context_params
     end
 
+    def build_model_params
+      model_params = ::LLaMACpp::ModelParams.new
+      model_params.n_gpu_layers = n_gpu_layers
+
+      model_params
+    end
+
     def build_model(embeddings: false)
       return @model if defined?(@model)
-      @model = ::LLaMACpp::Model.new(model_path: model_path, params: build_context_params(embeddings: embeddings))
+      @model = ::LLaMACpp::Model.new(model_path: model_path, params: build_model_params)
     end
 
     def build_completion_context
-      ::LLaMACpp::Context.new(model: build_model)
+      ::LLaMACpp::Context.new(model: build_model, params: build_context_params(embeddings: false))
     end
 
     def build_embedding_context
-      ::LLaMACpp::Context.new(model: build_model(embeddings: true))
+      ::LLaMACpp::Context.new(model: build_model, params: build_context_params(embeddings: true))
     end
 
     def completion_context
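The private-method reshuffle tracks llama_cpp 0.9.x's split of options into two objects: n_gpu_layers is a model-level setting on ModelParams, while seed, n_ctx, n_threads, and the embedding flag stay per-context on ContextParams. A sketch of the two-object setup, assembled from the calls that appear in this diff (the ContextParams constructor is inferred from the llama_cpp gem's API rather than shown in the hunk, and the model path is hypothetical):

    model_params = ::LLaMACpp::ModelParams.new
    model_params.n_gpu_layers = 1 # model-level: layers to offload to the GPU

    context_params = ::LLaMACpp::ContextParams.new
    context_params.seed = 0
    context_params.n_ctx = 2048
    context_params.n_threads = 4
    context_params.embedding = true # allocate an embedding-capable context

    model = ::LLaMACpp::Model.new(model_path: "/models/llama-2-7b.Q4_K_M.gguf", params: model_params)
    context = ::LLaMACpp::Context.new(model: model, params: context_params)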
lib/langchain/llm/response/llama_cpp_response.rb

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module Langchain::LLM
+  class LlamaCppResponse < BaseResponse
+    def embedding
+      embeddings
+    end
+
+    def embeddings
+      raw_response.embeddings
+    end
+  end
+end
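The response object keeps the context as raw_response, so both accessors resolve to the same data; a short usage sketch:

    response = llm.embed(text: "Hello World") # => Langchain::LLM::LlamaCppResponse
    response.embeddings # the wrapped context's embedding array
    response.embedding  # same data via the singular accessor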

spec/langchain/llm/llama_cpp_spec.rb

Lines changed: 4 additions & 2 deletions
@@ -18,13 +18,15 @@
     let(:embedding) { [0.1, 0.2, 0.3] }
 
     before do
-      allow(llama_context).to receive(:tokenize).and_return([1, 9029])
+      allow(llama_model).to receive(:tokenize).and_return([1, 9029])
+      allow(llama_model).to receive(:desc).and_return("test")
+      allow(llama_context).to receive(:model).and_return(llama_model)
       allow(llama_context).to receive(:eval)
       allow(llama_context).to receive(:embeddings).and_return(embedding)
     end
 
     it "generates an embedding" do
-      expect(subject.embed(text: "Hello World")).to eq(embedding)
+      expect(subject.embed(text: "Hello World").embedding).to eq(embedding)
     end
   end
 