@@ -22,7 +22,7 @@ class LlamaCpp < Base
     # @param n_ctx [Integer] The number of context tokens to use
     # @param n_threads [Integer] The number of CPU threads to use
     # @param seed [Integer] The seed to use
-    def initialize(model_path:, n_gpu_layers: 1, n_ctx: 2048, n_threads: 1, seed: -1)
+    def initialize(model_path:, n_gpu_layers: 1, n_ctx: 2048, n_threads: 1, seed: 0)
       depends_on "llama_cpp"

       @model_path = model_path
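For reference, a minimal sketch of constructing the client after this change; the model path is a placeholder:

    llm = Langchain::LLM::LlamaCpp.new(
      model_path: "/path/to/model.gguf",  # placeholder path
      n_gpu_layers: 1,
      n_ctx: 2048,
      n_threads: 1,
      seed: 0  # new default; previously -1
    )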
@@ -33,30 +33,25 @@ def initialize(model_path:, n_gpu_layers: 1, n_ctx: 2048, n_threads: 1, seed: -1)
     end

     # @param text [String] The text to embed
-    # @param n_threads [Integer] The number of CPU threads to use
     # @return [Array<Float>] The embedding
-    def embed(text:, n_threads: nil)
+    def embed(text:)
       # contexts are kinda stateful when it comes to embeddings, so allocate one each time
       context = embedding_context

-      embedding_input = context.tokenize(text: text, add_bos: true)
+      embedding_input = @model.tokenize(text: text, add_bos: true)
       return unless embedding_input.size.positive?

-      n_threads ||= self.n_threads
-
-      context.eval(tokens: embedding_input, n_past: 0, n_threads: n_threads)
-      context.embeddings
+      context.eval(tokens: embedding_input, n_past: 0)
+      Langchain::LLM::LlamaCppResponse.new(context, model: context.model.desc)
     end

     # @param prompt [String] The prompt to complete
     # @param n_predict [Integer] The number of tokens to predict
-    # @param n_threads [Integer] The number of CPU threads to use
     # @return [String] The completed prompt
-    def complete(prompt:, n_predict: 128, n_threads: nil)
-      n_threads ||= self.n_threads
+    def complete(prompt:, n_predict: 128)
       # contexts do not appear to be stateful when it comes to completion, so re-use the same one
       context = completion_context
-      ::LLaMACpp.generate(context, prompt, n_threads: n_threads, n_predict: n_predict)
+      ::LLaMACpp.generate(context, prompt, n_predict: n_predict)
     end

     private
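Continuing the sketch above, the reworked public methods would be called like this; note that embed now returns a Langchain::LLM::LlamaCppResponse wrapping the context rather than a raw Array<Float>, and neither method accepts an n_threads: keyword any longer:

    response = llm.embed(text: "Hello, world")                             # a LlamaCppResponse
    completion = llm.complete(prompt: "Once upon a time", n_predict: 128)  # no n_threads: keyword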
@@ -71,23 +66,30 @@ def build_context_params(embeddings: false)

       context_params.seed = seed
       context_params.n_ctx = n_ctx
-      context_params.n_gpu_layers = n_gpu_layers
+      context_params.n_threads = n_threads
       context_params.embedding = embeddings

       context_params
     end

+    def build_model_params
+      model_params = ::LLaMACpp::ModelParams.new
+      model_params.n_gpu_layers = n_gpu_layers
+
+      model_params
+    end
+
     def build_model(embeddings: false)
       return @model if defined?(@model)
-      @model = ::LLaMACpp::Model.new(model_path: model_path, params: build_context_params(embeddings: embeddings))
+      @model = ::LLaMACpp::Model.new(model_path: model_path, params: build_model_params)
     end

     def build_completion_context
-      ::LLaMACpp::Context.new(model: build_model)
+      ::LLaMACpp::Context.new(model: build_model, params: build_context_params(embeddings: false))
     end

     def build_embedding_context
-      ::LLaMACpp::Context.new(model: build_model(embeddings: true))
+      ::LLaMACpp::Context.new(model: build_model, params: build_context_params(embeddings: true))
     end

     def completion_context
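Sketch of how the new helpers divide responsibilities, using only the calls that appear in this diff (values are illustrative, and the model path is a placeholder): GPU offloading is now a model-level setting, while threads, context size, seed, and embedding mode are per-context settings:

    model_params = ::LLaMACpp::ModelParams.new
    model_params.n_gpu_layers = 1              # model-level: GPU offload

    context_params = ::LLaMACpp::ContextParams.new
    context_params.seed = 0
    context_params.n_ctx = 2048
    context_params.n_threads = 4               # moved here from per-call eval/generate arguments
    context_params.embedding = true            # true for the embedding context

    model = ::LLaMACpp::Model.new(model_path: "/path/to/model.gguf", params: model_params)
    context = ::LLaMACpp::Context.new(model: model, params: context_params)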