|
33 | 33 | #endif |
34 | 34 |
|
35 | 35 | static llama_context ** g_ctx; |
36 | | -static llama_model ** g_model; |
37 | 36 | static common_sampler ** g_smpl; |
38 | 37 | static common_params * g_params; |
39 | | -static std::vector<llama_token> * g_input_tokens; |
40 | | -static std::ostringstream * g_output_ss; |
41 | | -static std::vector<llama_token> * g_output_tokens; |
42 | 38 | static bool is_interacting = false; |
43 | 39 | static bool need_insert_eot = false; |
44 | 40 |
|
@@ -136,7 +132,6 @@ int llama_completion(int argc, char ** argv) { |
136 | 132 | llama_context * ctx = nullptr; |
137 | 133 | common_sampler * smpl = nullptr; |
138 | 134 |
|
139 | | - g_model = &model; |
140 | 135 | g_ctx = &ctx; |
141 | 136 | g_smpl = &smpl; |
142 | 137 |
|
@@ -549,9 +544,9 @@ int llama_completion(int argc, char ** argv) { |
549 | 544 | int n_consumed = 0; |
550 | 545 | int n_session_consumed = 0; |
551 | 546 |
|
552 | | - std::vector<int> input_tokens; g_input_tokens = &input_tokens; |
553 | | - std::vector<int> output_tokens; g_output_tokens = &output_tokens; |
554 | | - std::ostringstream output_ss; g_output_ss = &output_ss; |
| 547 | + std::vector<int> input_tokens; |
| 548 | + std::vector<int> output_tokens; |
| 549 | + std::ostringstream output_ss; |
555 | 550 | std::ostringstream assistant_ss; // for storing current assistant message, used in conversation mode |
556 | 551 |
|
557 | 552 | // the first thing we will do is to output the prompt, so set color accordingly |
|
0 commit comments