Skip to content

Commit a9ee2f1

Browse files
committed
make preprocessor_config.json optional
1 parent e31f590 commit a9ee2f1

File tree

1 file changed

+29
-15
lines changed

1 file changed

+29
-15
lines changed

models/kimi.cpp

Lines changed: 29 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -336,8 +336,7 @@ namespace vit
336336
bool load_more(ggml::type dtype, int lm_hidden_size, const json::JSON &config)
337337
{
338338
const auto vis_cfg = config["config.json"]["vision_config"];
339-
auto pp_cfg = config["preprocessor_config.json"];
340-
if (!vis_cfg.IsObject() || !pp_cfg.IsObject()) return false;
339+
if (!vis_cfg.IsObject()) return false;
341340

342341
vis_config.dtype = dtype;
343342

@@ -354,19 +353,34 @@ namespace vit
354353
vis_config.merge_kernel_size[0] = (int)size[0].ToInt();
355354
vis_config.merge_kernel_size[1] = (int)size[1].ToInt();
356355

357-
auto image_mean = pp_cfg["image_mean"];
358-
auto image_std = pp_cfg["image_std"];
359-
CHATLLM_CHECK(image_mean.length() == 3) << "invalid image_mean";
360-
CHATLLM_CHECK(image_std.length() == 3) << "invalid image_std";
361-
362-
vis_config.in_token_limit = (int )pp_cfg["in_token_limit"].ToInt();
363-
vis_config.pad_input = pp_cfg["pad_input"].ToBool();
364-
vis_config.image_mean[0] = (float)image_mean[0].ToFloat();
365-
vis_config.image_mean[1] = (float)image_mean[1].ToFloat();
366-
vis_config.image_mean[2] = (float)image_mean[2].ToFloat();
367-
vis_config.image_std[0] = (float)image_std[0].ToFloat();
368-
vis_config.image_std[1] = (float)image_std[1].ToFloat();
369-
vis_config.image_std[2] = (float)image_std[2].ToFloat();
356+
vis_config.in_token_limit = 4096;
357+
vis_config.pad_input = true;
358+
for (int i = 0; i < 3; i++)
359+
{
360+
vis_config.image_mean[i] = 0.5f;
361+
vis_config.image_std[i] = 0.5f;
362+
}
363+
364+
auto pp_cfg = config["preprocessor_config.json"];
365+
if (pp_cfg.IsObject())
366+
{
367+
vis_config.in_token_limit = (int )pp_cfg["in_token_limit"].ToInt();
368+
vis_config.pad_input = pp_cfg["pad_input"].ToBool();
369+
370+
auto image_mean = pp_cfg["image_mean"];
371+
auto image_std = pp_cfg["image_std"];
372+
CHATLLM_CHECK(image_mean.length() == 3) << "invalid image_mean";
373+
CHATLLM_CHECK(image_std.length() == 3) << "invalid image_std";
374+
375+
vis_config.in_token_limit = (int )pp_cfg["in_token_limit"].ToInt();
376+
vis_config.pad_input = pp_cfg["pad_input"].ToBool();
377+
vis_config.image_mean[0] = (float)image_mean[0].ToFloat();
378+
vis_config.image_mean[1] = (float)image_mean[1].ToFloat();
379+
vis_config.image_mean[2] = (float)image_mean[2].ToFloat();
380+
vis_config.image_std[0] = (float)image_std[0].ToFloat();
381+
vis_config.image_std[1] = (float)image_std[1].ToFloat();
382+
vis_config.image_std[2] = (float)image_std[2].ToFloat();
383+
}
370384

371385
const size_t tensor_ovhd = ggml_tensor_overhead();
372386
const size_t num_tensors = 11 + vis_config.num_hidden_layers * 18;

0 commit comments

Comments (0)