@@ -336,8 +336,7 @@ namespace vit
336336 bool load_more (ggml::type dtype, int lm_hidden_size, const json::JSON &config)
337337 {
338338 const auto vis_cfg = config[" config.json" ][" vision_config" ];
339- auto pp_cfg = config[" preprocessor_config.json" ];
340- if (!vis_cfg.IsObject () || !pp_cfg.IsObject ()) return false ;
339+ if (!vis_cfg.IsObject ()) return false ;
341340
342341 vis_config.dtype = dtype;
343342
@@ -354,19 +353,34 @@ namespace vit
354353 vis_config.merge_kernel_size [0 ] = (int )size[0 ].ToInt ();
355354 vis_config.merge_kernel_size [1 ] = (int )size[1 ].ToInt ();
356355
357- auto image_mean = pp_cfg[" image_mean" ];
358- auto image_std = pp_cfg[" image_std" ];
359- CHATLLM_CHECK (image_mean.length () == 3 ) << " invalid image_mean" ;
360- CHATLLM_CHECK (image_std.length () == 3 ) << " invalid image_std" ;
361-
362- vis_config.in_token_limit = (int )pp_cfg[" in_token_limit" ].ToInt ();
363- vis_config.pad_input = pp_cfg[" pad_input" ].ToBool ();
364- vis_config.image_mean [0 ] = (float )image_mean[0 ].ToFloat ();
365- vis_config.image_mean [1 ] = (float )image_mean[1 ].ToFloat ();
366- vis_config.image_mean [2 ] = (float )image_mean[2 ].ToFloat ();
367- vis_config.image_std [0 ] = (float )image_std[0 ].ToFloat ();
368- vis_config.image_std [1 ] = (float )image_std[1 ].ToFloat ();
369- vis_config.image_std [2 ] = (float )image_std[2 ].ToFloat ();
356+ vis_config.in_token_limit = 4096 ;
357+ vis_config.pad_input = true ;
358+ for (int i = 0 ; i < 3 ; i++)
359+ {
360+ vis_config.image_mean [i] = 0 .5f ;
361+ vis_config.image_std [i] = 0 .5f ;
362+ }
363+
364+ auto pp_cfg = config[" preprocessor_config.json" ];
365+ if (pp_cfg.IsObject ())
366+ {
367+ vis_config.in_token_limit = (int )pp_cfg[" in_token_limit" ].ToInt ();
368+ vis_config.pad_input = pp_cfg[" pad_input" ].ToBool ();
369+
370+ auto image_mean = pp_cfg[" image_mean" ];
371+ auto image_std = pp_cfg[" image_std" ];
372+ CHATLLM_CHECK (image_mean.length () == 3 ) << " invalid image_mean" ;
373+ CHATLLM_CHECK (image_std.length () == 3 ) << " invalid image_std" ;
374+
375+ vis_config.in_token_limit = (int )pp_cfg[" in_token_limit" ].ToInt ();
376+ vis_config.pad_input = pp_cfg[" pad_input" ].ToBool ();
377+ vis_config.image_mean [0 ] = (float )image_mean[0 ].ToFloat ();
378+ vis_config.image_mean [1 ] = (float )image_mean[1 ].ToFloat ();
379+ vis_config.image_mean [2 ] = (float )image_mean[2 ].ToFloat ();
380+ vis_config.image_std [0 ] = (float )image_std[0 ].ToFloat ();
381+ vis_config.image_std [1 ] = (float )image_std[1 ].ToFloat ();
382+ vis_config.image_std [2 ] = (float )image_std[2 ].ToFloat ();
383+ }
370384
371385 const size_t tensor_ovhd = ggml_tensor_overhead ();
372386 const size_t num_tensors = 11 + vis_config.num_hidden_layers * 18 ;
0 commit comments