5252 < link rel ="search " title ="Search " href ="../../search.html " />
5353 < meta name ="viewport " content ="width=device-width, initial-scale=1 "/>
5454 < meta name ="docsearch:language " content ="en "/>
55- < meta name ="docbuild:last-update " content ="Dec 22 , 2025 "/>
55+ < meta name ="docbuild:last-update " content ="Dec 24 , 2025 "/>
5656 </ head >
5757
5858
@@ -435,6 +435,7 @@ <h2> Contents </h2>
435435 </ div >
436436 < nav aria-label ="Page ">
437437 < ul class ="visible nav section-nav flex-column ">
438+ < li class ="toc-h2 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#data-preparation-for-sft-training "> Data Preparation (For SFT Training)</ a > </ li >
438439< li class ="toc-h2 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#reproduce "> Reproduce</ a > < ul class ="visible nav section-nav flex-column ">
439440< li class ="toc-h3 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#configuration "> Configuration</ a > </ li >
440441< li class ="toc-h3 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#supported-models "> Supported Models</ a > </ li >
@@ -463,6 +464,38 @@ <h1>VLM Single-Turn RL (FSDP & Megatron)<a class="headerlink" href="#vlm-sin
463464< p align ="center ">
464465 < img src ="fsdp_vs_megatron.png " alt ="FSDP vs Megatron Reward Plot " width ="800 ">
465466</ p >
467+ < section id ="data-preparation-for-sft-training ">
468+ < h2 > Data Preparation (For SFT Training)< a class ="headerlink " href ="#data-preparation-for-sft-training " title ="Link to this heading "> #</ a > </ h2 >
469+ < p > The < a class ="reference external " href ="https://huggingface.co/datasets/chenhegu/geo3k_imgurl "> geo3k_imgurl</ a > dataset contains:</ p >
470+ < ul class ="simple ">
471+ < li > < p > < code class ="docutils literal notranslate "> < span class ="pre "> problem</ span > </ code > : The math problem text (string)</ p > </ li >
472+ < li > < p > < code class ="docutils literal notranslate "> < span class ="pre "> answer</ span > </ code > : The answer (string, e.g., “270”)</ p > </ li >
473+ < li > < p > < code class ="docutils literal notranslate "> < span class ="pre "> images</ span > </ code > : Image data (list)</ p > </ li >
474+ </ ul >
475+ < p > For SFT training, the < code class ="docutils literal notranslate "> < span class ="pre "> answer</ span > </ code > field must be wrapped in < code class ="docutils literal notranslate "> < span class ="pre "> \boxed{}</ span > </ code > and each sample needs a < code class ="docutils literal notranslate "> < span class ="pre "> messages</ span > </ code > field. You can use the following script to do both:</ p >
476+ < div class ="highlight-python notranslate "> < div class ="highlight "> < pre > < span > </ span > < span class ="kn "> from</ span > < span class ="w "> </ span > < span class ="nn "> datasets</ span > < span class ="w "> </ span > < span class ="kn "> import</ span > < span class ="n "> load_dataset</ span >
477+ < span class ="kn "> import</ span > < span class ="w "> </ span > < span class ="nn "> pandas</ span > < span class ="w "> </ span > < span class ="k "> as</ span > < span class ="w "> </ span > < span class ="nn "> pd</ span >
478+
479+ < span class ="n "> ds</ span > < span class ="o "> =</ span > < span class ="n "> load_dataset</ span > < span class ="p "> (</ span > < span class ="s2 "> "chenhegu/geo3k_imgurl"</ span > < span class ="p "> ,</ span > < span class ="n "> split</ span > < span class ="o "> =</ span > < span class ="s2 "> "train"</ span > < span class ="p "> )</ span >
480+
481+ < span class ="k "> def</ span > < span class ="w "> </ span > < span class ="nf "> format_answer</ span > < span class ="p "> (</ span > < span class ="n "> answer</ span > < span class ="p "> :</ span > < span class ="nb "> str</ span > < span class ="p "> )</ span > < span class ="o "> -></ span > < span class ="nb "> str</ span > < span class ="p "> :</ span >
482+ < span class ="w "> </ span > < span class ="sd "> """Format answer to include \\boxed{} format."""</ span >
483+ < span class ="k "> return</ span > < span class ="sa "> f</ span > < span class ="s2 "> "Answer: </ span > < span class ="se "> \\</ span > < span class ="s2 "> boxed</ span > < span class ="se "> {{</ span > < span class ="si "> {</ span > < span class ="n "> answer</ span > < span class ="si "> }</ span > < span class ="se "> }}</ span > < span class ="s2 "> "</ span >
484+
485+ < span class ="k "> def</ span > < span class ="w "> </ span > < span class ="nf "> process_sample</ span > < span class ="p "> (</ span > < span class ="n "> sample</ span > < span class ="p "> ):</ span >
486+ < span class ="n "> formatted_answer</ span > < span class ="o "> =</ span > < span class ="sa "> f</ span > < span class ="s2 "> "Answer: </ span > < span class ="se "> \\</ span > < span class ="s2 "> boxed</ span > < span class ="se "> {{</ span > < span class ="si "> {</ span > < span class ="n "> sample</ span > < span class ="p "> [</ span > < span class ="s1 "> 'answer'</ span > < span class ="p "> ]</ span > < span class ="si "> }</ span > < span class ="se "> }}</ span > < span class ="s2 "> "</ span >
487+
488+ < span class ="n "> sample</ span > < span class ="p "> [</ span > < span class ="s2 "> "messages"</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="p "> [</ span >
489+ < span class ="p "> {</ span > < span class ="s2 "> "role"</ span > < span class ="p "> :</ span > < span class ="s2 "> "user"</ span > < span class ="p "> ,</ span > < span class ="s2 "> "content"</ span > < span class ="p "> :</ span > < span class ="n "> sample</ span > < span class ="p "> [</ span > < span class ="s2 "> "problem"</ span > < span class ="p "> ]},</ span >
490+ < span class ="p "> {</ span > < span class ="s2 "> "role"</ span > < span class ="p "> :</ span > < span class ="s2 "> "assistant"</ span > < span class ="p "> ,</ span > < span class ="s2 "> "content"</ span > < span class ="p "> :</ span > < span class ="n "> formatted_answer</ span > < span class ="p "> }</ span >
491+ < span class ="p "> ]</ span >
492+ < span class ="k "> return</ span > < span class ="n "> sample</ span >
493+
494+ < span class ="n "> ds</ span > < span class ="o "> =</ span > < span class ="n "> ds</ span > < span class ="o "> .</ span > < span class ="n "> map</ span > < span class ="p "> (</ span > < span class ="n "> process_sample</ span > < span class ="p "> )</ span >
495+ < span class ="n "> ds</ span > < span class ="o "> .</ span > < span class ="n "> to_parquet</ span > < span class ="p "> (</ span > < span class ="s2 "> "/root/datasets/geo3k_imgurl/train_formatted.parquet"</ span > < span class ="p "> )</ span >
496+ </ pre > </ div >
497+ </ div >
498+ </ section >
466499< section id ="reproduce ">
467500< h2 > Reproduce< a class ="headerlink " href ="#reproduce " title ="Link to this heading "> #</ a > </ h2 >
468501< div class ="highlight-bash notranslate "> < div class ="highlight "> < pre > < span > </ span > < span class ="nb "> export</ span > < span class ="w "> </ span > < span class ="nv "> WANDB_API_KEY</ span > < span class ="o "> =</ span > your_wandb_api_key
@@ -475,6 +508,9 @@ <h2>Reproduce<a class="headerlink" href="#reproduce" title="Link to this heading
475508
476509< span class ="c1 "> # With different model</ span >
477510< span class ="nv "> SLIME_SCRIPT_MODEL_NAME</ span > < span class ="o "> =</ span > Qwen3-VL-4B-Instruct< span class ="w "> </ span > ./examples/geo3k_vlm/run_geo3k_vlm.sh
511+
512+ < span class ="c1 "> # SFT</ span >
513+ ./examples/geo3k_vlm/run_geo3k_vlm_sft.sh
478514</ pre > </ div >
479515</ div >
480516< section id ="configuration ">
@@ -578,6 +614,7 @@ <h2>B200<a class="headerlink" href="#b200" title="Link to this heading">#</a></h
578614 </ div >
579615 < nav class ="bd-toc-nav page-toc ">
580616 < ul class ="visible nav section-nav flex-column ">
617+ < li class ="toc-h2 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#data-preparation-for-sft-training "> Data Preparation (For SFT Training)</ a > </ li >
581618< li class ="toc-h2 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#reproduce "> Reproduce</ a > < ul class ="visible nav section-nav flex-column ">
582619< li class ="toc-h3 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#configuration "> Configuration</ a > </ li >
583620< li class ="toc-h3 nav-item toc-entry "> < a class ="reference internal nav-link " href ="#supported-models "> Supported Models</ a > </ li >
@@ -622,7 +659,7 @@ <h2>B200<a class="headerlink" href="#b200" title="Link to this heading">#</a></h
622659
623660 < div class ="footer-item ">
624661 < p class ="last-updated ">
625- Last updated on Dec 22 , 2025.
662+ Last updated on Dec 24 , 2025.
626663 < br />
627664</ p >
628665 </ div >
0 commit comments