@@ -19,9 +19,11 @@ use tracing::{debug, info, warn};
1919/// Prevents OOM on extremely large C files.
2020const MAX_C_SOURCE_SIZE : usize = 5 * 1024 * 1024 ;
2121
22+ /// LOC threshold above which chunked translation is used.
23+ const MEDIUM_FILE_LOC : usize = 800 ;
2224/// LOC threshold above which repair iterations are reduced to save tokens.
2325const LARGE_FILE_LOC : usize = 1000 ;
24- /// LOC threshold above which repair iterations are further reduced.
26+ /// LOC threshold above which repair iterations are further reduced and larger chunk targets apply .
2527const VERY_LARGE_FILE_LOC : usize = 2000 ;
2628
2729use crate :: CoreError ;
@@ -163,7 +165,7 @@ fn preflight_budget_check(config: &MigrationConfig, c_source: &str, name: &str)
163165 // Repair (per iter): Rust source + C source + errors in, Rust out
164166 let repair_per_iter = c_tokens * 4 ;
165167 let c_lines = c_source. lines ( ) . count ( ) ;
166- let effective_iters = effective_repair_iterations ( config. max_repair_iterations , c_lines) ;
168+ let effective_iters = effective_repair_iterations ( config. max_repair_iterations , c_lines, false ) ;
167169 let repair_cost = repair_per_iter * effective_iters as u64 ;
168170 // Test gen: C + Rust in, tests out
169171 let test_gen_cost = c_tokens * 3 ;
@@ -191,11 +193,15 @@ fn preflight_budget_check(config: &MigrationConfig, c_source: &str, name: &str)
191193 }
192194}
193195
194- /// Compute effective max repair iterations based on file size.
196+ /// Compute effective max repair iterations based on file size and chunking .
195197///
196198/// Large files use fewer iterations to conserve tokens — each repair
197199/// iteration sends the full Rust + C source, which is expensive.
198- fn effective_repair_iterations ( configured_max : u32 , c_lines : usize ) -> u32 {
200+ /// Chunked translations get full iterations since each chunk is small.
201+ fn effective_repair_iterations ( configured_max : u32 , c_lines : usize , was_chunked : bool ) -> u32 {
202+ if was_chunked {
203+ return configured_max;
204+ }
199205 if c_lines > VERY_LARGE_FILE_LOC {
200206 configured_max. min ( 2 )
201207 } else if c_lines > LARGE_FILE_LOC {
@@ -618,12 +624,32 @@ pub async fn migrate_file(
618624 "calling translation agent"
619625 ) ;
620626 let c_lines_for_chunk = unit. c_source . lines ( ) . count ( ) ;
621- let rust_code = if c_lines_for_chunk > VERY_LARGE_FILE_LOC {
622- let chunks = noricum_tools:: ast:: chunk_c_source ( & unit. c_source , 500 ) ;
627+ let use_chunked = c_lines_for_chunk > MEDIUM_FILE_LOC ;
628+ let rust_code = if use_chunked {
629+ let chunk_target = if c_lines_for_chunk > VERY_LARGE_FILE_LOC { 500 } else { 400 } ;
630+ // Use structural chunking when data model patterns are detected
631+ let has_data_model = analysis. patterns . iter ( ) . any ( |p| {
632+ p. contains ( "struct" ) || p. contains ( "linked_list" ) || p. contains ( "recursive" )
633+ } ) ;
634+ let chunks = if has_data_model {
635+ let structural = noricum_tools:: ast:: chunk_c_source_structural ( & unit. c_source , chunk_target) ;
636+ if structural. len ( ) > 1 {
637+ info ! (
638+ function = %name,
639+ "using structural chunking (data model first)"
640+ ) ;
641+ structural
642+ } else {
643+ noricum_tools:: ast:: chunk_c_source ( & unit. c_source , chunk_target)
644+ }
645+ } else {
646+ noricum_tools:: ast:: chunk_c_source ( & unit. c_source , chunk_target)
647+ } ;
623648 info ! (
624649 function = %name,
625650 chunks = chunks. len( ) ,
626651 c_lines = c_lines_for_chunk,
652+ chunk_target,
627653 "using multi-pass chunked translation"
628654 ) ;
629655 match noricum_agents:: translation:: translate_chunked (
@@ -662,6 +688,54 @@ pub async fn migrate_file(
662688 }
663689 }
664690 } ;
691+
692+ // Quality gate: if initial translation has >5 unsafe blocks, re-translate
693+ let unsafe_count = noricum_tools:: ast:: count_unsafe_blocks_ast ( & rust_code) ;
694+ let rust_code = if unsafe_count > 5 {
695+ warn ! (
696+ function = %name,
697+ unsafe_count,
698+ "initial translation has too many unsafe blocks, re-translating with temperature 0.5"
699+ ) ;
700+ unit. metrics . llm_calls += 1 ;
701+ match noricum_agents:: translation:: translate_function_with_patterns_and_temperature (
702+ & client,
703+ & translation_model_sel. model ,
704+ & unit. c_source ,
705+ unit. c2rust_output . as_deref ( ) ,
706+ & analysis,
707+ & relevant_patterns,
708+ Some ( 0.5 ) ,
709+ )
710+ . await
711+ {
712+ Ok ( retranslated) => {
713+ let new_unsafe = noricum_tools:: ast:: count_unsafe_blocks_ast ( & retranslated) ;
714+ if new_unsafe < unsafe_count {
715+ info ! (
716+ function = %name,
717+ old_unsafe = unsafe_count,
718+ new_unsafe,
719+ "re-translation reduced unsafe blocks"
720+ ) ;
721+ retranslated
722+ } else {
723+ info ! (
724+ function = %name,
725+ "re-translation did not improve, keeping original"
726+ ) ;
727+ rust_code
728+ }
729+ }
730+ Err ( e) => {
731+ warn ! ( function = %name, error = %e, "re-translation failed, keeping original" ) ;
732+ rust_code
733+ }
734+ }
735+ } else {
736+ rust_code
737+ } ;
738+
665739 unit. rust_output = Some ( rust_code) ;
666740 unit. state = MigrationState :: Refined ;
667741 unit. metrics . translation_ms = translation_start. elapsed ( ) . as_millis ( ) as u64 ;
@@ -677,7 +751,8 @@ pub async fn migrate_file(
677751
678752 // --- Stage 6: Validate ---
679753 let c_lines = unit. c_source . lines ( ) . count ( ) ;
680- let max_iters = effective_repair_iterations ( config. max_repair_iterations , c_lines) ;
754+ let was_chunked = c_lines > MEDIUM_FILE_LOC ;
755+ let max_iters = effective_repair_iterations ( config. max_repair_iterations , c_lines, was_chunked) ;
681756
682757 let validation =
683758 noricum_validation:: validate_with_threshold ( & unit, config. min_idiomatic_score ) ?;
@@ -755,7 +830,8 @@ pub async fn migrate_file(
755830 ) ;
756831 }
757832
758- let repaired = noricum_agents:: repair:: repair_function_with_temperature (
833+ let c_abbrev_limit = if c_lines > 500 { Some ( 500 ) } else { None } ;
834+ let repaired = noricum_agents:: repair:: repair_function_full (
759835 & client,
760836 & repair_model_sel. model ,
761837 current_rust,
@@ -765,6 +841,7 @@ pub async fn migrate_file(
765841 iteration,
766842 max_iters,
767843 config. repair_base_temperature ,
844+ c_abbrev_limit,
768845 )
769846 . await ?;
770847
@@ -1233,34 +1310,54 @@ fn main() {
12331310
12341311 #[ test]
12351312 fn test_effective_small ( ) {
1236- assert_eq ! ( effective_repair_iterations( 5 , 500 ) , 5 ) ;
1313+ assert_eq ! ( effective_repair_iterations( 5 , 500 , false ) , 5 ) ;
12371314 }
12381315
12391316 #[ test]
12401317 fn test_effective_large ( ) {
1241- assert_eq ! ( effective_repair_iterations( 5 , 1500 ) , 3 ) ;
1318+ assert_eq ! ( effective_repair_iterations( 5 , 1500 , false ) , 3 ) ;
12421319 }
12431320
12441321 #[ test]
12451322 fn test_effective_very_large ( ) {
1246- assert_eq ! ( effective_repair_iterations( 5 , 3000 ) , 2 ) ;
1323+ assert_eq ! ( effective_repair_iterations( 5 , 3000 , false ) , 2 ) ;
12471324 }
12481325
12491326 #[ test]
12501327 fn test_effective_already_low ( ) {
1251- assert_eq ! ( effective_repair_iterations( 1 , 3000 ) , 1 ) ;
1328+ assert_eq ! ( effective_repair_iterations( 1 , 3000 , false ) , 1 ) ;
12521329 }
12531330
12541331 #[ test]
12551332 fn test_effective_boundary_1000 ( ) {
12561333 // 1000 is NOT > LARGE_FILE_LOC (1000), so no reduction
1257- assert_eq ! ( effective_repair_iterations( 5 , 1000 ) , 5 ) ;
1334+ assert_eq ! ( effective_repair_iterations( 5 , 1000 , false ) , 5 ) ;
12581335 }
12591336
12601337 #[ test]
12611338 fn test_effective_boundary_2001 ( ) {
12621339 // 2001 > VERY_LARGE_FILE_LOC (2000), so min(5, 2) = 2
1263- assert_eq ! ( effective_repair_iterations( 5 , 2001 ) , 2 ) ;
1340+ assert_eq ! ( effective_repair_iterations( 5 , 2001 , false ) , 2 ) ;
1341+ }
1342+
1343+ #[ test]
1344+ fn test_effective_chunked_gets_full_iterations ( ) {
1345+ // Chunked translations always get full configured iterations
1346+ assert_eq ! ( effective_repair_iterations( 5 , 1500 , true ) , 5 ) ;
1347+ assert_eq ! ( effective_repair_iterations( 5 , 3000 , true ) , 5 ) ;
1348+ assert_eq ! ( effective_repair_iterations( 3 , 5000 , true ) , 3 ) ;
1349+ }
1350+
1351+ #[ test]
1352+ fn test_medium_file_triggers_chunking ( ) {
1353+ // Files >800 LOC should trigger chunked translation
1354+ assert ! ( 800 < LARGE_FILE_LOC ) ;
1355+ assert ! ( MEDIUM_FILE_LOC == 800 ) ;
1356+ // 900 LOC > MEDIUM_FILE_LOC, so chunking is used
1357+ let c_lines = 900 ;
1358+ assert ! ( c_lines > MEDIUM_FILE_LOC ) ;
1359+ // But with was_chunked=true, full iterations are restored
1360+ assert_eq ! ( effective_repair_iterations( 5 , c_lines, true ) , 5 ) ;
12641361 }
12651362
12661363 #[ test]
0 commit comments