|
1 | 1 | use crate::distillers; |
2 | 2 | use crate::pipeline::toml_filter; |
3 | | -use crate::pipeline::{DistillResult, Route, SessionState, classifier, composer, scorer}; |
| 3 | +use crate::pipeline::{DistillResult, Route, SessionState, classifier, collapse, composer, scorer}; |
4 | 4 | use crate::store::sqlite::Store; |
5 | 5 | use serde::{Deserialize, Serialize}; |
6 | 6 | use std::sync::{Arc, Mutex}; |
@@ -117,19 +117,23 @@ pub fn process_payload( |
117 | 117 | // Fallback to Rust distiller pipeline |
118 | 118 | let ctype = classifier::classify(&content); |
119 | 119 |
|
| 120 | + // Pre-processing: collapse repetitive lines before scoring |
| 121 | + let collapse_result = collapse::collapse(&content, &ctype); |
| 122 | + let effective_input = collapse_result.collapsed_lines.join("\n"); |
| 123 | + |
120 | 124 | let scored_segments = if let Some(ref lock) = session { |
121 | 125 | if let Ok(state) = lock.lock() { |
122 | | - scorer::score_segments(&content, &ctype, Some(&*state)) |
| 126 | + scorer::score_segments(&effective_input, &ctype, Some(&*state)) |
123 | 127 | } else { |
124 | | - scorer::score_segments(&content, &ctype, None) |
| 128 | + scorer::score_segments(&effective_input, &ctype, None) |
125 | 129 | } |
126 | 130 | } else { |
127 | | - scorer::score_segments(&content, &ctype, None) |
| 131 | + scorer::score_segments(&effective_input, &ctype, None) |
128 | 132 | }; |
129 | 133 |
|
130 | 134 | let distiller = distillers::get_distiller(&ctype); |
131 | 135 | let active_ctype = distiller.content_type(); |
132 | | - let output = distiller.distill(&scored_segments, &content); |
| 136 | + let output = distiller.distill(&scored_segments, &effective_input); |
133 | 137 | (output, format!("{:?}", active_ctype), Some(active_ctype)) |
134 | 138 | }; |
135 | 139 |
|
@@ -220,6 +224,7 @@ pub fn process_payload( |
220 | 224 | }, |
221 | 225 | segments_kept: 0, |
222 | 226 | segments_dropped: 0, |
| 227 | + collapse_savings: None, |
223 | 228 | }; |
224 | 229 | let session_id = session |
225 | 230 | .as_ref() |
|
0 commit comments