@@ -151,7 +151,6 @@ async fn extract_features_async(
151151 config : & ModelConfig , mut file : impl AsyncInputApi , file_len : usize ,
152152) -> Result < ( Vec < u8 > , Vec < i32 > ) > {
153153 debug_assert ! ( config. beg_size < config. block_size) ;
154- debug_assert ! ( config. mid_size < config. block_size) ;
155154 debug_assert ! ( config. end_size < config. block_size) ;
156155 let buffer_size = std:: cmp:: min ( config. block_size , file_len) ;
157156 let mut content_beg = vec ! [ 0 ; buffer_size] ;
@@ -160,31 +159,18 @@ async fn extract_features_async(
160159 let mut end = vec ! [ 0 ; buffer_size] ;
161160 file. read_at ( & mut end, file_len - buffer_size) . await ?;
162161 let end = strip_suffix ( & end) ;
163- let mid_len = std:: cmp:: min ( config. mid_size , file_len) ;
164- let mid_off = ( file_len - mid_len) / 2 ;
165- let mut mid = vec ! [ 0 ; mid_len] ;
166- file. read_at ( & mut mid, mid_off) . await ?;
167162 let mut features = vec ! [ config. padding_token; config. features_size( ) ] ;
168163 let split_features = config. split_features ( & mut features) ;
169164 copy_features ( split_features. beg , beg, 0 ) ;
170- copy_features ( split_features. mid , & mid, 1 ) ;
171- copy_features ( split_features. end , end, 2 ) ;
172- for ( offset, features) in split_features. off {
173- let mut buffer = Vec :: new ( ) ;
174- if offset + features. len ( ) <= file_len {
175- buffer = vec ! [ 0 ; features. len( ) ] ;
176- file. read_at ( & mut buffer, offset) . await ?;
177- }
178- copy_features ( features, & buffer, 0 ) ;
179- }
165+ copy_features ( split_features. end , end, 1 ) ;
180166 Ok ( ( content_beg, features) )
181167}
182168
183169fn copy_features ( dst : & mut [ i32 ] , src : & [ u8 ] , align : usize ) {
184170 let len = std:: cmp:: min ( dst. len ( ) , src. len ( ) ) ;
185171 let dst_len = dst. len ( ) ; // borrowing issue: cannot inline below
186- let dst = & mut dst[ ( dst_len - len) * align / 2 ..] [ ..len] ;
187- let src = & src[ ( src. len ( ) - len) * align / 2 ..] [ ..len] ;
172+ let dst = & mut dst[ ( dst_len - len) * align..] [ ..len] ;
173+ let src = & src[ ( src. len ( ) - len) * align..] [ ..len] ;
188174 for ( dst, src) in dst. iter_mut ( ) . zip ( src. iter ( ) ) {
189175 * dst = * src as i32 ;
190176 }
@@ -272,23 +258,23 @@ mod tests {
272258 GzDecoder :: new ( File :: open ( PATH ) . unwrap ( ) ) . read_to_string ( & mut tests) . unwrap ( ) ;
273259 let tests: Vec < Test > = serde_json:: from_str ( & tests) . unwrap ( ) ;
274260 for test in tests {
261+ assert_eq ! ( test. args. mid_size, 0 , "unsupported mid_size" ) ;
262+ assert ! ( !test. args. use_inputs_at_offsets, "unsupported use_inputs_at_offsets" ) ;
263+ assert ! ( test. features. mid. is_empty( ) , "unsupported mid" ) ;
264+ assert ! ( test. features. offset_0x8000_0x8007. is_empty( ) , "unsupported offset" ) ;
265+ assert ! ( test. features. offset_0x8800_0x8807. is_empty( ) , "unsupported offset" ) ;
266+ assert ! ( test. features. offset_0x9000_0x9007. is_empty( ) , "unsupported offset" ) ;
267+ assert ! ( test. features. offset_0x9800_0x9807. is_empty( ) , "unsupported offset" ) ;
275268 let config = ModelConfig {
276269 beg_size : test. args . beg_size ,
277- mid_size : test. args . mid_size ,
278270 end_size : test. args . end_size ,
279- use_inputs_at_offsets : test. args . use_inputs_at_offsets ,
280271 padding_token : test. args . padding_token ,
281272 block_size : test. args . block_size ,
282273 ..crate :: model:: CONFIG
283274 } ;
284275 let mut expected = Vec :: new ( ) ;
285276 expected. extend_from_slice ( & test. features . beg ) ;
286- expected. extend_from_slice ( & test. features . mid ) ;
287277 expected. extend_from_slice ( & test. features . end ) ;
288- expected. extend_from_slice ( & test. features . offset_0x8000_0x8007 ) ;
289- expected. extend_from_slice ( & test. features . offset_0x8800_0x8807 ) ;
290- expected. extend_from_slice ( & test. features . offset_0x9000_0x9007 ) ;
291- expected. extend_from_slice ( & test. features . offset_0x9800_0x9807 ) ;
292278 let content = BASE64 . decode ( test. content_base64 . as_bytes ( ) ) . unwrap ( ) ;
293279 let actual = extract_features_async ( & config, content. as_slice ( ) , content. len ( ) ) ;
294280 let actual = exec ( actual) . unwrap ( ) . 1 ;
0 commit comments