@@ -15,13 +15,23 @@ use crate::Canvas;
15
15
16
16
/// A buffer for conversion.
17
17
pub struct Converter {
18
- /// How many texels to do at once.
18
+ /// How many super-blocks to do at once.
19
19
///
20
- /// Each entry in `in_slices` and `out_slices` except for the last has the size of `chunk`.
20
+ /// A super-block (or super-texel) is a unit determined by the shader which encompasses a
21
+ /// whole number of input and output blocks, i.e. a common multiple of both pixel counts.
21
22
chunk : usize ,
22
23
/// The number of chunks to do at once.
24
+ ///
25
+ /// Each chunk is one consecutive set of super-texels so discontinuities can occur from one
26
+ /// chunk to the next. That allows us to specialize the texel index and texel fetch code for
27
+ /// the most common texel index schemes that occur as a result.
23
28
chunk_count : usize ,
24
29
30
+ /// How many input texels are read in each super-block chunk.
31
+ chunk_per_fetch : usize ,
32
+ /// How many output texels are written in each super-block chunk.
33
+ chunk_per_write : usize ,
34
+
25
35
super_blocks : TexelBuffer < [ u32 ; 2 ] > ,
26
36
/// Buffer where we store input texels after reading them.
27
37
in_texels : TexelBuffer ,
@@ -185,9 +195,9 @@ struct IntShuffleOps {
185
195
struct SuperTexel {
186
196
blocks : Range < u32 > ,
187
197
/// In blocks per super block.
188
- in_super : u32 ,
198
+ in_per_super : u32 ,
189
199
/// Out blocks per super block.
190
- out_super : u32 ,
200
+ out_per_super : u32 ,
191
201
}
192
202
193
203
pub ( crate ) struct ChunkSpec < ' ch > {
@@ -201,6 +211,8 @@ impl Converter {
201
211
Converter {
202
212
chunk : 1024 ,
203
213
chunk_count : 1 ,
214
+ chunk_per_fetch : 0 ,
215
+ chunk_per_write : 0 ,
204
216
super_blocks : TexelBuffer :: default ( ) ,
205
217
in_texels : TexelBuffer :: default ( ) ,
206
218
in_coords : TexelBuffer :: default ( ) ,
@@ -327,9 +339,11 @@ impl Converter {
327
339
}
328
340
329
341
/// Special case on `convert_texelbuf_with_ops`, when both buffers:
330
- /// * utilize an expasion-roundtrip-safe color/bit combination
342
+ ///
343
+ /// * utilize an expansion-roundtrip-safe color/bit combination
331
344
/// * have the same bit depths on all channels
332
345
/// * do not require any color conversion between them
346
+ /// * as a consequence of these, have a common pixel-to-texel ratio of 1-to-1
333
347
///
334
348
/// This avoids expanding them into `pixel_in_buffer` where they'd be represented as `f32x4`
335
349
/// and thus undergo an expensive `u8->f32->u8` cast chain.
@@ -390,6 +404,16 @@ impl Converter {
390
404
) where
391
405
T : AsTexel ,
392
406
{
407
+ debug_assert_eq ! (
408
+ that. chunk, that. chunk_per_fetch,
409
+ "Inconsistent usage of channel shuffle, only applicable to matching texels"
410
+ ) ;
411
+
412
+ debug_assert_eq ! (
413
+ that. chunk, that. chunk_per_write,
414
+ "Inconsistent usage of channel shuffle, only applicable to matching texels"
415
+ ) ;
416
+
393
417
let in_texel = T :: texel ( ) . array :: < N > ( ) ;
394
418
let out_texel = T :: texel ( ) . array :: < M > ( ) ;
395
419
@@ -527,6 +551,10 @@ impl Converter {
527
551
frame_in : & Canvas ,
528
552
frame_out : & mut Canvas ,
529
553
) {
554
+ // We *must* make progress.
555
+ assert ! ( self . chunk > 0 ) ;
556
+ assert ! ( self . chunk_count > 0 ) ;
557
+
530
558
use core:: slice:: from_mut;
531
559
// We use a notion of 'supertexels', the common multiple of input and output texel blocks.
532
560
// That is, if the input is a 2-by-2 pixel block and the output is single pixels then we
@@ -536,6 +564,17 @@ impl Converter {
536
564
let ( sb_x, sb_y) = self . super_texel ( info) ;
537
565
let mut blocks = Self :: blocks ( sb_x. blocks . clone ( ) , sb_y. blocks . clone ( ) ) ;
538
566
567
+ assert ! ( sb_x. in_per_super > 0 ) ;
568
+ assert ! ( sb_y. in_per_super > 0 ) ;
569
+ assert ! ( sb_x. out_per_super > 0 ) ;
570
+ assert ! ( sb_y. out_per_super > 0 ) ;
571
+
572
+ self . chunk_per_fetch = self . chunk * ( sb_x. in_per_super * sb_y. in_per_super ) as usize ;
573
+ self . chunk_per_write = self . chunk * ( sb_x. out_per_super * sb_y. out_per_super ) as usize ;
574
+
575
+ assert ! ( self . chunk_per_fetch > 0 ) ;
576
+ assert ! ( self . chunk_per_write > 0 ) ;
577
+
539
578
loop {
540
579
let at_once = self . chunk * self . chunk_count ;
541
580
self . super_blocks . resize ( at_once) ;
@@ -566,6 +605,8 @@ impl Converter {
566
605
567
606
let super_width = core:: cmp:: max ( b0. width ( ) , b1. width ( ) ) ;
568
607
let super_height = core:: cmp:: max ( b0. height ( ) , b1. height ( ) ) ;
608
+
609
+ // All currently supported block sizes are powers of two, so the larger is a multiple of both.
569
610
assert ! ( super_width % b0. width( ) == 0 ) ;
570
611
assert ! ( super_width % b1. width( ) == 0 ) ;
571
612
assert ! ( super_height % b0. height( ) == 0 ) ;
@@ -579,13 +620,13 @@ impl Converter {
579
620
(
580
621
SuperTexel {
581
622
blocks : 0 ..sb_height,
582
- in_super : super_height / b0. height ( ) ,
583
- out_super : super_height / b1. height ( ) ,
623
+ in_per_super : super_height / b0. height ( ) ,
624
+ out_per_super : super_height / b1. height ( ) ,
584
625
} ,
585
626
SuperTexel {
586
627
blocks : 0 ..sb_width,
587
- in_super : super_width / b0. width ( ) ,
588
- out_super : super_width / b1. width ( ) ,
628
+ in_per_super : super_width / b0. width ( ) ,
629
+ out_per_super : super_width / b1. width ( ) ,
589
630
} ,
590
631
)
591
632
}
@@ -599,7 +640,7 @@ impl Converter {
599
640
sb_y : & SuperTexel ,
600
641
) {
601
642
fn is_trivial_super ( sup : & SuperTexel ) -> bool {
602
- sup. in_super == 1 && sup. out_super == 1
643
+ sup. in_per_super == 1 && sup. out_per_super == 1
603
644
}
604
645
605
646
self . in_coords . resize ( 0 ) ;
@@ -620,10 +661,10 @@ impl Converter {
620
661
. as_mut_slice ( )
621
662
. copy_from_slice ( & self . super_blocks ) ;
622
663
} else {
623
- let in_chunk_len = ( sb_x. in_super * sb_y. in_super ) as usize ;
664
+ let in_chunk_len = ( sb_x. in_per_super * sb_y. in_per_super ) as usize ;
624
665
self . in_coords
625
666
. resize ( self . super_blocks . len ( ) * in_chunk_len) ;
626
- let out_chunk_len = ( sb_x. out_super * sb_y. out_super ) as usize ;
667
+ let out_chunk_len = ( sb_x. out_per_super * sb_y. out_per_super ) as usize ;
627
668
self . out_coords
628
669
. resize ( self . super_blocks . len ( ) * out_chunk_len) ;
629
670
@@ -637,18 +678,18 @@ impl Converter {
637
678
. chunks_exact_mut ( out_chunk_len) ;
638
679
639
680
for & [ bx, by] in self . super_blocks . as_slice ( ) . iter ( ) {
640
- let ( sx, sy) = ( bx * sb_x. in_super , by * sb_y. in_super ) ;
681
+ let ( sx, sy) = ( bx * sb_x. in_per_super , by * sb_y. in_per_super ) ;
641
682
if let Some ( chunk) = in_chunks. next ( ) {
642
- Self :: blocks ( 0 ..sb_x. in_super , 0 ..sb_y. in_super ) ( chunk) ;
683
+ Self :: blocks ( 0 ..sb_x. in_per_super , 0 ..sb_y. in_per_super ) ( chunk) ;
643
684
for p in chunk. iter_mut ( ) {
644
685
let [ ix, iy] = * p;
645
686
* p = [ sx + ix, sy + iy] ;
646
687
}
647
688
}
648
689
649
- let ( sx, sy) = ( bx * sb_x. out_super , by * sb_y. out_super ) ;
690
+ let ( sx, sy) = ( bx * sb_x. out_per_super , by * sb_y. out_per_super ) ;
650
691
if let Some ( chunk) = out_chunks. next ( ) {
651
- Self :: blocks ( 0 ..sb_x. out_super , 0 ..sb_y. out_super ) ( chunk) ;
692
+ Self :: blocks ( 0 ..sb_x. out_per_super , 0 ..sb_y. out_per_super ) ( chunk) ;
652
693
for p in chunk. iter_mut ( ) {
653
694
let [ ox, oy] = * p;
654
695
* p = [ sx + ox, sy + oy] ;
@@ -665,13 +706,13 @@ impl Converter {
665
706
666
707
let in_chunk = ChunkSpec {
667
708
chunks : self . in_slices . as_mut_slice ( ) ,
668
- chunk_size : self . chunk ,
709
+ chunk_size : self . chunk_per_fetch ,
669
710
should_defer_texel_ops : converter. should_defer_texel_read ,
670
711
} ;
671
712
672
713
let out_chunk = ChunkSpec {
673
714
chunks : self . out_slices . as_mut_slice ( ) ,
674
- chunk_size : self . chunk ,
715
+ chunk_size : self . chunk_per_write ,
675
716
should_defer_texel_ops : converter. should_defer_texel_write ,
676
717
} ;
677
718
@@ -681,6 +722,7 @@ impl Converter {
681
722
& mut self . in_index_list ,
682
723
in_chunk,
683
724
) ;
725
+
684
726
( ops. fill_out_index ) (
685
727
& info,
686
728
self . out_coords . as_slice ( ) ,
@@ -778,8 +820,8 @@ impl Converter {
778
820
* `in_texels`.
779
821
*/
780
822
let chunks = self . in_slices . as_mut_slice ( ) ;
781
- let indexes = self . in_index_list . chunks ( self . chunk ) ;
782
- let range = ( 0 ..self . in_index_list . len ( ) ) . step_by ( self . chunk ) ;
823
+ let indexes = self . in_index_list . chunks ( self . chunk_per_fetch ) ;
824
+ let range = ( 0 ..self . in_index_list . len ( ) ) . step_by ( self . chunk_per_fetch ) ;
783
825
784
826
for ( chunk, ( indexes, start) ) in chunks. iter_mut ( ) . zip ( indexes. zip ( range) ) {
785
827
let [ _, available] = chunk;
@@ -805,7 +847,7 @@ impl Converter {
805
847
idx : & self . in_index_list ,
806
848
into : & mut self . in_texels ,
807
849
range : 0 ..self . in_index_list . len ( ) ,
808
- } )
850
+ } ) ;
809
851
}
810
852
}
811
853
@@ -831,6 +873,9 @@ impl Converter {
831
873
let texels = & from. as_texels ( texel) [ range] ;
832
874
let texel_slice = into. as_mut_texels ( texel) ;
833
875
876
+ // The index structure and used texel type should match.
877
+ debug_assert_eq ! ( idx. len( ) , texels. len( ) ) ;
878
+
834
879
for ( & index, from) in idx. zip ( texels) {
835
880
if let Some ( into) = texel_slice. get_mut ( index) {
836
881
* into = texel. copy_val ( from) ;
@@ -859,8 +904,8 @@ impl Converter {
859
904
* the `out_texels`.
860
905
*/
861
906
let chunks = self . out_slices . as_slice ( ) ;
862
- let indexes = self . out_index_list . chunks ( self . chunk ) ;
863
- let range = ( 0 ..self . out_index_list . len ( ) ) . step_by ( self . chunk ) ;
907
+ let indexes = self . out_index_list . chunks ( self . chunk_per_write ) ;
908
+ let range = ( 0 ..self . out_index_list . len ( ) ) . step_by ( self . chunk_per_write ) ;
864
909
865
910
for ( & chunk, ( indexes, start) ) in chunks. iter ( ) . zip ( indexes. zip ( range) ) {
866
911
let [ _, unwritten] = chunk;
@@ -1372,7 +1417,7 @@ impl CommonPixel {
1372
1417
// target buffer by chunks if this is available.
1373
1418
fn join_bits < const N : usize > (
1374
1419
info : & Info ,
1375
- ops : & ConvertOps ,
1420
+ _ : & ConvertOps ,
1376
1421
bits : [ [ FromBits ; 4 ] ; N ] ,
1377
1422
pixel_buf : & TexelBuffer ,
1378
1423
out_texels : & mut TexelBuffer ,
@@ -1413,6 +1458,8 @@ impl CommonPixel {
1413
1458
let texel_slice = self . out_texels . as_mut_texels ( texel) ;
1414
1459
let pixel_slice = self . pixel_buf . as_texels ( self . join . array :: < N > ( ) ) ;
1415
1460
1461
+ debug_assert_eq ! ( texel_slice. len( ) , pixel_slice. len( ) ) ;
1462
+
1416
1463
for ch in [ 0u8 , 1 , 2 , 3 ] {
1417
1464
for ( texbits, pixels) in texel_slice. iter_mut ( ) . zip ( pixel_slice) {
1418
1465
for ( pixel_bits, joined) in self . bits . iter ( ) . zip ( pixels) {
0 commit comments