@@ -743,26 +743,35 @@ blit_blend_rgb_overlay_avx2(SDL_BlitInfo *info)
743743
744744 const __m256i mm256_255 = _mm256_set1_epi16 (255 );
745745 const __m256i mm256_127 = _mm256_set1_epi16 (127 );
746+ const __m256i mm256_amask = _mm256_set1_epi32 (info -> dst -> Amask );
747+ const __m256i mm256_rgb_mask = _mm256_set1_epi32 (~info -> dst -> Amask );
746748
747- RUN_AVX2_BLITTER (RUN_16BIT_SHUFFLE_OUT (
748- /* src * dst */
749- __m256i multiply = _mm256_mullo_epi16 (shuff_src , shuff_dst );
750- /* divide by 127 */
751- multiply = _mm256_srli_epi16 (multiply , 7 );
752-
753- /* 255 - dst */
754- __m256i inverted_dst = _mm256_subs_epu16 (mm256_255 , shuff_dst );
755- /* 255 - src */
756- __m256i inverted_src = _mm256_subs_epu16 (mm256_255 , shuff_src );
757- /* dst * src */
758- __m256i screen = _mm256_mullo_epi16 (inverted_dst , inverted_src );
759- /* divide by 127 */
760- screen = _mm256_srli_epi16 (screen , 7 );
761- /* 255 - screen */
762- screen = _mm256_subs_epu16 (mm256_255 , screen );
763-
764- __m256i gt_127 = _mm256_cmpgt_epi16 (shuff_dst , mm256_127 );
765- shuff_dst = _mm256_blendv_epi8 (multiply , screen , gt_127 );))
749+ __m256i mm256_dst_alpha ;
750+
751+ RUN_AVX2_BLITTER (
752+ mm256_dst_alpha = _mm256_and_si256 (pixels_dst , mm256_amask );
753+ RUN_16BIT_SHUFFLE_OUT (
754+ /* src * dst */
755+ __m256i multiply = _mm256_mullo_epi16 (shuff_src , shuff_dst );
756+ /* divide by 127 */
757+ multiply = _mm256_srli_epi16 (multiply , 7 );
758+
759+ /* 255 - dst */
760+ __m256i inverted_dst = _mm256_sub_epi16 (mm256_255 , shuff_dst );
761+ /* 255 - src */
762+ __m256i inverted_src = _mm256_sub_epi16 (mm256_255 , shuff_src );
763+ /* dst * src */
764+ __m256i screen = _mm256_mullo_epi16 (inverted_dst , inverted_src );
765+ /* divide by 127 */
766+ screen = _mm256_srli_epi16 (screen , 7 );
767+ /* 255 - screen */
768+ screen = _mm256_sub_epi16 (mm256_255 , screen );
769+
770+ __m256i gt_127 = _mm256_cmpgt_epi16 (shuff_dst , mm256_127 );
771+ shuff_dst = _mm256_blendv_epi8 (multiply , screen , gt_127 );
772+
773+ ) pixels_dst = _mm256_and_si256 (pixels_dst , mm256_rgb_mask );
774+ pixels_dst = _mm256_or_si256 (pixels_dst , mm256_dst_alpha );)
766775}
767776#else
768777void
0 commit comments