forked from AviSynth/AviSynthPlus
-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathconvert_avx.cpp
More file actions
121 lines (103 loc) · 5.86 KB
/
Copy pathconvert_avx.cpp
File metadata and controls
121 lines (103 loc) · 5.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// Avisynth v2.5. Copyright 2002-2009 Ben Rudiak-Gould et al.
// http://www.avisynth.org
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA, or visit
// http://www.gnu.org/copyleft/gpl.html .
//
// Linking Avisynth statically or dynamically with other modules is making a
// combined work based on Avisynth. Thus, the terms and conditions of the GNU
// General Public License cover the whole combination.
//
// As a special exception, the copyright holders of Avisynth give you
// permission to link Avisynth with independent modules that communicate with
// Avisynth solely through the interfaces defined in avisynth.h, regardless of the license
// terms of these independent modules, and to copy and distribute the
// resulting combined work under terms of your choice, provided that
// every copy of the combined work is accompanied by a complete copy of
// the source code of Avisynth (the version of Avisynth used to produce the
// combined work), being distributed under the terms of the GNU General
// Public License plus this exception. An independent module is a module
// which is not derived from or based on Avisynth, such as 3rd-party filters,
// import and export plugins, or graphical user interfaces.
#include "convert.h"
#include "convert_planar.h"
#include "convert_rgb.h"
#include "convert_yv12.h"
#include "convert_yuy2.h"
#include <avs/alignment.h>
#include <avs/win.h>
#include <avs/minmax.h>
#include <emmintrin.h>
#include <immintrin.h>
#include <tuple>
#include <map>
#include "convert_avx.h"
// Converts a plane of 32-bit float samples to unsigned integer samples.
//   pixel_t     : destination sample type (uint8_t or uint16_t)
//   targetbits  : destination bit depth; the largest output value is (1 << targetbits) - 1
//   srcp / dstp : first byte of the source / destination plane
//   src_rowsize : bytes of float data per source row
//   src_height  : number of rows to process
//   src_pitch / dst_pitch : distance between consecutive rows, in bytes
//   float_range : float value that is scaled to the largest output value
// Every result is rounded by adding 0.5 and then clamped to [0, largest output value].
// NOTE(review): the "_c_avx" suffix suggests plain C++ that is built with AVX code
// generation enabled - confirm against the build settings.
template<typename pixel_t, uint8_t targetbits>
void convert_32_to_uintN_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range)
{
  // All sizes arrive in bytes; the loops below work in whole samples.
  const int src_stride = src_pitch / sizeof(float);
  const int dst_stride = dst_pitch / sizeof(pixel_t);
  const int width = src_rowsize / sizeof(float);

  // Largest representable output: 255, 1023, 4095, 16383 or 65535
  const float max_value = static_cast<float>((1 << targetbits) - 1);
  const float scale = 1.0f / float_range * max_value;

  const float *src_row = reinterpret_cast<const float *>(srcp);
  pixel_t *dst_row = reinterpret_cast<pixel_t *>(dstp);

  for (int rows_left = src_height; rows_left > 0; --rows_left)
  {
    for (int x = 0; x < width; ++x)
    {
      // +0.5f rounds to nearest and keeps the neutral grey level of float 0.5
      const float scaled = src_row[x] * scale + 0.5f;
      // clamp first, so the integer conversion never sees an out-of-range value
      const float limited = clamp(scaled, 0.0f, max_value);
      dst_row[x] = static_cast<pixel_t>(limited);
    }
    src_row += src_stride;
    dst_row += dst_stride;
  }

  // Zero the upper halves of the YMM registers before returning
  // (presumably to avoid AVX/SSE transition penalties in the caller).
  _mm256_zeroupper();
}
// Explicit instantiations of convert_32_to_uintN_c_avx: one for 8-bit output stored
// as uint8_t, and one each for 10, 12, 14 and 16-bit output stored as uint16_t.
// The template body lives in this .cpp file, so these are presumably the only
// variants available to other translation units - confirm against convert_avx.h.
template void convert_32_to_uintN_c_avx<uint8_t, 8>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_32_to_uintN_c_avx<uint16_t, 10>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_32_to_uintN_c_avx<uint16_t, 12>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_32_to_uintN_c_avx<uint16_t, 14>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_32_to_uintN_c_avx<uint16_t, 16>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
// YUV: bit-depth change by pure bit shifting between 10/12/14/16-bit samples,
// all of which are stored as uint16_t.
//   expandrange == true  : every sample is shifted left by shiftbits
//                          (no clamping; the source is assumed to be in valid range)
//   expandrange == false : every sample is shifted right by shiftbits
//   src_rowsize, src_pitch and dst_pitch are given in bytes.
//   float_range is not used; the parameter list matches convert_32_to_uintN_c_avx.
template<bool expandrange, uint8_t shiftbits>
void convert_uint16_to_uint16_c_avx(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range)
{
  // Byte sizes -> sample counts
  const int src_stride = src_pitch / sizeof(uint16_t);
  const int dst_stride = dst_pitch / sizeof(uint16_t);
  const int width = src_rowsize / sizeof(uint16_t);

  const uint16_t *src_row = reinterpret_cast<const uint16_t *>(srcp);
  uint16_t *dst_row = reinterpret_cast<uint16_t *>(dstp);

  for (int rows_left = src_height; rows_left > 0; --rows_left)
  {
    for (int x = 0; x < width; ++x)
    {
      const uint16_t sample = src_row[x];
      // The shift is evaluated as int; storing the result keeps the low 16 bits.
      dst_row[x] = static_cast<uint16_t>(expandrange ? (sample << shiftbits) : (sample >> shiftbits));
    }
    src_row += src_stride;
    dst_row += dst_stride;
  }

  // Zero the upper halves of the YMM registers before returning
  // (presumably to avoid AVX/SSE transition penalties in the caller).
  _mm256_zeroupper();
}
// Explicit instantiations of convert_uint16_to_uint16_c_avx.
// <false, N> shifts every sample right by N bits (reduce range),
// <true, N> shifts every sample left by N bits (expand range), for N = 2, 4 and 6.
template void convert_uint16_to_uint16_c_avx<false, 2>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_uint16_to_uint16_c_avx<false, 4>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_uint16_to_uint16_c_avx<false, 6>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_uint16_to_uint16_c_avx<true, 2>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_uint16_to_uint16_c_avx<true, 4>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);
template void convert_uint16_to_uint16_c_avx<true, 6>(const BYTE *srcp, BYTE *dstp, int src_rowsize, int src_height, int src_pitch, int dst_pitch, float float_range);