Skip to content

Commit 2e95a2c

Browse files
JPEG: Around 3x faster to decode
1 parent 1e46b19 commit 2e95a2c

File tree

1 file changed

+31
-21
lines changed

1 file changed

+31
-21
lines changed

MCGalaxy/util/Imaging/JpegDecoder.cs

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -309,19 +309,18 @@ void DecodeMCUs(byte[] src, SimpleBitmap bmp) {
309309
for (int y = 0; y < BLOCK_SAMPLES; y++)
310310
for (int x = 0; x < BLOCK_SAMPLES; x++)
311311
{
312+
float sample = output[y * BLOCK_SAMPLES + x];
313+
312314
for (int py = 0; py < samplesY; py++)
313315
for (int px = 0; px < samplesX; px++)
314316
{
315317
int YY = (by * BLOCK_SAMPLES + y) * samplesY + py;
316318
int XX = (bx * BLOCK_SAMPLES + x) * samplesX + px;
317319
int idx = YY * mcu_w + XX;
318320

319-
if (i == 0)
320-
colors[idx].Y = output[y * BLOCK_SAMPLES + x];
321-
else if (i == 1)
322-
colors[idx].Cb = output[y * BLOCK_SAMPLES + x];
323-
else if (i == 2)
324-
colors[idx].Cr = output[y * BLOCK_SAMPLES + x];
321+
if (i == 0) colors[idx].Y = sample;
322+
else if (i == 1) colors[idx].Cb = sample;
323+
else if (i == 2) colors[idx].Cr = sample;
325324
}
326325
}
327326
}
@@ -401,7 +400,7 @@ void DecodeBlock(JpegComponent comp, byte[] src, int[] block, float[] output) {
401400

402401
float[] idct_factors;
403402
void ComputeIDCTFactors() {
404-
float[] factors = new float[128];
403+
float[] factors = new float[64];
405404

406405
for (int xy = 0; xy < 8; xy++)
407406
{
@@ -410,29 +409,40 @@ void ComputeIDCTFactors() {
410409
float cuv = uv == 0 ? 0.70710678f : 1.0f;
411410
float cosuv = (float)Math.Cos((2 * xy + 1) * uv * Math.PI / 16.0);
412411

413-
factors[(2 * xy + 1) * uv] = cuv * cosuv;
412+
factors[(xy * 8) + uv] = cuv * cosuv;
414413
}
415414
}
416415
idct_factors = factors;
417416
}
418417

419-
void IDCT(int[] block, float[] output) {
420-
float[] factors = idct_factors;
418+
unsafe void IDCT(int[] block, float[] output) {
419+
float[] factors = idct_factors;
420+
float* tmp = stackalloc float[BLOCK_SAMPLES * BLOCK_SAMPLES];
421421

422-
for (int y = 0; y < 8; y++)
423-
for (int x = 0; x < 8; x++)
422+
for (int col = 0; col < BLOCK_SAMPLES; col++)
424423
{
425-
float sum = 0.0f;
426-
for (int v = 0; v < 8; v++)
427-
for (int u = 0; u < 8; u++)
424+
for (int y = 0; y < BLOCK_SAMPLES; y++)
428425
{
429-
float suv = block[v*8+u];
430-
float cu_cosu = factors[(2 * x + 1) * u];
431-
float cv_cosv = factors[(2 * y + 1) * v];
432-
433-
sum += cu_cosu * cv_cosv * suv;
426+
float sum = 0.0f;
427+
for (int v = 0; v < BLOCK_SAMPLES; v++)
428+
{
429+
sum += block[v * 8 + col] * factors[(y * 8) + v];
430+
}
431+
tmp[y * 8 + col] = sum;
432+
}
433+
}
434+
435+
for (int row = 0; row < BLOCK_SAMPLES; row++)
436+
{
437+
for (int x = 0; x < BLOCK_SAMPLES; x++)
438+
{
439+
float sum = 0.0f;
440+
for (int u = 0; u < BLOCK_SAMPLES; u++)
441+
{
442+
sum += tmp[row * 8 + u] * factors[(x * 8) + u];
443+
}
444+
output[row * 8 + x] = (sum / 4.0f) + 128.0f; // undo level shift at end
434445
}
435-
output[y*8+x] = (sum / 4.0f) + 128.0f; // undo level shift at end
436446
}
437447
}
438448

0 commit comments

Comments
 (0)