diff --git a/cmake/MakeEmbed.cmake b/cmake/MakeEmbed.cmake new file mode 100644 index 0000000000..7af44f3819 --- /dev/null +++ b/cmake/MakeEmbed.cmake @@ -0,0 +1,4 @@ +file(READ ${INPUT_FILE} EMBED_CONTENTS) +file(WRITE ${OUTPUT_FILE} + "const char* k${VAR_NAME} = R\"(${EMBED_CONTENTS})\";\n" +) \ No newline at end of file diff --git a/src/ARMJIT_Memory.cpp b/src/ARMJIT_Memory.cpp index 09bad072fb..fa9026deab 100644 --- a/src/ARMJIT_Memory.cpp +++ b/src/ARMJIT_Memory.cpp @@ -1371,11 +1371,11 @@ void VRAMWrite(u32 addr, T val) { switch (addr & 0x00E00000) { - case 0x00000000: NDS::Current->GPU.WriteVRAM_ABG(addr, val); return; - case 0x00200000: NDS::Current->GPU.WriteVRAM_BBG(addr, val); return; - case 0x00400000: NDS::Current->GPU.WriteVRAM_AOBJ(addr, val); return; - case 0x00600000: NDS::Current->GPU.WriteVRAM_BOBJ(addr, val); return; - default: NDS::Current->GPU.WriteVRAM_LCDC(addr, val); return; + case 0x00000000: NDS::Current->GPU.SyncVRAM_ABG(addr, true); NDS::Current->GPU.WriteVRAM_ABG(addr, val); return; + case 0x00200000: NDS::Current->GPU.SyncVRAM_BBG(addr, true); NDS::Current->GPU.WriteVRAM_BBG(addr, val); return; + case 0x00400000: NDS::Current->GPU.SyncVRAM_AOBJ(addr, true); NDS::Current->GPU.WriteVRAM_AOBJ(addr, val); return; + case 0x00600000: NDS::Current->GPU.SyncVRAM_BOBJ(addr, true); NDS::Current->GPU.WriteVRAM_BOBJ(addr, val); return; + default: NDS::Current->GPU.SyncVRAM_LCDC(addr, true); NDS::Current->GPU.WriteVRAM_LCDC(addr, val); return; } } template @@ -1383,11 +1383,11 @@ T VRAMRead(u32 addr) { switch (addr & 0x00E00000) { - case 0x00000000: return NDS::Current->GPU.ReadVRAM_ABG(addr); - case 0x00200000: return NDS::Current->GPU.ReadVRAM_BBG(addr); - case 0x00400000: return NDS::Current->GPU.ReadVRAM_AOBJ(addr); - case 0x00600000: return NDS::Current->GPU.ReadVRAM_BOBJ(addr); - default: return NDS::Current->GPU.ReadVRAM_LCDC(addr); + case 0x00000000: NDS::Current->GPU.SyncVRAM_ABG(addr, false); return NDS::Current->GPU.ReadVRAM_ABG(addr); 
+ case 0x00200000: NDS::Current->GPU.SyncVRAM_BBG(addr, false); return NDS::Current->GPU.ReadVRAM_BBG(addr); + case 0x00400000: NDS::Current->GPU.SyncVRAM_AOBJ(addr, false); return NDS::Current->GPU.ReadVRAM_AOBJ(addr); + case 0x00600000: NDS::Current->GPU.SyncVRAM_BOBJ(addr, false); return NDS::Current->GPU.ReadVRAM_BOBJ(addr); + default: NDS::Current->GPU.SyncVRAM_LCDC(addr, false); return NDS::Current->GPU.ReadVRAM_LCDC(addr); } } diff --git a/src/Args.h b/src/Args.h index 7a54175672..5dbcb2faa3 100644 --- a/src/Args.h +++ b/src/Args.h @@ -30,7 +30,7 @@ #include "DSi_NAND.h" #include "FATStorage.h" #include "FreeBIOS.h" -#include "GPU3D_Soft.h" +#include "GPU.h" #include "SPI_Firmware.h" #include "SPU.h" @@ -116,7 +116,7 @@ struct NDSArgs /// The 3D renderer to initialize the DS with. /// Defaults to the software renderer. /// Can be changed later at any time. - std::unique_ptr Renderer3D = std::make_unique(); + std::unique_ptr Renderer = nullptr; }; /// Arguments to pass into the DSi constructor. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e85dd42958..d2705bd162 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -34,6 +34,7 @@ add_library(core STATIC GBACart.cpp GBACartMotionPak.cpp GPU.cpp + GPU_Soft.cpp GPU2D.cpp GPU2D_Soft.cpp GPU3D.cpp @@ -87,14 +88,14 @@ if (ENABLE_GDBSTUB) endif() if (ENABLE_OGLRENDERER) + add_subdirectory(OpenGL_shaders) target_sources(core PRIVATE GPU_OpenGL.cpp - GPU_OpenGL_shaders.h + GPU2D_OpenGL.cpp GPU3D_OpenGL.cpp GPU3D_Compute.cpp GPU3D_TexcacheOpenGL.cpp GPU3D_TexcacheOpenGL.h - GPU3D_OpenGL_shaders.h OpenGLSupport.cpp) target_compile_definitions(core PUBLIC OGLRENDERER_ENABLED) diff --git a/src/DSi.cpp b/src/DSi.cpp index 5f511a168b..1bdeb57a40 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -1589,11 +1589,11 @@ void DSi::ARM9Write8(u32 addr, u8 val) JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); switch (addr & 0x00E00000) { - case 0x00000000: GPU.WriteVRAM_ABG(addr, val); return; - case 0x00200000: GPU.WriteVRAM_BBG(addr, val); return; - case 0x00400000: GPU.WriteVRAM_AOBJ(addr, val); return; - case 0x00600000: GPU.WriteVRAM_BOBJ(addr, val); return; - default: GPU.WriteVRAM_LCDC(addr, val); return; + case 0x00000000: GPU.SyncVRAM_ABG(addr, true); GPU.WriteVRAM_ABG(addr, val); return; + case 0x00200000: GPU.SyncVRAM_BBG(addr, true); GPU.WriteVRAM_BBG(addr, val); return; + case 0x00400000: GPU.SyncVRAM_AOBJ(addr, true); GPU.WriteVRAM_AOBJ(addr, val); return; + case 0x00600000: GPU.SyncVRAM_BOBJ(addr, true); GPU.WriteVRAM_BOBJ(addr, val); return; + default: GPU.SyncVRAM_LCDC(addr, true); GPU.WriteVRAM_LCDC(addr, val); return; } case 0x08000000: diff --git a/src/GPU.cpp b/src/GPU.cpp index e1c7ddc4f1..b01f213814 100644 --- a/src/GPU.cpp +++ b/src/GPU.cpp @@ -22,8 +22,7 @@ #include "ARMJIT.h" -#include "GPU2D_Soft.h" -#include "GPU3D.h" +#include "GPU_Soft.h" namespace melonDS { @@ -41,6 +40,15 @@ enum LCD_FinishFrame, }; +// flags for VRAM blocks that can serve for display captures +// 
each block is 32K, thus each of banks A/B/C/D contains 4 blocks +enum +{ + CBFlag_IsCapture = (1<<15), // the contents of this block are a display capture + CBFlag_Complete = (1<<14), // this block contains a complete capture (not in progress) + CBFlag_Synced = (1<<13), // this block has been synced back to emulated VRAM +}; + /* VRAM invalidation tracking @@ -63,22 +71,21 @@ enum VRAMDirty need to be reset for the respective VRAM bank. */ -GPU::GPU(melonDS::NDS& nds, std::unique_ptr&& renderer3d, std::unique_ptr&& renderer2d) noexcept : +GPU::GPU(melonDS::NDS& nds, std::unique_ptr&& renderer) noexcept : NDS(nds), GPU2D_A(0, *this), GPU2D_B(1, *this), - GPU3D(nds, renderer3d ? std::move(renderer3d) : std::make_unique()), - GPU2D_Renderer(renderer2d ? std::move(renderer2d) : std::make_unique(*this)) + GPU3D(*this) { NDS.RegisterEventFuncs(Event_LCD, this, { - MakeEventThunk(GPU, StartHBlank), - MakeEventThunk(GPU, StartScanline), - MakeEventThunk(GPU, FinishFrame) + MakeEventThunk(GPU, StartHBlank), + MakeEventThunk(GPU, StartScanline), + MakeEventThunk(GPU, FinishFrame) }); NDS.RegisterEventFuncs(Event_DisplayFIFO, this, {MakeEventThunk(GPU, DisplayFIFO)}); - InitFramebuffers(); + SetRenderer(std::move(renderer)); } GPU::~GPU() noexcept @@ -119,8 +126,12 @@ void GPU::ResetVRAMCache() noexcept void GPU::Reset() noexcept { + ScreensEnabled = false; + ScreenSwap = false; + VCount = 0; - NextVCount = -1; + VCountOverride = false; + NextVCount = 0; TotalScanlines = 0; DispStat[0] = 0; @@ -167,58 +178,49 @@ void GPU::Reset() noexcept memset(VRAMPtr_BBG, 0, sizeof(VRAMPtr_BBG)); memset(VRAMPtr_BOBJ, 0, sizeof(VRAMPtr_BOBJ)); - size_t fbsize; - if (GPU3D.IsRendererAccelerated()) - fbsize = (256*3 + 1) * 192; - else - fbsize = 256 * 192; + memset(VRAMCaptureBlockFlags, 0, sizeof(VRAMCaptureBlockFlags)); - for (size_t i = 0; i < fbsize; i++) - { - Framebuffer[0][0][i] = 0xFFFFFFFF; - Framebuffer[1][0][i] = 0xFFFFFFFF; - } - for (size_t i = 0; i < fbsize; i++) - { - 
Framebuffer[0][1][i] = 0xFFFFFFFF; - Framebuffer[1][1][i] = 0xFFFFFFFF; - } + memset(VRAMCBF_ABG, 0, sizeof(VRAMCBF_ABG)); + memset(VRAMCBF_AOBJ, 0, sizeof(VRAMCBF_AOBJ)); + memset(VRAMCBF_BBG, 0, sizeof(VRAMCBF_BBG)); + memset(VRAMCBF_BOBJ, 0, sizeof(VRAMCBF_BOBJ)); GPU2D_A.Reset(); GPU2D_B.Reset(); GPU3D.Reset(); - int backbuf = FrontBuffer ? 0 : 1; - GPU2D_Renderer->SetFramebuffer(Framebuffer[backbuf][1].get(), Framebuffer[backbuf][0].get()); + Rend->Reset(); ResetVRAMCache(); OAMDirty = 0x3; - PaletteDirty = 0xF; + PaletteDirty = 0x5F; } void GPU::Stop() noexcept { - int fbsize; - if (GPU3D.IsRendererAccelerated()) - fbsize = (256*3 + 1) * 192; - else - fbsize = 256 * 192; - - memset(Framebuffer[0][0].get(), 0, fbsize*4); - memset(Framebuffer[0][1].get(), 0, fbsize*4); - memset(Framebuffer[1][0].get(), 0, fbsize*4); - memset(Framebuffer[1][1].get(), 0, fbsize*4); - - GPU3D.Stop(*this); + Rend->Stop(); } void GPU::DoSavestate(Savestate* file) noexcept { file->Section("GPUG"); + Rend->PreSavestate(); + + if (file->Saving) + { + SyncAllVRAMCaptures(); + } + + memset(VRAMCaptureBlockFlags, 0, sizeof(VRAMCaptureBlockFlags)); + + file->VarBool(&ScreensEnabled); + file->VarBool(&ScreenSwap); + file->Var16(&VCount); - file->Var32(&NextVCount); + file->VarBool(&VCountOverride); + file->Var16(&NextVCount); file->Var16(&TotalScanlines); file->Var16(&DispStat[0]); @@ -226,6 +228,17 @@ void GPU::DoSavestate(Savestate* file) noexcept file->Var16(&VMatch[0]); file->Var16(&VMatch[1]); + file->VarArray(DispFIFO, sizeof(DispFIFO)); + file->Var8(&DispFIFOReadPtr); + file->Var8(&DispFIFOWritePtr); + file->VarArray(DispFIFOBuffer, sizeof(DispFIFOBuffer)); + + file->Var16(&MasterBrightnessA); + file->Var16(&MasterBrightnessB); + + file->Var32(&CaptureCnt); + file->VarBool(&CaptureEnable); + file->VarArray(Palette, 2*1024); file->VarArray(OAM, 2*1024); @@ -263,13 +276,25 @@ void GPU::DoSavestate(Savestate* file) noexcept if (!file->Saving) { for (int i = 0; i < 0x20; i++) + { 
VRAMPtr_ABG[i] = GetUniqueBankPtr(VRAMMap_ABG[i], i << 14); + VRAMCBF_ABG[i] = GetUniqueBankCBF(VRAMMap_ABG[i], i); + } for (int i = 0; i < 0x10; i++) + { VRAMPtr_AOBJ[i] = GetUniqueBankPtr(VRAMMap_AOBJ[i], i << 14); + VRAMCBF_AOBJ[i] = GetUniqueBankCBF(VRAMMap_AOBJ[i], i); + } for (int i = 0; i < 0x8; i++) + { VRAMPtr_BBG[i] = GetUniqueBankPtr(VRAMMap_BBG[i], i << 14); + VRAMCBF_BBG[i] = GetUniqueBankCBF(VRAMMap_BBG[i], i); + } for (int i = 0; i < 0x8; i++) + { VRAMPtr_BOBJ[i] = GetUniqueBankPtr(VRAMMap_BOBJ[i], i << 14); + VRAMCBF_BOBJ[i] = GetUniqueBankCBF(VRAMMap_BOBJ[i], i); + } } GPU2D_A.DoSavestate(file); @@ -277,51 +302,219 @@ void GPU::DoSavestate(Savestate* file) noexcept GPU3D.DoSavestate(file); if (!file->Saving) + { ResetVRAMCache(); + OAMDirty = 0x3; + PaletteDirty = 0x5F; + } + + Rend->PostSavestate(); } -void GPU::AssignFramebuffers() noexcept + +void GPU::SetRenderer(std::unique_ptr&& renderer) noexcept { - int backbuf = FrontBuffer ? 0 : 1; - if (NDS.PowerControl9 & (1<<15)) + SyncAllVRAMCaptures(); + + bool good = false; + if (renderer) { - GPU2D_Renderer->SetFramebuffer(Framebuffer[backbuf][0].get(), Framebuffer[backbuf][1].get()); + Rend = std::move(renderer); + if (Rend->Init()) + { + Rend->Reset(); + good = true; + } + else + { + // TODO: report error to platform + } } - else + + if (!good) { - GPU2D_Renderer->SetFramebuffer(Framebuffer[backbuf][1].get(), Framebuffer[backbuf][0].get()); + Rend = std::make_unique(NDS); + Rend->Init(); + Rend->Reset(); } + + ResetVRAMCache(); + OAMDirty = 0x3; + PaletteDirty = 0x5F; } -void GPU::SetRenderer3D(std::unique_ptr&& renderer) noexcept + +bool GPU::GetFramebuffers(void** top, void** bottom) { - if (renderer == nullptr) - GPU3D.SetCurrentRenderer(std::make_unique()); + return Rend->GetFramebuffers(top, bottom); +} + + +u8 GPU::Read8(u32 addr) +{ + u16 ret = Read16(addr & ~0x1); + if (addr & 0x1) + return ret >> 8; else - GPU3D.SetCurrentRenderer(std::move(renderer)); + return ret & 0xFF; +} - 
InitFramebuffers(); +u16 GPU::Read16(u32 addr) +{ + switch (addr) + { + case 0x04000064: return CaptureCnt & 0xFFFF; + case 0x04000066: return CaptureCnt >> 16; + + case 0x0400006C: return MasterBrightnessA; + case 0x0400106C: return MasterBrightnessB; + } + + Log(LogLevel::Debug, "unknown GPU read16 %08X\n", addr); + return 0; } -void GPU::InitFramebuffers() noexcept +u32 GPU::Read32(u32 addr) { - int fbsize; - if (GPU3D.IsRendererAccelerated()) - fbsize = (256*3 + 1) * 192; - else - fbsize = 256 * 192; + switch (addr) + { + case 0x04000064: return CaptureCnt; + + case 0x0400006C: return MasterBrightnessA; + case 0x0400106C: return MasterBrightnessB; + } + + Log(LogLevel::Debug, "unknown GPU read32 %08X\n", addr); + return 0; +} + +void GPU::Write8(u32 addr, u8 val) +{ + switch (addr) + { + case 0x04000004: + SetDispStat(0, val, 0x00FF); + return; + case 0x04000005: + SetDispStat(0, val << 8, 0xFF00); + return; + case 0x04000006: + SetVCount(val, 0x00FF); + return; + case 0x04000007: + SetVCount(val << 8, 0xFF00); + return; + + case 0x04000064: + CaptureCnt = (CaptureCnt & 0xFFFFFF00) | (val & 0x1F); + return; + case 0x04000065: + CaptureCnt = (CaptureCnt & 0xFFFF00FF) | ((val & 0x1F) << 8); + return; + case 0x04000066: + CaptureCnt = (CaptureCnt & 0xFF00FFFF) | ((val & 0x3F) << 16); + return; + case 0x04000067: + CaptureCnt = (CaptureCnt & 0x00FFFFFF) | ((val & 0xEF) << 24); + return; + + case 0x04000068: + DispFIFO[DispFIFOWritePtr] = val * 0x0101; + return; + case 0x04000069: + return; + case 0x0400006A: + DispFIFO[DispFIFOWritePtr+1] = val * 0x0101; + return; + case 0x0400006B: + DispFIFOWritePtr += 2; + DispFIFOWritePtr &= 0xF; + return; + + case 0x0400006C: + MasterBrightnessA = (MasterBrightnessA & 0xFF00) | (val & 0x1F); + return; + case 0x0400006D: + MasterBrightnessA = (MasterBrightnessA & 0x00FF) | ((val & 0xC0) << 8); + return; + case 0x0400106C: + MasterBrightnessB = (MasterBrightnessB & 0xFF00) | (val & 0x1F); + return; + case 0x0400106D: + 
MasterBrightnessB = (MasterBrightnessB & 0x00FF) | ((val & 0xC0) << 8); + return; + } + + Log(LogLevel::Debug, "unknown GPU write8 %08X %02X\n", addr, val); +} - Framebuffer[0][0] = std::make_unique(fbsize); - Framebuffer[1][0] = std::make_unique(fbsize); - Framebuffer[0][1] = std::make_unique(fbsize); - Framebuffer[1][1] = std::make_unique(fbsize); +void GPU::Write16(u32 addr, u16 val) +{ + switch (addr) + { + case 0x04000004: + SetDispStat(0, val, 0xFFFF); + return; + case 0x04000006: + SetVCount(val, 0xFFFF); + return; + + case 0x04000064: + CaptureCnt = (CaptureCnt & 0xFFFF0000) | (val & 0x1F1F); + return; + case 0x04000066: + CaptureCnt = (CaptureCnt & 0xFFFF) | ((val & 0xEF3F) << 16); + return; + + case 0x04000068: + DispFIFO[DispFIFOWritePtr] = val; + return; + case 0x0400006A: + DispFIFO[DispFIFOWritePtr+1] = val; + DispFIFOWritePtr += 2; + DispFIFOWritePtr &= 0xF; + return; + + case 0x0400006C: + MasterBrightnessA = val & 0xC01F; + return; + case 0x0400106C: + MasterBrightnessB = val & 0xC01F; + return; + } - memset(Framebuffer[0][0].get(), 0, fbsize*4); - memset(Framebuffer[1][0].get(), 0, fbsize*4); - memset(Framebuffer[0][1].get(), 0, fbsize*4); - memset(Framebuffer[1][1].get(), 0, fbsize*4); + Log(LogLevel::Debug, "unknown GPU write16 %08X %04X\n", addr, val); +} - AssignFramebuffers(); +void GPU::Write32(u32 addr, u32 val) +{ + switch (addr) + { + case 0x04000004: + SetDispStat(0, val & 0xFFFF, 0xFFFF); + SetVCount(val >> 16, 0xFFFF); + return; + + case 0x04000064: + CaptureCnt = val & 0xEF3F1F1F; + return; + + case 0x04000068: + DispFIFO[DispFIFOWritePtr] = val & 0xFFFF; + DispFIFO[DispFIFOWritePtr+1] = val >> 16; + DispFIFOWritePtr += 2; + DispFIFOWritePtr &= 0xF; + return; + + case 0x0400006C: + MasterBrightnessA = val & 0xC01F; + return; + case 0x0400106C: + MasterBrightnessB = val & 0xC01F; + return; + } + + Log(LogLevel::Debug, "unknown GPU write32 %08X %08X\n", addr, val); } @@ -369,6 +562,16 @@ const u8* GPU::GetUniqueBankPtr(u32 mask, u32 
offset) const noexcept return &VRAM[num][offset & VRAMMask[num]]; } +u16* GPU::GetUniqueBankCBF(u32 mask, u32 offset) +{ + //mask &= 0xF; + if (!mask || (mask & (mask - 1)) != 0) return nullptr; + if (mask & 0x1F0) return nullptr; + int num = __builtin_ctz(mask); + offset = (offset >> 1) & 0x3; + return &VRAMCaptureBlockFlags[(num << 2) | offset]; +} + #define MAP_RANGE(map, base, n) for (int i = 0; i < n; i++) VRAMMap_##map[(base)+i] |= bankmask; #define UNMAP_RANGE(map, base, n) for (int i = 0; i < n; i++) VRAMMap_##map[(base)+i] &= ~bankmask; @@ -377,6 +580,9 @@ const u8* GPU::GetUniqueBankPtr(u32 mask, u32 offset) const noexcept #define UNMAP_RANGE_PTR(map, base, n) \ for (int i = 0; i < n; i++) { VRAMMap_##map[(base)+i] &= ~bankmask; VRAMPtr_##map[(base)+i] = GetUniqueBankPtr(VRAMMap_##map[(base)+i], ((base)+i)<<14); } +#define SET_RANGE_CBF(map, base) \ + for (int i = 0; i < 8; i++) { VRAMCBF_##map[(base)+i] = GetUniqueBankCBF(VRAMMap_##map[(base)+i], ((base)+i)); } + void GPU::MapVRAM_AB(u32 bank, u8 cnt) noexcept { cnt &= 0x9B; @@ -400,11 +606,13 @@ void GPU::MapVRAM_AB(u32 bank, u8 cnt) noexcept case 1: // ABG UNMAP_RANGE_PTR(ABG, oldofs<<3, 8); + SET_RANGE_CBF(ABG, oldofs<<3); break; case 2: // AOBJ oldofs &= 0x1; UNMAP_RANGE_PTR(AOBJ, oldofs<<3, 8); + SET_RANGE_CBF(AOBJ, oldofs<<3); break; case 3: // texture @@ -423,11 +631,13 @@ void GPU::MapVRAM_AB(u32 bank, u8 cnt) noexcept case 1: // ABG MAP_RANGE_PTR(ABG, ofs<<3, 8); + SET_RANGE_CBF(ABG, ofs<<3); break; case 2: // AOBJ ofs &= 0x1; MAP_RANGE_PTR(AOBJ, ofs<<3, 8); + SET_RANGE_CBF(AOBJ, ofs<<3); break; case 3: // texture @@ -462,6 +672,7 @@ void GPU::MapVRAM_CD(u32 bank, u8 cnt) noexcept case 1: // ABG UNMAP_RANGE_PTR(ABG, oldofs<<3, 8); + SET_RANGE_CBF(ABG, oldofs<<3); break; case 2: // ARM7 VRAM @@ -477,10 +688,12 @@ void GPU::MapVRAM_CD(u32 bank, u8 cnt) noexcept if (bank == 2) { UNMAP_RANGE_PTR(BBG, 0, 8); + SET_RANGE_CBF(BBG, 0); } else { UNMAP_RANGE_PTR(BOBJ, 0, 8); + SET_RANGE_CBF(BOBJ, 0); } 
break; } @@ -496,6 +709,7 @@ void GPU::MapVRAM_CD(u32 bank, u8 cnt) noexcept case 1: // ABG MAP_RANGE_PTR(ABG, ofs<<3, 8); + SET_RANGE_CBF(ABG, ofs<<3); break; case 2: // ARM7 VRAM @@ -514,14 +728,18 @@ void GPU::MapVRAM_CD(u32 bank, u8 cnt) noexcept if (bank == 2) { MAP_RANGE_PTR(BBG, 0, 8); + SET_RANGE_CBF(BBG, 0); } else { MAP_RANGE_PTR(BOBJ, 0, 8); + SET_RANGE_CBF(BOBJ, 0); } break; } } + + // TODO sync capture blocks if we get mapped to ARM7? } void GPU::MapVRAM_E(u32 bank, u8 cnt) noexcept @@ -545,10 +763,12 @@ void GPU::MapVRAM_E(u32 bank, u8 cnt) noexcept case 1: // ABG UNMAP_RANGE_PTR(ABG, 0, 4); + SET_RANGE_CBF(ABG, 0); break; case 2: // AOBJ UNMAP_RANGE_PTR(AOBJ, 0, 4); + SET_RANGE_CBF(AOBJ, 0); break; case 3: // texture palette @@ -571,10 +791,12 @@ void GPU::MapVRAM_E(u32 bank, u8 cnt) noexcept case 1: // ABG MAP_RANGE_PTR(ABG, 0, 4); + SET_RANGE_CBF(ABG, 0); break; case 2: // AOBJ MAP_RANGE_PTR(AOBJ, 0, 4); + SET_RANGE_CBF(AOBJ, 0); break; case 3: // texture palette @@ -616,6 +838,8 @@ void GPU::MapVRAM_FG(u32 bank, u8 cnt) noexcept VRAMMap_ABG[base + 2] &= ~bankmask; VRAMPtr_ABG[base] = GetUniqueBankPtr(VRAMMap_ABG[base], base << 14); VRAMPtr_ABG[base + 2] = GetUniqueBankPtr(VRAMMap_ABG[base + 2], (base + 2) << 14); + VRAMCBF_ABG[base] = GetUniqueBankCBF(VRAMMap_ABG[base], base); + VRAMCBF_ABG[base + 2] = GetUniqueBankCBF(VRAMMap_ABG[base + 2], base + 2); } break; @@ -626,6 +850,8 @@ void GPU::MapVRAM_FG(u32 bank, u8 cnt) noexcept VRAMMap_AOBJ[base + 2] &= ~bankmask; VRAMPtr_AOBJ[base] = GetUniqueBankPtr(VRAMMap_AOBJ[base], base << 14); VRAMPtr_AOBJ[base + 2] = GetUniqueBankPtr(VRAMMap_AOBJ[base + 2], (base + 2) << 14); + VRAMCBF_AOBJ[base] = GetUniqueBankCBF(VRAMMap_AOBJ[base], base); + VRAMCBF_AOBJ[base + 2] = GetUniqueBankCBF(VRAMMap_AOBJ[base + 2], base + 2); } break; @@ -659,6 +885,8 @@ void GPU::MapVRAM_FG(u32 bank, u8 cnt) noexcept VRAMMap_ABG[base + 2] |= bankmask; VRAMPtr_ABG[base] = GetUniqueBankPtr(VRAMMap_ABG[base], base << 14); 
VRAMPtr_ABG[base + 2] = GetUniqueBankPtr(VRAMMap_ABG[base + 2], (base + 2) << 14); + VRAMCBF_ABG[base] = GetUniqueBankCBF(VRAMMap_ABG[base], base); + VRAMCBF_ABG[base + 2] = GetUniqueBankCBF(VRAMMap_ABG[base + 2], base + 2); } break; @@ -669,6 +897,8 @@ void GPU::MapVRAM_FG(u32 bank, u8 cnt) noexcept VRAMMap_AOBJ[base + 2] |= bankmask; VRAMPtr_AOBJ[base] = GetUniqueBankPtr(VRAMMap_AOBJ[base], base << 14); VRAMPtr_AOBJ[base + 2] = GetUniqueBankPtr(VRAMMap_AOBJ[base + 2], (base + 2) << 14); + VRAMCBF_AOBJ[base] = GetUniqueBankCBF(VRAMMap_AOBJ[base], base); + VRAMCBF_AOBJ[base + 2] = GetUniqueBankCBF(VRAMMap_AOBJ[base + 2], base + 2); } break; @@ -708,14 +938,12 @@ void GPU::MapVRAM_H(u32 bank, u8 cnt) noexcept break; case 1: // BBG - VRAMMap_BBG[0] &= ~bankmask; - VRAMMap_BBG[1] &= ~bankmask; - VRAMMap_BBG[4] &= ~bankmask; - VRAMMap_BBG[5] &= ~bankmask; - VRAMPtr_BBG[0] = GetUniqueBankPtr(VRAMMap_BBG[0], 0 << 14); - VRAMPtr_BBG[1] = GetUniqueBankPtr(VRAMMap_BBG[1], 1 << 14); - VRAMPtr_BBG[4] = GetUniqueBankPtr(VRAMMap_BBG[4], 4 << 14); - VRAMPtr_BBG[5] = GetUniqueBankPtr(VRAMMap_BBG[5], 5 << 14); + for (int i : {0, 1, 4, 5}) + { + VRAMMap_BBG[i] &= ~bankmask; + VRAMPtr_BBG[i] = GetUniqueBankPtr(VRAMMap_BBG[i], i << 14); + VRAMCBF_BBG[i] = GetUniqueBankCBF(VRAMMap_BBG[i], i); + } break; case 2: // BBG ext palette @@ -733,14 +961,12 @@ void GPU::MapVRAM_H(u32 bank, u8 cnt) noexcept break; case 1: // BBG - VRAMMap_BBG[0] |= bankmask; - VRAMMap_BBG[1] |= bankmask; - VRAMMap_BBG[4] |= bankmask; - VRAMMap_BBG[5] |= bankmask; - VRAMPtr_BBG[0] = GetUniqueBankPtr(VRAMMap_BBG[0], 0 << 14); - VRAMPtr_BBG[1] = GetUniqueBankPtr(VRAMMap_BBG[1], 1 << 14); - VRAMPtr_BBG[4] = GetUniqueBankPtr(VRAMMap_BBG[4], 4 << 14); - VRAMPtr_BBG[5] = GetUniqueBankPtr(VRAMMap_BBG[5], 5 << 14); + for (int i : {0, 1, 4, 5}) + { + VRAMMap_BBG[i] |= bankmask; + VRAMPtr_BBG[i] = GetUniqueBankPtr(VRAMMap_BBG[i], i << 14); + VRAMCBF_BBG[i] = GetUniqueBankCBF(VRAMMap_BBG[i], i); + } break; case 2: // BBG 
ext palette @@ -770,18 +996,17 @@ void GPU::MapVRAM_I(u32 bank, u8 cnt) noexcept break; case 1: // BBG - VRAMMap_BBG[2] &= ~bankmask; - VRAMMap_BBG[3] &= ~bankmask; - VRAMMap_BBG[6] &= ~bankmask; - VRAMMap_BBG[7] &= ~bankmask; - VRAMPtr_BBG[2] = GetUniqueBankPtr(VRAMMap_BBG[2], 2 << 14); - VRAMPtr_BBG[3] = GetUniqueBankPtr(VRAMMap_BBG[3], 3 << 14); - VRAMPtr_BBG[6] = GetUniqueBankPtr(VRAMMap_BBG[6], 6 << 14); - VRAMPtr_BBG[7] = GetUniqueBankPtr(VRAMMap_BBG[7], 7 << 14); + for (int i : {2, 3, 6, 7}) + { + VRAMMap_BBG[i] &= ~bankmask; + VRAMPtr_BBG[i] = GetUniqueBankPtr(VRAMMap_BBG[i], i << 14); + VRAMCBF_BBG[i] = GetUniqueBankCBF(VRAMMap_BBG[i], i); + } break; case 2: // BOBJ UNMAP_RANGE_PTR(BOBJ, 0, 8); + SET_RANGE_CBF(BOBJ, 0); break; case 3: // BOBJ ext palette @@ -799,18 +1024,17 @@ void GPU::MapVRAM_I(u32 bank, u8 cnt) noexcept break; case 1: // BBG - VRAMMap_BBG[2] |= bankmask; - VRAMMap_BBG[3] |= bankmask; - VRAMMap_BBG[6] |= bankmask; - VRAMMap_BBG[7] |= bankmask; - VRAMPtr_BBG[2] = GetUniqueBankPtr(VRAMMap_BBG[2], 2 << 14); - VRAMPtr_BBG[3] = GetUniqueBankPtr(VRAMMap_BBG[3], 3 << 14); - VRAMPtr_BBG[6] = GetUniqueBankPtr(VRAMMap_BBG[6], 6 << 14); - VRAMPtr_BBG[7] = GetUniqueBankPtr(VRAMMap_BBG[7], 7 << 14); + for (int i : {2, 3, 6, 7}) + { + VRAMMap_BBG[i] |= bankmask; + VRAMPtr_BBG[i] = GetUniqueBankPtr(VRAMMap_BBG[i], i << 14); + VRAMCBF_BBG[i] = GetUniqueBankCBF(VRAMMap_BBG[i], i); + } break; case 2: // BOBJ MAP_RANGE_PTR(BOBJ, 0, 8); + SET_RANGE_CBF(BOBJ, 0); break; case 3: // BOBJ ext palette @@ -831,16 +1055,52 @@ void GPU::SetPowerCnt(u32 val) noexcept // * bit9: disables engine B palette, OAM and rendering (screen turns white) // * bit15: screen swap - if (!(val & (1<<0))) Log(LogLevel::Warn, "!!! CLEARING POWCNT BIT0. 
DANGER\n"); - GPU2D_A.SetEnabled(val & (1<<1)); GPU2D_B.SetEnabled(val & (1<<9)); GPU3D.SetEnabled(val & (1<<3), val & (1<<2)); - AssignFramebuffers(); + ScreenSwap = !!(val & (1<<15)); +} + + +void GPU::SetDispStatIRQ(int cpu, int num) +{ + u16 irqmask = (1 << num); + u16 enablemask = (1 << (num+3)); + + // DISPSTAT IRQs are edge-triggered + // if the flag was already set, no IRQ will be triggered + if (DispStat[cpu] & irqmask) + return; + + DispStat[cpu] |= irqmask; + if (DispStat[cpu] & enablemask) + NDS.SetIRQ(cpu, num); } +bool GPU::UsesDisplayFIFO() +{ + if (((GPU2D_A.DispCnt >> 16) & 0x3) == 3) + return true; + if ((CaptureCnt & (1<<25)) && ((CaptureCnt >> 29) & 0x3) != 0) + return true; + + return false; +} + +void GPU::SampleDisplayFIFO(u32 offset, u32 num) +{ + for (u32 i = 0; i < num; i++) + { + u16 val = DispFIFO[DispFIFOReadPtr]; + DispFIFOReadPtr++; + DispFIFOReadPtr &= 0xF; + + DispFIFOBuffer[offset+i] = val; + } +} + void GPU::DisplayFIFO(u32 x) noexcept { // sample the FIFO @@ -849,9 +1109,9 @@ void GPU::DisplayFIFO(u32 x) noexcept if (x > 0) { if (x == 8) - GPU2D_A.SampleFIFO(0, 5); + SampleDisplayFIFO(0, 5); else - GPU2D_A.SampleFIFO(x-11, 8); + SampleDisplayFIFO(x-11, 8); } if (x < 256) @@ -861,15 +1121,18 @@ void GPU::DisplayFIFO(u32 x) noexcept NDS.ScheduleEvent(Event_DisplayFIFO, true, 6*8, 0, x+8); } else - GPU2D_A.SampleFIFO(253, 3); // sample the remaining pixels + SampleDisplayFIFO(253, 3); // sample the remaining pixels } + void GPU::StartFrame() noexcept { + ScreensEnabled = !!(NDS.PowerControl9 & (1<<0)); + // only run the display FIFO if needed: // * if it is used for display or capture // * if we have display FIFO DMA - RunFIFO = GPU2D_A.UsesFIFO() || NDS.DMAsInMode(0, 0x04); + RunFIFO = UsesDisplayFIFO() || NDS.DMAsInMode(0, 0x04); TotalScanlines = 0; StartScanline(0); @@ -880,61 +1143,61 @@ void GPU::StartHBlank(u32 line) noexcept DispStat[0] |= (1<<1); DispStat[1] |= (1<<1); + bool resetregs = (VCount == 262); + + // note: this 
should be done around 48 cycles after the scanline start + GPU2D_A.UpdateRegistersPreDraw(resetregs); + GPU2D_B.UpdateRegistersPreDraw(resetregs); + if (VCount < 192) { // draw // note: this should start 48 cycles after the scanline start if (line < 192) - { - GPU2D_Renderer->DrawScanline(line, &GPU2D_A); - GPU2D_Renderer->DrawScanline(line, &GPU2D_B); - } - - // sprites are pre-rendered one scanline in advance + Rend->DrawScanline(line); if (line < 191) - { - GPU2D_Renderer->DrawSprites(line+1, &GPU2D_A); - GPU2D_Renderer->DrawSprites(line+1, &GPU2D_B); - } + Rend->DrawSprites(line+1); NDS.CheckDMAs(0, 0x02); } else if (VCount == 215) { - GPU3D.VCount215(*this); + Rend->Start3DRendering(); } else if (VCount == 262) { - GPU2D_Renderer->DrawSprites(0, &GPU2D_A); - GPU2D_Renderer->DrawSprites(0, &GPU2D_B); + // sprites are pre-rendered one scanline in advance + Rend->DrawSprites(0); } + GPU2D_A.UpdateRegistersPostDraw(resetregs); + GPU2D_B.UpdateRegistersPostDraw(resetregs); + if (DispStat[0] & (1<<4)) NDS.SetIRQ(0, IRQ_HBlank); if (DispStat[1] & (1<<4)) NDS.SetIRQ(1, IRQ_HBlank); - if (VCount < 262) - NDS.ScheduleEvent(Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), LCD_StartScanline, line+1); - else + if ((VCount == 262) || (VCount == 511)) NDS.ScheduleEvent(Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), LCD_FinishFrame, line+1); + else + NDS.ScheduleEvent(Event_LCD, true, (LINE_CYCLES - HBLANK_CYCLES), LCD_StartScanline, line+1); } void GPU::FinishFrame(u32 lines) noexcept { - FrontBuffer = FrontBuffer ? 0 : 1; - AssignFramebuffers(); + Rend->SwapBuffers(); TotalScanlines = lines; if (GPU3D.AbortFrame) { - GPU3D.RestartFrame(*this); + Rend->Restart3DRendering(); GPU3D.AbortFrame = false; } } void GPU::BlankFrame() noexcept { - int backbuf = FrontBuffer ? 0 : 1; + /*int backbuf = FrontBuffer ? 
0 : 1; int fbsize; if (GPU3D.IsRendererAccelerated()) fbsize = (256*3 + 1) * 192; @@ -945,128 +1208,187 @@ void GPU::BlankFrame() noexcept memset(Framebuffer[backbuf][1].get(), 0, fbsize*4); FrontBuffer = backbuf; - AssignFramebuffers(); + AssignFramebuffers();*/ + // TODO do it on the renderer!! TotalScanlines = 263; } void GPU::StartScanline(u32 line) noexcept { - if (line == 0) - VCount = 0; - else if (NextVCount != 0xFFFFFFFF) - VCount = NextVCount; - else - VCount++; - - NextVCount = -1; + /* + * order of operations on hardware: + * 1. VCount is incremented + * 2. things are done based on the new value (ie. 262 is when the DISPSTAT VBlank flag is cleared) + * 3. if VCOUNT was written to, the new value is applied + * 4. VMatch is checked + * + * if VCount is set to 263 or more, it will count all the way to 511 and wrap around + * if the 261->262 transition is skipped, the VBlank flag remains set (until the end of the next frame) + * -> this suppresses the next VBlank IRQ + * likely, skipping 191->192 behaves similarly + * + * ultimately, messing with VCount can cause a lot of weird shit, seeing as VCount controls + * a lot of the renderer logic and the LCD sync signals. + * certain VCount transitions can cause odd effects such as LCDs fading out. 
+ */ + + // clear HBlank flags DispStat[0] &= ~(1<<1); DispStat[1] &= ~(1<<1); - if (VCount == VMatch[0]) - { - DispStat[0] |= (1<<2); + // update hardware status - if (DispStat[0] & (1<<5)) NDS.SetIRQ(0, IRQ_VCount); - } + if (line == 0) + VCount = 0; else - DispStat[0] &= ~(1<<2); - - if (VCount == VMatch[1]) { - DispStat[1] |= (1<<2); - - if (DispStat[1] & (1<<5)) NDS.SetIRQ(1, IRQ_VCount); + VCount++; + VCount &= 0x1FF; } - else - DispStat[1] &= ~(1<<2); - GPU2D_A.CheckWindows(VCount); - GPU2D_B.CheckWindows(VCount); + GPU2D_A.UpdateWindows(VCount); + GPU2D_B.UpdateWindows(VCount); if (VCount >= 2 && VCount < 194) NDS.CheckDMAs(0, 0x03); else if (VCount == 194) NDS.StopDMAs(0, 0x03); - if (line < 192) + if ((VCount < 192) && RunFIFO) + NDS.ScheduleEvent(Event_DisplayFIFO, false, 32, 0, 0); + + if (VCount == 0) { - if (line == 0) + if (CaptureCnt & (1<<31)) { - GPU2D_Renderer->VBlankEnd(&GPU2D_A, &GPU2D_B); - GPU2D_A.VBlankEnd(); - GPU2D_B.VBlankEnd(); + CaptureEnable = true; + CheckCaptureStart(); } - - if (RunFIFO) - NDS.ScheduleEvent(Event_DisplayFIFO, false, 32, 0, 0); } + else if (VCount == 192) + { + // VBlank + + SetDispStatIRQ(0, 0); + SetDispStatIRQ(1, 0); + + if (CaptureEnable) + CheckCaptureEnd(); + + DispFIFOReadPtr = 0; + DispFIFOWritePtr = 0; - if (VCount == 262) + // in reality rendering already finishes at line 144 + // and games might already start to modify texture memory. + // That doesn't matter for us because we cache the entire + // texture memory anyway and only update it before the start + // of the next frame. 
+ // So we can give the rasteriser a bit more headroom + Rend->Finish3DRendering(); + + DispStat[0] |= (1<<0); + DispStat[1] |= (1<<0); + + NDS.StopDMAs(0, 0x04); + + NDS.CheckDMAs(0, 0x01); + NDS.CheckDMAs(1, 0x11); + + GPU3D.VBlank(); + + Rend->VBlank(); + + if (CaptureEnable) + { + CaptureCnt &= ~(1<<31); + CaptureEnable = false; + } + } + else if (VCount == 262) { - // frame end + // VBlank end DispStat[0] &= ~(1<<0); DispStat[1] &= ~(1<<0); } - else + + // if VCount was written to during the previous scanline, apply the new value + + if (VCountOverride) { - if (VCount == 192) - { - // in reality rendering already finishes at line 144 - // and games might already start to modify texture memory. - // That doesn't matter for us because we cache the entire - // texture memory anyway and only update it before the start - //of the next frame. - // So we can give the rasteriser a bit more headroom - GPU3D.VCount144(*this); + VCount = NextVCount; + VCountOverride = false; + } + + // check for VCount match - // VBlank - DispStat[0] |= (1<<0); - DispStat[1] |= (1<<0); + if (VCount == VMatch[0]) + SetDispStatIRQ(0, 2); + else + DispStat[0] &= ~(1<<2); - NDS.StopDMAs(0, 0x04); + if (VCount == VMatch[1]) + SetDispStatIRQ(1, 2); + else + DispStat[1] &= ~(1<<2); + + NDS.ScheduleEvent(Event_LCD, true, HBLANK_CYCLES, LCD_StartHBlank, line); +} - NDS.CheckDMAs(0, 0x01); - NDS.CheckDMAs(1, 0x11); - if (DispStat[0] & (1<<3)) NDS.SetIRQ(0, IRQ_VBlank); - if (DispStat[1] & (1<<3)) NDS.SetIRQ(1, IRQ_VBlank); +void GPU::Restart3DFrame() noexcept +{ + Rend->Restart3DRendering(); +} - GPU2D_A.VBlank(); - GPU2D_B.VBlank(); - GPU3D.VBlank(); - // Need a better way to identify the openGL renderer in particular - if (GPU3D.IsRendererAccelerated()) - GPU3D.Blit(*this); - } +/*void GPU::UpdateRegisters(u32 line) +{ + if (line == 0) + { + if (CaptureCnt & (1<<31)) + CaptureEnable = true; + } + else if (line == 192) + { + CaptureCnt &= ~(1<<31); + CaptureEnable = false; } - 
NDS.ScheduleEvent(Event_LCD, true, HBLANK_CYCLES, LCD_StartHBlank, line); -} + GPU2D_A.UpdateRegisters(line); + GPU2D_B.UpdateRegisters(line); +}*/ -void GPU::SetDispStat(u32 cpu, u16 val) noexcept +void GPU::SetDispStat(u32 cpu, u16 val, u16 mask) noexcept { - val &= 0xFFB8; - DispStat[cpu] &= 0x0047; + const u16 ro_mask = 0x0047; + + val &= (mask & ~ro_mask); + DispStat[cpu] &= (~mask | ro_mask); DispStat[cpu] |= val; - VMatch[cpu] = (val >> 8) | ((val & 0x80) << 1); + VMatch[cpu] = (DispStat[cpu] >> 8) | ((DispStat[cpu] & 0x80) << 1); } -void GPU::SetVCount(u16 val) noexcept +void GPU::SetVCount(u16 val, u16 mask) noexcept { + // the VCount register is 9 bits wide + val &= mask & 0x1FF; + // VCount write is delayed until the next scanline // TODO: how does the 3D engine react to VCount writes while it's rendering? // 3D engine seems to give up on the current frame in that situation, repeating the last two scanlines // TODO: also check the various DMA types that can be involved - GPU3D.AbortFrame |= NextVCount != val; - NextVCount = val; + u16 nextvc = (NextVCount & ~mask) | (val & mask); + + GPU3D.AbortFrame |= NextVCount != nextvc; + NextVCount = nextvc; + VCountOverride = true; } template @@ -1186,4 +1508,236 @@ bool GPU::MakeVRAMFlat_BOBJExtPalCoherent(NonStupidBitField<8*1024/VRAMDirtyGran { return CopyLinearVRAM<8*1024>(VRAMFlat_BOBJExtPal, &VRAMMap_BOBJExtPal, dirty, &GPU::ReadVRAM_BOBJExtPal); } + + +void GPU::VRAMCBFlagsSet(u32 bank, u32 block, u16 val) +{ + u16* cbflags = &VRAMCaptureBlockFlags[bank << 2]; + u32 start = block;//val & 0x3; + u32 len = (val >> 4) & 0x3; + + u32 b = start; + for (u32 i = 0; i < len; i++) + { + cbflags[b] = val; + b = (b + 1) & 0x3; + } +} + +void GPU::VRAMCBFlagsClear(u32 bank, u32 block) +{ + u16* cbflags = &VRAMCaptureBlockFlags[bank << 2]; + u16 flags = cbflags[block]; + u32 start = flags & 0x3; + u32 len = (flags >> 4) & 0x3; + + u32 b = start; + for (u32 i = 0; i < len; i++) + { + cbflags[b] = 0; + b = (b + 1) & 0x3; 
+ } +} + +void GPU::VRAMCBFlagsOr(u32 bank, u32 block, u16 val) +{ + u16* cbflags = &VRAMCaptureBlockFlags[bank << 2]; + u16 flags = cbflags[block]; + u32 start = flags & 0x3; + u32 len = (flags >> 4) & 0x3; + + u32 b = start; + for (u32 i = 0; i < len; i++) + { + cbflags[b] |= val; + b = (b + 1) & 0x3; + } +} + +void GPU::CheckCaptureStart() +{ + u32 dstbank = (CaptureCnt >> 16) & 0x3; + if (!(VRAMMap_LCDC & (1<> 18) & 0x3; + u32 size = (CaptureCnt >> 20) & 0x3; + u32 len = (size == 0) ? 1 : size; + + // if needed, invalidate old captures + u16* cbflags = &VRAMCaptureBlockFlags[dstbank << 2]; + u32 b = dstoff; + for (u32 i = 0; i < len; i++) + { + u16 oldflags = cbflags[b]; + b = (b + 1) & 0x3; + + if (!(oldflags & CBFlag_IsCapture)) + continue; + + u32 oldstart = oldflags & 0x3; + u32 oldsize = (oldflags >> 6) & 0x3; + if (oldstart == dstoff && oldsize == size) + continue; + + // we have an old capture here, and it was at a different offset/size + // sync it and invalidate it + + Rend->SyncVRAMCapture(dstbank, oldstart, oldsize, (oldflags & CBFlag_Complete)); + VRAMCBFlagsClear(dstbank, oldstart); + } + + // mark involved VRAM blocks as being a new capture + u16 newval = CBFlag_IsCapture | dstoff | (dstbank << 2) | (len << 4) | (size << 6); + VRAMCBFlagsSet(dstbank, dstoff, newval); + Rend->AllocCapture(dstbank, dstoff, size); +} + +void GPU::CheckCaptureEnd() +{ + // mark this capture as complete + // TODO this will break if they change CaptureCnt during a capture + u32 dstbank = (CaptureCnt >> 16) & 0x3; + u32 dstoff = (CaptureCnt >> 18) & 0x3; + u32 size = (CaptureCnt >> 20) & 0x3; + + u16 flags = VRAMCaptureBlockFlags[(dstbank << 2) | dstoff]; + if (!(flags & CBFlag_IsCapture)) + return; + + u32 oldstart = flags & 0x3; + u32 oldsize = (flags >> 6) & 0x3; + if (dstoff != oldstart || size != oldsize) + return; + + VRAMCBFlagsOr(dstbank, dstoff, CBFlag_Complete); +} + +void GPU::SyncVRAMCaptureBlock(u32 block, bool write) +{ + u16 flags = 
VRAMCaptureBlockFlags[block]; + if (!(flags & CBFlag_IsCapture)) return; + + // sync the capture which contains this block + u32 bank = block >> 2; + u32 start = flags & 0x3; + u32 len = (flags >> 6) & 0x3; + + if (flags & CBFlag_Synced) + { + if (write) + VRAMCBFlagsClear(bank, start); + return; + } + + Rend->SyncVRAMCapture(bank, start, len, (flags & CBFlag_Complete)); + + if (write) + { + // if this block was written to by the CPU, invalidate the entire capture + // the renderer will need to use the emulated VRAM contents + VRAMCBFlagsClear(bank, start); + } + else + { + // if this block was simply read by the CPU, we just need to mark it as synced + VRAMCBFlagsOr(bank, start, CBFlag_Synced); + } +} + +void GPU::SyncAllVRAMCaptures() +{ + for (u32 b = 0; b < 16; b++) + { + u16 flags = VRAMCaptureBlockFlags[b]; + if (!(flags & CBFlag_IsCapture)) + continue; + if (flags & CBFlag_Synced) + continue; + + u32 bank = b >> 2; + u32 start = flags & 0x3; + u32 len = (flags >> 6) & 0x3; + + Rend->SyncVRAMCapture(bank, start, len, (flags & CBFlag_Complete)); + VRAMCBFlagsClear(bank, start); + } +} + +int GPU::GetCaptureBlock_LCDC(u32 offset) +{ + u16 flags = VRAMCaptureBlockFlags[offset >> 15]; + //return (flags & CBFlag_IsCapture); + if (flags & CBFlag_IsCapture) + return ((offset >> 15) & 0xC) | (flags & 0x3); + return -1; +} + +void GPU::GetCaptureInfo(int* info, u16** cbf, int len) +{ + for (int b = 0; b < len; b++) + { + u16* ptr = cbf[b]; + if (!ptr) + { + info[b] = -1; + continue; + } + + u16 flags = *ptr; + if (flags & CBFlag_IsCapture) + info[b] = flags & 0xF; + else + info[b] = -1; + } +} + +void GPU::GetCaptureInfo_ABG(int* info) +{ + return GetCaptureInfo(info, VRAMCBF_ABG, 32); +} + +void GPU::GetCaptureInfo_AOBJ(int* info) +{ + return GetCaptureInfo(info, VRAMCBF_AOBJ, 16); +} + +void GPU::GetCaptureInfo_BBG(int* info) +{ + return GetCaptureInfo(info, VRAMCBF_BBG, 8); +} + +void GPU::GetCaptureInfo_BOBJ(int* info) +{ + return GetCaptureInfo(info, 
VRAMCBF_BOBJ, 8); +} + +void GPU::GetCaptureInfo_Texture(int* info) +{ + for (int b = 0; b < 16; b++) + { + int bank = b >> 2; + int subblock = b & 0x3; + u32 mask = VRAMMap_Texture[bank]; + u16 cbf = 0; + + // check the bank mask + // for now we don't bother with overlapping banks + // this may change if a game happens to do this + if (mask == (1<<0)) + cbf = VRAMCaptureBlockFlags[(0<<2) | subblock]; + else if (mask == (1<<1)) + cbf = VRAMCaptureBlockFlags[(1<<2) | subblock]; + else if (mask == (1<<2)) + cbf = VRAMCaptureBlockFlags[(2<<2) | subblock]; + else if (mask == (1<<3)) + cbf = VRAMCaptureBlockFlags[(3<<2) | subblock]; + + if (cbf & CBFlag_IsCapture) + info[b] = cbf & 0xF; + else + info[b] = -1; + } +} + } diff --git a/src/GPU.h b/src/GPU.h index fd5ad82091..ed593afdcc 100644 --- a/src/GPU.h +++ b/src/GPU.h @@ -52,29 +52,37 @@ struct VRAMTrackingSet NonStupidBitField DeriveState(const u32* currentMappings, GPU& gpu); }; +class Renderer; + class GPU { public: - explicit GPU(melonDS::NDS& nds, std::unique_ptr&& renderer3d = nullptr, std::unique_ptr&& renderer2d = nullptr) noexcept; + explicit GPU(melonDS::NDS& nds, std::unique_ptr&& renderer = nullptr) noexcept; ~GPU() noexcept; void Reset() noexcept; void Stop() noexcept; void DoSavestate(Savestate* file) noexcept; - /// Sets the active renderer to the renderer given in the provided pointer. - /// The pointer is moved-from, so it will be \c nullptr after this method is called. - /// If the pointer is \c nullptr, the renderer is reset to the default renderer. 
- void SetRenderer3D(std::unique_ptr&& renderer) noexcept; - [[nodiscard]] const Renderer3D& GetRenderer3D() const noexcept { return GPU3D.GetCurrentRenderer(); } - [[nodiscard]] Renderer3D& GetRenderer3D() noexcept { return GPU3D.GetCurrentRenderer(); } + void SetRenderer(std::unique_ptr&& renderer) noexcept; + const Renderer& GetRenderer() const noexcept { return *Rend; } + Renderer& GetRenderer() noexcept { return *Rend; } + + // return value for GetFramebuffers: + // true -> pointers to RAM framebuffers are returned via the parameters + // false -> this renderer doesn't use RAM framebuffers + // - values are renderer-specific (ie. OpenGL texture handle) + bool GetFramebuffers(void** top, void** bottom); u8* GetUniqueBankPtr(u32 mask, u32 offset) noexcept; const u8* GetUniqueBankPtr(u32 mask, u32 offset) const noexcept; - void SetRenderer2D(std::unique_ptr&& renderer) noexcept { GPU2D_Renderer = std::move(renderer); } - [[nodiscard]] const GPU2D::Renderer2D& GetRenderer2D() const noexcept { return *GPU2D_Renderer; } - [[nodiscard]] GPU2D::Renderer2D& GetRenderer2D() noexcept { return *GPU2D_Renderer; } + u8 Read8(u32 addr); + u16 Read16(u32 addr); + u32 Read32(u32 addr); + void Write8(u32 addr, u8 val); + void Write16(u32 addr, u16 val); + void Write32(u32 addr, u32 val); void MapVRAM_AB(u32 bank, u8 cnt) noexcept; void MapVRAM_CD(u32 bank, u8 cnt) noexcept; @@ -83,6 +91,68 @@ class GPU void MapVRAM_H(u32 bank, u8 cnt) noexcept; void MapVRAM_I(u32 bank, u8 cnt) noexcept; + /* + VRAM syncing code for display capture blocks + + The software renderer will write display captures straight to VRAM, making this unnecessary. + However, hardware-accelerated renderers may want to keep display captures in GPU memory unless + it is necessary to read them back. This syncing system assists with that. + + Those checks are limited to banks A..D, since those are the only ones that can be used for + display capture. 
+ + TODO: make checks more efficient + */ + + void SyncVRAM_LCDC(u32 addr, bool write) + { + u32 bank = (addr >> 17) & 0x7; + if (bank >= 4) return; + + if (VRAMMap_LCDC & (1<> 15) & 0xF, write); + } + + void SyncVRAM_ABG(u32 addr, bool write) + { + u32 mask = VRAMMap_ABG[(addr >> 14) & 0x1F]; + addr = (addr >> 15) & 0x3; + if (mask & (1<<0)) SyncVRAMCaptureBlock((0<<2) | addr, write); + if (mask & (1<<1)) SyncVRAMCaptureBlock((1<<2) | addr, write); + if (mask & (1<<2)) SyncVRAMCaptureBlock((2<<2) | addr, write); + if (mask & (1<<3)) SyncVRAMCaptureBlock((3<<2) | addr, write); + } + + void SyncVRAM_AOBJ(u32 addr, bool write) + { + u32 mask = VRAMMap_AOBJ[(addr >> 14) & 0xF]; + addr = (addr >> 15) & 0x3; + if (mask & (1<<0)) SyncVRAMCaptureBlock((0<<2) | addr, write); + if (mask & (1<<1)) SyncVRAMCaptureBlock((1<<2) | addr, write); + } + + void SyncVRAM_BBG(u32 addr, bool write) + { + u32 mask = VRAMMap_BBG[(addr >> 14) & 0x7]; + addr = (addr >> 15) & 0x3; + if (mask & (1<<2)) SyncVRAMCaptureBlock((2<<2) | addr, write); + } + + void SyncVRAM_BOBJ(u32 addr, bool write) + { + u32 mask = VRAMMap_BOBJ[(addr >> 14) & 0x7]; + addr = (addr >> 15) & 0x3; + if (mask & (1<<3)) SyncVRAMCaptureBlock((3<<2) | addr, write); + } + + int GetCaptureBlock_LCDC(u32 offset); + + void GetCaptureInfo_ABG(int* info); + void GetCaptureInfo_AOBJ(int* info); + void GetCaptureInfo_BBG(int* info); + void GetCaptureInfo_BOBJ(int* info); + void GetCaptureInfo_Texture(int* info); + template T ReadVRAM_LCDC(u32 addr) const noexcept { @@ -481,7 +551,10 @@ class GPU addr &= 0x7FF; *(T*)&Palette[addr] = val; - PaletteDirty |= 1 << (addr / VRAMDirtyGranularity); + if (addr & 0x3FE) + PaletteDirty |= 1 << (addr / VRAMDirtyGranularity); + else + PaletteDirty |= 0x10 << (addr / VRAMDirtyGranularity); } template @@ -518,11 +591,13 @@ class GPU void StartScanline(u32 line) noexcept; void StartHBlank(u32 line) noexcept; + void Restart3DFrame() noexcept; + void DisplayFIFO(u32 x) noexcept; - void 
SetDispStat(u32 cpu, u16 val) noexcept; + void SetDispStat(u32 cpu, u16 val, u16 mask) noexcept; + void SetVCount(u16 val, u16 mask) noexcept; - void SetVCount(u16 val) noexcept; bool MakeVRAMFlat_ABGCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) noexcept; bool MakeVRAMFlat_BBGCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept; @@ -538,15 +613,28 @@ class GPU bool MakeVRAMFlat_TextureCoherent(NonStupidBitField<512*1024/VRAMDirtyGranularity>& dirty) noexcept; bool MakeVRAMFlat_TexPalCoherent(NonStupidBitField<128*1024/VRAMDirtyGranularity>& dirty) noexcept; - void SyncDirtyFlags() noexcept; - melonDS::NDS& NDS; + + bool ScreensEnabled = false; + bool ScreenSwap = false; + u16 VCount = 0; u16 TotalScanlines = 0; u16 DispStat[2] {}; u8 VRAMCNT[9] {}; u8 VRAMSTAT = 0; + u16 MasterBrightnessA; + u16 MasterBrightnessB; + + u16 DispFIFO[16]; + u8 DispFIFOReadPtr; + u8 DispFIFOWritePtr; + alignas(8) u16 DispFIFOBuffer[256]; + + u32 CaptureCnt; + bool CaptureEnable; + alignas(u64) u8 Palette[2*1024] {}; alignas(u64) u8 OAM[2*1024] {}; @@ -581,11 +669,8 @@ class GPU u8* VRAMPtr_BBG[0x8] {}; u8* VRAMPtr_BOBJ[0x8] {}; - int FrontBuffer = 0; - std::unique_ptr Framebuffer[2][2] {}; - - GPU2D::Unit GPU2D_A; - GPU2D::Unit GPU2D_B; + melonDS::GPU2D GPU2D_A; + melonDS::GPU2D GPU2D_B; melonDS::GPU3D GPU3D; NonStupidBitField<128*1024/VRAMDirtyGranularity> VRAMDirty[9] {}; @@ -615,10 +700,13 @@ class GPU alignas(u64) u8 VRAMFlat_Texture[512*1024] {}; alignas(u64) u8 VRAMFlat_TexPal[128*1024] {}; + + u32 OAMDirty = 0; + u32 PaletteDirty = 0; + private: void ResetVRAMCache() noexcept; - void AssignFramebuffers() noexcept; - void InitFramebuffers() noexcept; + template T ReadVRAM_ABGExtPal(u32 addr) const noexcept { @@ -694,17 +782,99 @@ class GPU return change; } - u32 NextVCount = 0; + u16* GetUniqueBankCBF(u32 mask, u32 offset); + void VRAMCBFlagsSet(u32 bank, u32 block, u16 val); + void VRAMCBFlagsClear(u32 bank, u32 block); + void 
VRAMCBFlagsOr(u32 bank, u32 block, u16 val); + void CheckCaptureStart(); + void CheckCaptureEnd(); + void SyncVRAMCaptureBlock(u32 block, bool write); + void SyncAllVRAMCaptures(); + void GetCaptureInfo(int* info, u16** cbf, int len); + + void SetDispStatIRQ(int cpu, int num); + + bool UsesDisplayFIFO(); + void SampleDisplayFIFO(u32 offset, u32 num); + + bool VCountOverride = false; + u16 NextVCount = 0; bool RunFIFO = false; u16 VMatch[2] {}; - std::unique_ptr GPU2D_Renderer = nullptr; + std::unique_ptr Rend = nullptr; - u32 OAMDirty = 0; - u32 PaletteDirty = 0; + u16 VRAMCaptureBlockFlags[16]; + + u16* VRAMCBF_ABG[0x20] {}; + u16* VRAMCBF_AOBJ[0x10] {}; + u16* VRAMCBF_BBG[0x8] {}; + u16* VRAMCBF_BOBJ[0x8] {}; +}; + + +struct RendererSettings +{ + // scale factor, for renderers that support upscaling + int ScaleFactor; + + // whether to use separate threads for rendering + bool Threaded; + + // whether to use hi-res vertex coordinates when applying upscaling + bool HiresCoordinates; + + // "improved polygon splitting" (regular OpenGL renderer) + bool BetterPolygons; +}; + +class Renderer +{ +public: + explicit Renderer(melonDS::GPU& gpu) : GPU(gpu), BackBuffer(0) {} + virtual ~Renderer() {} + virtual bool Init() = 0; + virtual void Reset() = 0; + virtual void Stop() = 0; + + virtual void PreSavestate() {} + virtual void PostSavestate() {} + + virtual void SetRenderSettings(RendererSettings& settings) = 0; + + virtual void DrawScanline(u32 line) = 0; + virtual void DrawSprites(u32 line) = 0; + + virtual void Start3DRendering() { Rend3D->RenderFrame(); } + virtual void Finish3DRendering() { Rend3D->FinishRendering(); } + virtual void Restart3DRendering() { Rend3D->RestartFrame(); } + + virtual void VBlank() = 0; + virtual void VBlankEnd() = 0; + + virtual void AllocCapture(u32 bank, u32 start, u32 len) = 0; + virtual void SyncVRAMCapture(u32 bank, u32 start, u32 len, bool complete) = 0; + + // a renderer may render to RAM buffers, or to something else (ie. 
OpenGL) + // if the renderer uses RAM buffers, they should be 32-bit BGRA, 256x192 for each screen + virtual bool GetFramebuffers(void** top, void** bottom) = 0; + virtual void SwapBuffers() { BackBuffer ^= 1; } + + virtual bool NeedsShaderCompile() { return false; } + virtual void ShaderCompileStep(int& current, int& count) {} + +protected: + melonDS::GPU& GPU; + + int BackBuffer; + + std::unique_ptr Rend2D_A; + std::unique_ptr Rend2D_B; + std::unique_ptr Rend3D; }; + } #endif diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp index a01a5caa3a..9cd291b3dc 100644 --- a/src/GPU2D.cpp +++ b/src/GPU2D.cpp @@ -37,8 +37,9 @@ using Platform::LogLevel; // * VRAM/FIFO display modes convert colors the same way // * 3D engine converts colors differently (18bit = 15bit * 2 + 1, except 0 = 0) // * 'screen disabled' white is 63,63,63 -// * [Gericom] bit15 is used as bottom green bit for palettes. TODO: check where this applies. -// tested on the normal BG palette and applies there +// * [Gericom] bit15 is used as bottom green bit for palettes. +// applies to any BG/OBJ graphics except direct color +// does not apply to VRAM display or mainmem FIFO // // for VRAM display mode, VRAM must be mapped to LCDC // @@ -85,16 +86,20 @@ using Platform::LogLevel; // for example these aren't affected by POWCNT GPU-disable bits. // to model the hardware more accurately, the relevant logic should be moved to GPU.cpp. 
-namespace GPU2D -{ -Unit::Unit(u32 num, melonDS::GPU& gpu) : Num(num), GPU(gpu) + +GPU2D::GPU2D(u32 num, melonDS::GPU& gpu) : Num(num), GPU(gpu) { } -void Unit::Reset() +void GPU2D::Reset() { Enabled = false; + DispCnt = 0; + memset(DispCntLatch, 0, sizeof(DispCntLatch)); + LayerEnable = 0; + OBJEnable = 0; + ForcedBlank = 0; memset(BGCnt, 0, 4*2); memset(BGXPos, 0, 4*2); memset(BGYPos, 0, 4*2); @@ -102,6 +107,8 @@ void Unit::Reset() memset(BGYRef, 0, 2*4); memset(BGXRefInternal, 0, 2*4); memset(BGYRefInternal, 0, 2*4); + memset(BGXRefReload, 0, 2*4); + memset(BGYRefReload, 0, 2*4); memset(BGRotA, 0, 2*2); memset(BGRotB, 0, 2*2); memset(BGRotC, 0, 2*2); @@ -121,31 +128,26 @@ void Unit::Reset() BGMosaicY = 0; BGMosaicYMax = 0; OBJMosaicY = 0; - OBJMosaicYMax = 0; - OBJMosaicYCount = 0; + BGMosaicLatch = true; + OBJMosaicLatch = true; + BGMosaicLine = 0; + OBJMosaicLine = 0; BlendCnt = 0; EVA = 16; EVB = 0; EVY = 0; - - memset(DispFIFO, 0, 16*2); - DispFIFOReadPtr = 0; - DispFIFOWritePtr = 0; - - memset(DispFIFOBuffer, 0, 256*2); - - CaptureCnt = 0; - CaptureLatch = false; - - MasterBrightness = 0; } -void Unit::DoSavestate(Savestate* file) +void GPU2D::DoSavestate(Savestate* file) { file->Section((char*)(Num ? 
"GP2B" : "GP2A")); file->Var32(&DispCnt); + file->VarArray(DispCntLatch, sizeof(DispCntLatch)); + file->Var8(&LayerEnable); + file->Var8(&OBJEnable); + file->Var8(&ForcedBlank); file->VarArray(BGCnt, 4*2); file->VarArray(BGXPos, 4*2); file->VarArray(BGYPos, 4*2); @@ -153,6 +155,8 @@ void Unit::DoSavestate(Savestate* file) file->VarArray(BGYRef, 2*4); file->VarArray(BGXRefInternal, 2*4); file->VarArray(BGYRefInternal, 2*4); + file->VarArray(BGXRefReload, 2*4); + file->VarArray(BGYRefReload, 2*4); file->VarArray(BGRotA, 2*2); file->VarArray(BGRotB, 2*2); file->VarArray(BGRotC, 2*2); @@ -167,7 +171,10 @@ void Unit::DoSavestate(Savestate* file) file->Var8(&BGMosaicY); file->Var8(&BGMosaicYMax); file->Var8(&OBJMosaicY); - file->Var8(&OBJMosaicYMax); + file->VarBool(&BGMosaicLatch); + file->VarBool(&OBJMosaicLatch); + file->Var32(&BGMosaicLine); + file->Var32(&OBJMosaicLine); file->Var16(&BlendCnt); file->Var16(&BlendAlpha); @@ -175,24 +182,11 @@ void Unit::DoSavestate(Savestate* file) file->Var8(&EVB); file->Var8(&EVY); - file->Var16(&MasterBrightness); - - if (!Num) - { - file->VarArray(DispFIFO, 16*2); - file->Var32(&DispFIFOReadPtr); - file->Var32(&DispFIFOWritePtr); - - file->VarArray(DispFIFOBuffer, 256*2); - - file->Var32(&CaptureCnt); - } - - file->Var32(&Win0Active); - file->Var32(&Win1Active); + file->Var8(&Win0Active); + file->Var8(&Win1Active); } -u8 Unit::Read8(u32 addr) +u8 GPU2D::Read8(u32 addr) { switch (addr & 0x00000FFF) { @@ -215,17 +209,22 @@ u8 Unit::Read8(u32 addr) case 0x04A: return WinCnt[2]; case 0x04B: return WinCnt[3]; + case 0x050: return BlendCnt & 0xFF; + case 0x051: return BlendCnt >> 8; + case 0x052: return BlendAlpha & 0xFF; + case 0x053: return BlendAlpha >> 8; + // there are games accidentally trying to read those // those are write-only case 0x04C: case 0x04D: return 0; } - Log(LogLevel::Debug, "unknown GPU read8 %08X\n", addr); + Log(LogLevel::Debug, "unknown GPU2D read8 %08X\n", addr); return 0; } -u16 Unit::Read16(u32 addr) +u16 
GPU2D::Read16(u32 addr) { switch (addr & 0x00000FFF) { @@ -243,30 +242,23 @@ u16 Unit::Read16(u32 addr) case 0x050: return BlendCnt; case 0x052: return BlendAlpha; // BLDY is write-only - - case 0x064: return CaptureCnt & 0xFFFF; - case 0x066: return CaptureCnt >> 16; - - case 0x06C: return MasterBrightness; } - Log(LogLevel::Debug, "unknown GPU read16 %08X\n", addr); + Log(LogLevel::Debug, "unknown GPU2D read16 %08X\n", addr); return 0; } -u32 Unit::Read32(u32 addr) +u32 GPU2D::Read32(u32 addr) { switch (addr & 0x00000FFF) { case 0x000: return DispCnt; - - case 0x064: return CaptureCnt; } return Read16(addr) | (Read16(addr+2) << 16); } -void Unit::Write8(u32 addr, u8 val) +void GPU2D::Write8(u32 addr, u8 val) { switch (addr & 0x00000FFF) { @@ -287,11 +279,11 @@ void Unit::Write8(u32 addr, u8 val) if (Num) DispCnt &= 0xC0B1FFF7; return; - case 0x10: - if (!Num) GPU.GPU3D.SetRenderXPos((GPU.GPU3D.GetRenderXPos() & 0xFF00) | val); + case 0x010: + if (!Num) GPU.GPU3D.SetRenderXPos(val, 0x00FF); break; - case 0x11: - if (!Num) GPU.GPU3D.SetRenderXPos((GPU.GPU3D.GetRenderXPos() & 0x00FF) | (val << 8)); + case 0x011: + if (!Num) GPU.GPU3D.SetRenderXPos(val << 8, 0xFF00); break; } @@ -367,10 +359,10 @@ void Unit::Write8(u32 addr, u8 val) return; } - Log(LogLevel::Debug, "unknown GPU write8 %08X %02X\n", addr, val); + Log(LogLevel::Debug, "unknown GPU2D write8 %08X %02X\n", addr, val); } -void Unit::Write16(u32 addr, u16 val) +void GPU2D::Write16(u32 addr, u16 val) { switch (addr & 0x00000FFF) { @@ -384,27 +376,8 @@ void Unit::Write16(u32 addr, u16 val) return; case 0x010: - if (!Num) GPU.GPU3D.SetRenderXPos(val); + if (!Num) GPU.GPU3D.SetRenderXPos(val, 0xFFFF); break; - - case 0x064: - CaptureCnt = (CaptureCnt & 0xFFFF0000) | (val & 0xEF3F1F1F); - return; - - case 0x066: - CaptureCnt = (CaptureCnt & 0xFFFF) | ((val << 16) & 0xEF3F1F1F); - return; - - case 0x068: - DispFIFO[DispFIFOWritePtr] = val; - return; - case 0x06A: - DispFIFO[DispFIFOWritePtr+1] = val; - 
DispFIFOWritePtr += 2; - DispFIFOWritePtr &= 0xF; - return; - - case 0x06C: MasterBrightness = val; return; } if (!Enabled) return; @@ -431,21 +404,21 @@ void Unit::Write16(u32 addr, u16 val) case 0x026: BGRotD[0] = val; return; case 0x028: BGXRef[0] = (BGXRef[0] & 0xFFFF0000) | val; - if (GPU.VCount < 192) BGXRefInternal[0] = BGXRef[0]; + BGXRefReload[0] = BGXRef[0]; return; case 0x02A: if (val & 0x0800) val |= 0xF000; BGXRef[0] = (BGXRef[0] & 0xFFFF) | (val << 16); - if (GPU.VCount < 192) BGXRefInternal[0] = BGXRef[0]; + BGXRefReload[0] = BGXRef[0]; return; case 0x02C: BGYRef[0] = (BGYRef[0] & 0xFFFF0000) | val; - if (GPU.VCount < 192) BGYRefInternal[0] = BGYRef[0]; + BGYRefReload[0] = BGYRef[0]; return; case 0x02E: if (val & 0x0800) val |= 0xF000; BGYRef[0] = (BGYRef[0] & 0xFFFF) | (val << 16); - if (GPU.VCount < 192) BGYRefInternal[0] = BGYRef[0]; + BGYRefReload[0] = BGYRef[0]; return; case 0x030: BGRotA[1] = val; return; @@ -454,21 +427,21 @@ void Unit::Write16(u32 addr, u16 val) case 0x036: BGRotD[1] = val; return; case 0x038: BGXRef[1] = (BGXRef[1] & 0xFFFF0000) | val; - if (GPU.VCount < 192) BGXRefInternal[1] = BGXRef[1]; + BGXRefReload[1] = BGXRef[1]; return; case 0x03A: if (val & 0x0800) val |= 0xF000; BGXRef[1] = (BGXRef[1] & 0xFFFF) | (val << 16); - if (GPU.VCount < 192) BGXRefInternal[1] = BGXRef[1]; + BGXRefReload[1] = BGXRef[1]; return; case 0x03C: BGYRef[1] = (BGYRef[1] & 0xFFFF0000) | val; - if (GPU.VCount < 192) BGYRefInternal[1] = BGYRef[1]; + BGYRefReload[1] = BGYRef[1]; return; case 0x03E: if (val & 0x0800) val |= 0xF000; BGYRef[1] = (BGYRef[1] & 0xFFFF) | (val << 16); - if (GPU.VCount < 192) BGYRefInternal[1] = BGYRef[1]; + BGYRefReload[1] = BGYRef[1]; return; case 0x040: @@ -519,10 +492,10 @@ void Unit::Write16(u32 addr, u16 val) return; } - //printf("unknown GPU write16 %08X %04X\n", addr, val); + //printf("unknown GPU2D write16 %08X %04X\n", addr, val); } -void Unit::Write32(u32 addr, u32 val) +void GPU2D::Write32(u32 addr, u32 val) { 
switch (addr & 0x00000FFF) { @@ -530,126 +503,164 @@ void Unit::Write32(u32 addr, u32 val) DispCnt = val; if (Num) DispCnt &= 0xC0B1FFF7; return; + } - case 0x064: - CaptureCnt = val & 0xEF3F1F1F; - return; - - case 0x068: - DispFIFO[DispFIFOWritePtr] = val & 0xFFFF; - DispFIFO[DispFIFOWritePtr+1] = val >> 16; - DispFIFOWritePtr += 2; - DispFIFOWritePtr &= 0xF; + if (!Enabled) + { + Write16(addr, val&0xFFFF); + Write16(addr+2, val>>16); return; } - if (Enabled) + switch (addr & 0x00000FFF) { - switch (addr & 0x00000FFF) - { - case 0x028: - if (val & 0x08000000) val |= 0xF0000000; - BGXRef[0] = val; - if (GPU.VCount < 192) BGXRefInternal[0] = BGXRef[0]; - return; - case 0x02C: - if (val & 0x08000000) val |= 0xF0000000; - BGYRef[0] = val; - if (GPU.VCount < 192) BGYRefInternal[0] = BGYRef[0]; - return; - - case 0x038: - if (val & 0x08000000) val |= 0xF0000000; - BGXRef[1] = val; - if (GPU.VCount < 192) BGXRefInternal[1] = BGXRef[1]; - return; - case 0x03C: - if (val & 0x08000000) val |= 0xF0000000; - BGYRef[1] = val; - if (GPU.VCount < 192) BGYRefInternal[1] = BGYRef[1]; - return; - } + case 0x028: + if (val & 0x08000000) val |= 0xF0000000; + BGXRef[0] = val; + BGXRefReload[0] = BGXRef[0]; + return; + case 0x02C: + if (val & 0x08000000) val |= 0xF0000000; + BGYRef[0] = val; + BGYRefReload[0] = BGYRef[0]; + return; + + case 0x038: + if (val & 0x08000000) val |= 0xF0000000; + BGXRef[1] = val; + BGXRefReload[1] = BGXRef[1]; + return; + case 0x03C: + if (val & 0x08000000) val |= 0xF0000000; + BGYRef[1] = val; + BGYRefReload[1] = BGYRef[1]; + return; } Write16(addr, val&0xFFFF); Write16(addr+2, val>>16); } -void Unit::UpdateMosaicCounters(u32 line) + +u16* GPU2D::GetBGExtPal(u32 slot, u32 pal) { - // Y mosaic uses incrementing 4-bit counters - // the transformed Y position is updated every time the counter matches the MOSAIC register + const u32 PaletteSize = 256 * 2; + const u32 SlotSize = PaletteSize * 16; + return (u16*)&(Num == 0 + ? 
GPU.VRAMFlat_ABGExtPal + : GPU.VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize]; +} - if (OBJMosaicYCount == OBJMosaicSize[1]) - { - OBJMosaicYCount = 0; - OBJMosaicY = line + 1; - } - else - { - OBJMosaicYCount++; - OBJMosaicYCount &= 0xF; - } +u16* GPU2D::GetOBJExtPal() +{ + return Num == 0 + ? (u16*)GPU.VRAMFlat_AOBJExtPal + : (u16*)GPU.VRAMFlat_BOBJExtPal; } -void Unit::VBlank() + +void GPU2D::UpdateRegistersPreDraw(bool reset) { - if (CaptureLatch) + if (!Enabled) return; + + // enabling BG/OBJ layers or disabling forced blank takes two scanlines to apply + // however, disabling layers or enabling forced blank applies immediately + DispCntLatch[2] = DispCntLatch[1]; + DispCntLatch[1] = DispCntLatch[0]; + DispCntLatch[0] = DispCnt; + LayerEnable = ((DispCntLatch[2] & DispCnt) >> 8) & 0x1F; + OBJEnable = ((DispCntLatch[1] & DispCnt) >> 12) & 0x1; + ForcedBlank = ((DispCntLatch[2] | DispCnt) >> 7) & 0x1; + + if (BGMosaicLatch) + BGMosaicLine = GPU.VCount; + + for (int i = 0; i < 2; i++) { - CaptureCnt &= ~(1<<31); - CaptureLatch = false; + if (!(BGCnt[2+i] & (1<<6)) || BGMosaicLatch) + { + BGXRefInternal[i] = BGXRef[i]; + BGYRefInternal[i] = BGYRef[i]; + } } - DispFIFOReadPtr = 0; - DispFIFOWritePtr = 0; -} + if (DispCnt & (1<<12)) + { + // update OBJ mosaic counter -void Unit::VBlankEnd() -{ - // TODO: find out the exact time this happens - BGXRefInternal[0] = BGXRef[0]; - BGXRefInternal[1] = BGXRef[1]; - BGYRefInternal[0] = BGYRef[0]; - BGYRefInternal[1] = BGYRef[1]; + if (reset || (OBJMosaicY == OBJMosaicSize[1])) + { + OBJMosaicY = 0; + OBJMosaicLatch = true; + } + else + { + OBJMosaicY++; + OBJMosaicY &= 0xF; + OBJMosaicLatch = false; + } + } - BGMosaicY = 0; - BGMosaicYMax = BGMosaicSize[1]; - //OBJMosaicY = 0; - //OBJMosaicYMax = OBJMosaicSize[1]; - //OBJMosaicY = 0; - //OBJMosaicYCount = 0; + if (OBJMosaicLatch) + OBJMosaicLine = reset ? 
0 : (GPU.VCount+1); } -void Unit::SampleFIFO(u32 offset, u32 num) +void GPU2D::UpdateRegistersPostDraw(bool reset) { - for (u32 i = 0; i < num; i++) + if (!Enabled) return; + + if (reset) + { + BGMosaicYMax = BGMosaicSize[1]; + BGMosaicY = 0; + BGMosaicLatch = true; + } + else { - u16 val = DispFIFO[DispFIFOReadPtr]; - DispFIFOReadPtr++; - DispFIFOReadPtr &= 0xF; + // for BG mosaic, the size in MOSAIC is copied to an internal register + // on the other hand, OBJ mosaic directly checks against the size in MOSAIC + // this makes the OBJ mosaic counter prone to overflowing if MOSAIC is modified midframe - DispFIFOBuffer[offset+i] = val; + if (BGMosaicY == BGMosaicYMax) + { + BGMosaicYMax = BGMosaicSize[1]; + BGMosaicY = 0; + BGMosaicLatch = true; + } + else + { + BGMosaicY++; + BGMosaicY &= 0xF; + BGMosaicLatch = false; + } } -} -u16* Unit::GetBGExtPal(u32 slot, u32 pal) -{ - const u32 PaletteSize = 256 * 2; - const u32 SlotSize = PaletteSize * 16; - return (u16*)&(Num == 0 - ? GPU.VRAMFlat_ABGExtPal - : GPU.VRAMFlat_BBGExtPal)[slot * SlotSize + pal * PaletteSize]; -} + for (int i = 0; i < 2; i++) + { + // reference points for rotscale layers are only updated if the layer is enabled + // TODO do they get updated if the layer isn't a rotscale layer? 
+ if (!(LayerEnable & (4<> 16) & 0x3) == 3) - return true; - if ((CaptureCnt & (1<<25)) && ((CaptureCnt >> 29) & 0x3) != 0) - return true; - - return false; - } - - void SampleFIFO(u32 offset, u32 num); - - void VBlank(); - virtual void VBlankEnd(); - - void CheckWindows(u32 line); + void UpdateRegistersPreDraw(bool reset); + void UpdateRegistersPostDraw(bool reset); + void UpdateWindows(u32 line); u16* GetBGExtPal(u32 slot, u32 pal); u16* GetOBJExtPal(); @@ -75,19 +59,19 @@ class Unit void GetBGVRAM(u8*& data, u32& mask) const; void GetOBJVRAM(u8*& data, u32& mask) const; - void UpdateMosaicCounters(u32 line); - void CalculateWindowMask(u32 line, u8* windowMask, const u8* objWindow); + void GetCaptureInfo_BG(int* info) const; + void GetCaptureInfo_OBJ(int* info) const; + + void CalculateWindowMask(u8* windowMask, const u8* objWindow); u32 Num; bool Enabled; - u16 DispFIFO[16]; - u32 DispFIFOReadPtr; - u32 DispFIFOWritePtr; - - u16 DispFIFOBuffer[256]; - u32 DispCnt; + u32 DispCntLatch[3]; + u8 LayerEnable; // layer enable - enable delayed by 2 scanlines + u8 OBJEnable; // OBJ enable (for OBJ rendering) - enable delayed by 1 scanline + u8 ForcedBlank; // forced blank - disable delayed by 2 scanlines u16 BGCnt[4]; u16 BGXPos[4]; @@ -97,6 +81,8 @@ class Unit s32 BGYRef[2]; s32 BGXRefInternal[2]; s32 BGYRefInternal[2]; + s32 BGXRefReload[2]; + s32 BGYRefReload[2]; s16 BGRotA[2]; s16 BGRotB[2]; s16 BGRotC[2]; @@ -105,49 +91,51 @@ class Unit u8 Win0Coords[4]; u8 Win1Coords[4]; u8 WinCnt[4]; - u32 Win0Active; - u32 Win1Active; + u8 Win0Active; + u8 Win1Active; u8 BGMosaicSize[2]; u8 OBJMosaicSize[2]; u8 BGMosaicY, BGMosaicYMax; - u8 OBJMosaicYCount, OBJMosaicY, OBJMosaicYMax; + u8 OBJMosaicY; + bool BGMosaicLatch; + bool OBJMosaicLatch; + u32 BGMosaicLine; + u32 OBJMosaicLine; u16 BlendCnt; u16 BlendAlpha; u8 EVA, EVB; u8 EVY; - bool CaptureLatch; - u32 CaptureCnt; - - u16 MasterBrightness; private: + friend class Renderer2D; + melonDS::GPU& GPU; }; class Renderer2D { 
public: + explicit Renderer2D(melonDS::GPU2D& gpu2D) : GPU(gpu2D.GPU), GPU2D(gpu2D) {} virtual ~Renderer2D() {} + virtual bool Init() = 0; + virtual void Reset() = 0; - virtual void DrawScanline(u32 line, Unit* unit) = 0; - virtual void DrawSprites(u32 line, Unit* unit) = 0; + virtual void DrawScanline(u32 line) = 0; + virtual void DrawSprites(u32 line) = 0; - virtual void VBlankEnd(Unit* unitA, Unit* unitB) = 0; + virtual void VBlank() = 0; + virtual void VBlankEnd() = 0; - void SetFramebuffer(u32* unitA, u32* unitB) - { - Framebuffer[0] = unitA; - Framebuffer[1] = unitB; - } -protected: - u32* Framebuffer[2]; + virtual bool NeedsShaderCompile() { return false; } + virtual void ShaderCompileStep(int& current, int& count) {} - Unit* CurUnit; +protected: + melonDS::GPU& GPU; + melonDS::GPU2D& GPU2D; }; } -} #endif diff --git a/src/GPU2D_OpenGL.cpp b/src/GPU2D_OpenGL.cpp new file mode 100644 index 0000000000..9a86213e22 --- /dev/null +++ b/src/GPU2D_OpenGL.cpp @@ -0,0 +1,1906 @@ +/* + Copyright 2016-2025 melonDS team + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include +#include "GPU_OpenGL.h" +#include "GPU2D_OpenGL.h" +#include "GPU.h" +#include "GPU3D.h" + +namespace melonDS +{ +using Platform::Log; +using Platform::LogLevel; + +#include "OpenGL_shaders/2DLayerPreVS.h" +#include "OpenGL_shaders/2DLayerPreFS.h" +#include "OpenGL_shaders/2DSpritePreVS.h" +#include "OpenGL_shaders/2DSpritePreFS.h" +#include "OpenGL_shaders/2DSpriteVS.h" +#include "OpenGL_shaders/2DSpriteFS.h" +#include "OpenGL_shaders/2DCompositorVS.h" +#include "OpenGL_shaders/2DCompositorFS.h" + + +int GLRenderer2D::ShaderCount = 0; +GLuint GLRenderer2D::LayerPreShader = 0; +GLint GLRenderer2D::LayerPreCurBGULoc = 0; +GLuint GLRenderer2D::SpritePreShader = 0; +GLuint GLRenderer2D::SpriteShader = 0; +GLint GLRenderer2D::SpriteRenderTransULoc = 0; +GLuint GLRenderer2D::CompositorShader = 0; +GLint GLRenderer2D::CompositorScaleULoc = 0; +GLuint GLRenderer2D::MosaicTex = 0; + + +GLRenderer2D::GLRenderer2D(melonDS::GPU2D& gpu2D, GLRenderer& parent) + : Renderer2D(gpu2D), Parent(parent) +{ + ScaleFactor = 0; +} + +#define glDefaultTexParams(target) \ + glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); \ + glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); \ + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); \ + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + +bool GLRenderer2D::Init() +{ + GLint uniloc; + + if (ShaderCount++ == 0) + { + // compile shaders + + if (!OpenGL::CompileVertexFragmentProgram(LayerPreShader, + k2DLayerPreVS, k2DLayerPreFS, + "2DLayerPreShader", + {{"vPosition", 0}}, + {{"oColor", 0}})) + return false; + + if (!OpenGL::CompileVertexFragmentProgram(SpritePreShader, + k2DSpritePreVS, k2DSpritePreFS, + "2DSpritePreShader", + {{"vPosition", 0}, {"vSpriteIndex", 1}}, + {{"oColor", 0}})) + return false; + + if (!OpenGL::CompileVertexFragmentProgram(SpriteShader, + k2DSpriteVS, k2DSpriteFS, + "2DSpriteShader", + {{"vPosition", 0}, {"vTexcoord", 1}, {"vSpriteIndex", 2}}, + {{"oColor", 0}, 
{"oFlags", 1}})) + return false; + + if (!OpenGL::CompileVertexFragmentProgram(CompositorShader, + k2DCompositorVS, k2DCompositorFS, + "2DCompositorShader", + {{"vPosition", 0}}, + {{"oColor", 0}})) + return false; + + // set up uniforms + + glUseProgram(LayerPreShader); + + uniloc = glGetUniformLocation(LayerPreShader, "VRAMTex"); + glUniform1i(uniloc, 0); + uniloc = glGetUniformLocation(LayerPreShader, "PalTex"); + glUniform1i(uniloc, 1); + + uniloc = glGetUniformBlockIndex(LayerPreShader, "ubBGConfig"); + glUniformBlockBinding(LayerPreShader, uniloc, 20); + + LayerPreCurBGULoc = glGetUniformLocation(LayerPreShader, "uCurBG"); + + + glUseProgram(SpritePreShader); + + uniloc = glGetUniformLocation(SpritePreShader, "VRAMTex"); + glUniform1i(uniloc, 0); + uniloc = glGetUniformLocation(SpritePreShader, "PalTex"); + glUniform1i(uniloc, 1); + + uniloc = glGetUniformBlockIndex(SpritePreShader, "ubSpriteConfig"); + glUniformBlockBinding(SpritePreShader, uniloc, 21); + + + glUseProgram(SpriteShader); + + uniloc = glGetUniformLocation(SpriteShader, "SpriteTex"); + glUniform1i(uniloc, 0); + uniloc = glGetUniformLocation(SpriteShader, "Capture128Tex"); + glUniform1i(uniloc, 1); + uniloc = glGetUniformLocation(SpriteShader, "Capture256Tex"); + glUniform1i(uniloc, 2); + + uniloc = glGetUniformBlockIndex(SpriteShader, "ubSpriteConfig"); + glUniformBlockBinding(SpriteShader, uniloc, 21); + uniloc = glGetUniformBlockIndex(SpriteShader, "ubSpriteScanlineConfig"); + glUniformBlockBinding(SpriteShader, uniloc, 24); + + SpriteRenderTransULoc = glGetUniformLocation(SpriteShader, "uRenderTransparent"); + + + glUseProgram(CompositorShader); + + uniloc = glGetUniformLocation(CompositorShader, "BGLayerTex[0]"); + glUniform1i(uniloc, 0); + uniloc = glGetUniformLocation(CompositorShader, "BGLayerTex[1]"); + glUniform1i(uniloc, 1); + uniloc = glGetUniformLocation(CompositorShader, "BGLayerTex[2]"); + glUniform1i(uniloc, 2); + uniloc = glGetUniformLocation(CompositorShader, "BGLayerTex[3]"); 
+ glUniform1i(uniloc, 3); + uniloc = glGetUniformLocation(CompositorShader, "OBJLayerTex"); + glUniform1i(uniloc, 4); + uniloc = glGetUniformLocation(CompositorShader, "Capture128Tex"); + glUniform1i(uniloc, 5); + uniloc = glGetUniformLocation(CompositorShader, "Capture256Tex"); + glUniform1i(uniloc, 6); + uniloc = glGetUniformLocation(CompositorShader, "MosaicTex"); + glUniform1i(uniloc, 7); + + uniloc = glGetUniformBlockIndex(CompositorShader, "ubBGConfig"); + glUniformBlockBinding(CompositorShader, uniloc, 20); + uniloc = glGetUniformBlockIndex(CompositorShader, "ubScanlineConfig"); + glUniformBlockBinding(CompositorShader, uniloc, 22); + uniloc = glGetUniformBlockIndex(CompositorShader, "ubCompositorConfig"); + glUniformBlockBinding(CompositorShader, uniloc, 23); + + CompositorScaleULoc = glGetUniformLocation(CompositorShader, "uScaleFactor"); + + // generate mosaic lookup texture + + u8* mosaic_tex = new u8[256 * 16]; + for (int m = 0; m < 16; m++) + { + int mosx = 0; + for (int x = 0; x < 256; x++) + { + mosaic_tex[(m * 256) + x] = mosx; + + if (mosx == m) + mosx = 0; + else + mosx++; + } + } + + glGenTextures(1, &MosaicTex); + glBindTexture(GL_TEXTURE_2D, MosaicTex); + glDefaultTexParams(GL_TEXTURE_2D); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R8I, 256, 16, 0, GL_RED_INTEGER, GL_BYTE, mosaic_tex); + + delete[] mosaic_tex; + } + + // sprite prerender vertex data: 2x position, 1x sprite index + int sprdatasize = (3 * 6) * 128; + SpritePreVtxData = new u16[sprdatasize]; + + glGenBuffers(1, &SpritePreVtxBuffer); + glBindBuffer(GL_ARRAY_BUFFER, SpritePreVtxBuffer); + glBufferData(GL_ARRAY_BUFFER, sprdatasize * sizeof(u16), nullptr, GL_STREAM_DRAW); + + glGenVertexArrays(1, &SpritePreVtxArray); + glBindVertexArray(SpritePreVtxArray); + glEnableVertexAttribArray(0); // position + glVertexAttribIPointer(0, 2, GL_SHORT, 3 * sizeof(u16), (void*)0); + glEnableVertexAttribArray(1); // sprite index + glVertexAttribIPointer(1, 1, GL_SHORT, 3 * sizeof(u16), (void*)(2 * 
sizeof(u16))); + + // sprite vertex data: 2x position, 2x texcoord, 1x index + sprdatasize = (5 * 6) * 256; + SpriteVtxData = new u16[sprdatasize]; + + glGenBuffers(1, &SpriteVtxBuffer); + glBindBuffer(GL_ARRAY_BUFFER, SpriteVtxBuffer); + glBufferData(GL_ARRAY_BUFFER, sprdatasize * sizeof(u16), nullptr, GL_STREAM_DRAW); + + glGenVertexArrays(1, &SpriteVtxArray); + glBindVertexArray(SpriteVtxArray); + glEnableVertexAttribArray(0); // position + glVertexAttribIPointer(0, 2, GL_SHORT, 5 * sizeof(u16), (void*)0); + glEnableVertexAttribArray(1); // texcoord + glVertexAttribIPointer(1, 2, GL_SHORT, 5 * sizeof(u16), (void*)(2 * sizeof(u16))); + glEnableVertexAttribArray(2); // sprite index + glVertexAttribIPointer(2, 1, GL_SHORT, 5 * sizeof(u16), (void*)(4 * sizeof(u16))); + + // generate textures to hold raw BG and OBJ VRAM and palettes + + int bgheight = (GPU2D.Num == 0) ? 512 : 128; + int objheight = (GPU2D.Num == 0) ? 256 : 128; + + glGenTextures(1, &VRAMTex_BG); + glBindTexture(GL_TEXTURE_2D, VRAMTex_BG); + glDefaultTexParams(GL_TEXTURE_2D); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R8UI, 1024, bgheight, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, nullptr); + + glGenTextures(1, &VRAMTex_OBJ); + glBindTexture(GL_TEXTURE_2D, VRAMTex_OBJ); + glDefaultTexParams(GL_TEXTURE_2D); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R8UI, 1024, objheight, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, nullptr); + + glGenTextures(1, &PalTex_BG); + glBindTexture(GL_TEXTURE_2D, PalTex_BG); + glDefaultTexParams(GL_TEXTURE_2D); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB5_A1, 256, 1+(4*16), 0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, nullptr); + + glGenTextures(1, &PalTex_OBJ); + glBindTexture(GL_TEXTURE_2D, PalTex_OBJ); + glDefaultTexParams(GL_TEXTURE_2D); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB5_A1, 256, 1+16, 0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, nullptr); + + // generate texture to hold pre-rendered BG layers + + glGenTextures(22, AllBGLayerTex); + glGenFramebuffers(22, AllBGLayerFB); + + const u16 bgsizes[8][3] = 
{ + {128, 128, 2}, + {256, 256, 4}, + {256, 512, 4}, + {512, 256, 4}, + {512, 512, 4}, + {512, 1024, 1}, + {1024, 512, 1}, + {1024, 1024, 2} + }; + + int l = 0; + for (int j = 0; j < 8; j++) + { + const u16* sz = bgsizes[j]; + + for (int k = 0; k < sz[2]; k++) + { + glBindTexture(GL_TEXTURE_2D, AllBGLayerTex[l]); + glDefaultTexParams(GL_TEXTURE_2D); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, sz[0], sz[1], 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + glBindFramebuffer(GL_FRAMEBUFFER, AllBGLayerFB[l]); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, AllBGLayerTex[l], 0); + glDrawBuffer(GL_COLOR_ATTACHMENT0); + + l++; + } + } + + // generate texture to hold pre-rendered sprites + + glGenTextures(1, &SpriteTex); + glBindTexture(GL_TEXTURE_2D, SpriteTex); + glDefaultTexParams(GL_TEXTURE_2D); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1024, 512, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + glGenFramebuffers(1, &SpriteFB); + glBindFramebuffer(GL_FRAMEBUFFER, SpriteFB); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, SpriteTex, 0); + glDrawBuffer(GL_COLOR_ATTACHMENT0); + + // generate texture to hold final (upscaled) sprites + + glGenTextures(1, &OBJLayerTex); + glBindTexture(GL_TEXTURE_2D_ARRAY, OBJLayerTex); + glDefaultTexParams(GL_TEXTURE_2D_ARRAY); + + glGenTextures(1, &OBJDepthTex); + glBindTexture(GL_TEXTURE_2D, OBJDepthTex); + glDefaultTexParams(GL_TEXTURE_2D); + + glGenFramebuffers(1, &OBJLayerFB); + + // generate texture for the compositor output + + glGenTextures(1, &OutputTex); + glBindTexture(GL_TEXTURE_2D, OutputTex); + glDefaultTexParams(GL_TEXTURE_2D); + + glGenFramebuffers(1, &OutputFB); + + Parent.OutputTex2D[GPU2D.Num] = OutputTex; + + // generate UBOs + + glGenBuffers(1, &LayerConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, LayerConfigUBO); + static_assert((sizeof(sLayerConfig) & 15) == 0); + glBufferData(GL_UNIFORM_BUFFER, sizeof(sLayerConfig), nullptr, GL_STREAM_DRAW); + + glGenBuffers(1, &SpriteConfigUBO); + 
glBindBuffer(GL_UNIFORM_BUFFER, SpriteConfigUBO); + static_assert((sizeof(sSpriteConfig) & 15) == 0); + glBufferData(GL_UNIFORM_BUFFER, sizeof(sSpriteConfig), nullptr, GL_STREAM_DRAW); + + glGenBuffers(1, &ScanlineConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, ScanlineConfigUBO); + static_assert((sizeof(sScanlineConfig) & 15) == 0); + glBufferData(GL_UNIFORM_BUFFER, sizeof(sScanlineConfig), nullptr, GL_STREAM_DRAW); + + glGenBuffers(1, &SpriteScanlineConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, SpriteScanlineConfigUBO); + static_assert((sizeof(sSpriteScanlineConfig) & 15) == 0); + glBufferData(GL_UNIFORM_BUFFER, sizeof(sSpriteScanlineConfig), nullptr, GL_STREAM_DRAW); + + glGenBuffers(1, &CompositorConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, CompositorConfigUBO); + static_assert((sizeof(sCompositorConfig) & 15) == 0); + glBufferData(GL_UNIFORM_BUFFER, sizeof(sCompositorConfig), nullptr, GL_STREAM_DRAW); + + return true; +} + +GLRenderer2D::~GLRenderer2D() +{ + if (--ShaderCount == 0) + { + glDeleteProgram(LayerPreShader); + glDeleteProgram(SpritePreShader); + glDeleteProgram(SpriteShader); + glDeleteProgram(CompositorShader); + + glDeleteTextures(1, &MosaicTex); + } + + glDeleteBuffers(1, &SpritePreVtxBuffer); + glDeleteVertexArrays(1, &SpritePreVtxArray); + + glDeleteBuffers(1, &SpriteVtxBuffer); + glDeleteVertexArrays(1, &SpriteVtxArray); + + glDeleteBuffers(1, &LayerConfigUBO); + glDeleteBuffers(1, &SpriteConfigUBO); + + glDeleteTextures(1, &VRAMTex_BG); + glDeleteTextures(1, &VRAMTex_OBJ); + glDeleteTextures(1, &PalTex_BG); + glDeleteTextures(1, &PalTex_OBJ); + + glDeleteTextures(22, AllBGLayerTex); + glDeleteFramebuffers(22, AllBGLayerFB); + + glDeleteTextures(1, &SpriteTex); + glDeleteFramebuffers(1, &SpriteFB); + + glDeleteTextures(1, &OBJLayerTex); + glDeleteTextures(1, &OBJDepthTex); + glDeleteFramebuffers(1, &OBJLayerFB); + + glDeleteTextures(1, &OutputTex); + glDeleteFramebuffers(1, &OutputFB); + + glDeleteBuffers(1, &ScanlineConfigUBO); + 
glDeleteBuffers(1, &SpriteScanlineConfigUBO); + glDeleteBuffers(1, &CompositorConfigUBO); +} + +void GLRenderer2D::Reset() +{ + memset(BGLayerFB, 0, sizeof(BGLayerFB)); + memset(BGLayerTex, 0, sizeof(BGLayerTex)); + + memset(&LayerConfig, 0, sizeof(LayerConfig)); + memset(&SpriteConfig, 0, sizeof(SpriteConfig)); + memset(&ScanlineConfig, 0, sizeof(ScanlineConfig)); + memset(&SpriteScanlineConfig, 0, sizeof(SpriteScanlineConfig)); + memset(&CompositorConfig, 0, sizeof(CompositorConfig)); + + int bgheight = (GPU2D.Num == 0) ? 512 : 128; + int objheight = (GPU2D.Num == 0) ? 256 : 128; + LayerConfig.uVRAMMask = bgheight - 1; + SpriteConfig.uVRAMMask = objheight - 1; + + LastLine = 0; + + UnitEnabled = false; + + DispCnt = 0; + LayerEnable = 0; + OBJEnable = 0; + ForcedBlank = 0; + memset(BGCnt, 0, sizeof(BGCnt)); + BlendCnt = 0; + EVA = 0; EVB = 0; EVY = 0; + + memset(BGVRAMRange, 0xFF, sizeof(BGVRAMRange)); + + LayerConfigDirty = true; + + LastSpriteLine = 0; + memset(OAM, 0, sizeof(OAM)); + NumSprites = 0; + SpriteUseMosaic = false; + + SpriteDispCnt = 0; + SpriteConfigDirty = true; + SpriteDirty = true; + + memset(TempPalBuffer, 0, sizeof(TempPalBuffer)); +} + +void GLRenderer2D::PostSavestate() +{ + Reset(); +} + + +void GLRenderer2D::SetScaleFactor(int scale) +{ + if (scale == ScaleFactor) + return; + + ScaleFactor = scale; + ScreenW = 256 * scale; + ScreenH = 192 * scale; + + const GLenum fbassign2[] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1}; + + glUseProgram(CompositorShader); + glUniform1i(CompositorScaleULoc, ScaleFactor); + + glBindTexture(GL_TEXTURE_2D_ARRAY, OBJLayerTex); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA, ScreenW, ScreenH, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + glBindTexture(GL_TEXTURE_2D, OBJDepthTex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, ScreenW, ScreenH, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, nullptr); + + glBindFramebuffer(GL_FRAMEBUFFER, OBJLayerFB); + glFramebufferTextureLayer(GL_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0, OBJLayerTex, 0, 0); + glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, OBJLayerTex, 0, 1); + glFramebufferTexture(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, OBJDepthTex, 0); + glDrawBuffers(2, fbassign2); + + glBindTexture(GL_TEXTURE_2D, OutputTex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + glBindFramebuffer(GL_FRAMEBUFFER, OutputFB); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, OutputTex, 0); + glDrawBuffer(GL_COLOR_ATTACHMENT0); +} + + +bool GLRenderer2D::IsScreenOn() +{ + if (!GPU.ScreensEnabled) return false; + if (!GPU2D.Enabled) return false; + if (GPU2D.ForcedBlank) return false; + + u16 masterbright = GPU2D.Num ? GPU.MasterBrightnessB : GPU.MasterBrightnessA; + u16 brightmode = masterbright >> 14; + u16 brightness = masterbright & 0x1F; + if ((brightmode == 1 || brightmode == 2) && brightness >= 16) + return false; + + u16 layers = GPU2D.LayerEnable | 0x20; + u16 bldeffect = (GPU2D.BlendCnt >> 6) & 0x3; + u16 bldlayers = GPU2D.BlendCnt & layers & 0x3F; + if ((bldeffect == 2 || bldeffect == 3) && bldlayers == layers && GPU2D.EVY >= 16 && + !(GPU2D.DispCnt & 0xE000)) + return false; + + u32 dispmode = (GPU2D.DispCnt >> 16) & 0x3; + if (dispmode != 1) + { + if (GPU2D.Num) return false; + if (!GPU.CaptureEnable) return false; + } + + return true; +} + + +void GLRenderer2D::UpdateAndRender(int line) +{ + u32 palmask = 1 << (GPU2D.Num * 2); + + // check if any 'critical' registers were modified + + u32 dispcnt_diff; + u8 layer_diff; + u16 bgcnt_diff[4]; + + dispcnt_diff = GPU2D.DispCnt ^ DispCnt; + layer_diff = GPU2D.LayerEnable ^ LayerEnable; + for (int layer = 0; layer < 4; layer++) + bgcnt_diff[layer] = GPU2D.BGCnt[layer] ^ BGCnt[layer]; + + u8 layer_pre_dirty = 0; + bool comp_dirty = false; + bool screenon = IsScreenOn(); + + if (dispcnt_diff & 0x8) + layer_pre_dirty |= 0x1; + if (dispcnt_diff & 0x7) + layer_pre_dirty |= 0xC; + if (dispcnt_diff & 
0x7F000000) + layer_pre_dirty |= 0xF; + + if (dispcnt_diff & 0x0000E008) + comp_dirty = true; + else if (layer_diff & 0x1F) + comp_dirty = true; + else if (UnitEnabled != GPU2D.Enabled) + comp_dirty = true; + else if (ForcedBlank != GPU2D.ForcedBlank) + comp_dirty = true; + + for (int layer = 0; layer < 4; layer++) + { + u16 mask = 0xDFBC; + if (layer < 2) mask |= (1 << 13); + if (bgcnt_diff[layer] & mask) + layer_pre_dirty |= (1 << layer); + if (bgcnt_diff[layer] & (~mask)) + comp_dirty = true; + } + + if ((GPU2D.BlendCnt != BlendCnt) || + (GPU2D.EVA != EVA) || + (GPU2D.EVB != EVB) || + (GPU2D.EVY != EVY)) + comp_dirty = true; + + // check if VRAM was modified, and flatten it as needed + + static_assert(VRAMDirtyGranularity == 512); + NonStupidBitField<1024> bgDirty; + NonStupidBitField<64> bgExtPalDirty; + NonStupidBitField<16> objExtPalDirty; + + if (screenon) + { + if (GPU2D.Num == 0) + { + bgDirty = GPU.VRAMDirty_ABG.DeriveState(GPU.VRAMMap_ABG, GPU); + GPU.MakeVRAMFlat_ABGCoherent(bgDirty); + + bgExtPalDirty = GPU.VRAMDirty_ABGExtPal.DeriveState(GPU.VRAMMap_ABGExtPal, GPU); + GPU.MakeVRAMFlat_ABGExtPalCoherent(bgExtPalDirty); + objExtPalDirty = GPU.VRAMDirty_AOBJExtPal.DeriveState(&GPU.VRAMMap_AOBJExtPal, GPU); + GPU.MakeVRAMFlat_AOBJExtPalCoherent(objExtPalDirty); + } + else + { + auto _bgDirty = GPU.VRAMDirty_BBG.DeriveState(GPU.VRAMMap_BBG, GPU); + GPU.MakeVRAMFlat_BBGCoherent(_bgDirty); + for (int i = 0; i < 1024; i += 256) + memcpy(&bgDirty.Data[i>>6], _bgDirty.Data, 256>>3); + + bgExtPalDirty = GPU.VRAMDirty_BBGExtPal.DeriveState(GPU.VRAMMap_BBGExtPal, GPU); + GPU.MakeVRAMFlat_BBGExtPalCoherent(bgExtPalDirty); + objExtPalDirty = GPU.VRAMDirty_BOBJExtPal.DeriveState(&GPU.VRAMMap_BOBJExtPal, GPU); + GPU.MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); + } + } + + // for each layer, check if the VRAM and palettes involved are dirty + + for (int layer = 0; layer < 4; layer++) + { + const u32* rangeinfo = BGVRAMRange[layer]; + + // to consider: only check 
the tileset range that is actually used + // (would require parsing the tilemap) + for (int r = 0; r < 4; r+=2) + { + if (rangeinfo[r] == 0xFFFFFFFF) + continue; + + bool dirty = false; + u32 rstart = (rangeinfo[r] >> 9) & 0x3FF; + u32 rcount = (rangeinfo[r+1] >> 9); + if ((rstart + rcount) > 1024) + { + dirty = bgDirty.CheckRange(rstart, 1024-rstart) || + bgDirty.CheckRange(0, rcount-(1024-rstart)); + } + else + dirty = bgDirty.CheckRange(rstart, rcount); + + if (dirty) + layer_pre_dirty |= (1 << layer); + } + + auto& cfg = LayerConfig.uBGConfig[layer]; + if ((cfg.Type == 1 || cfg.Type == 3) && (cfg.PalOffset > 0)) + { + u32 pal = cfg.PalOffset - 1; + if (bgExtPalDirty.CheckRange(pal, pal + 16)) + layer_pre_dirty |= (1 << layer); + } + else if (cfg.Type <= 4) + { + if (GPU.PaletteDirty & palmask) + layer_pre_dirty |= (1 << layer); + } + } + + if (layer_pre_dirty) + comp_dirty = true; + + if (Parent.NeedPartialRender) + comp_dirty = true; + + // if needed, render sprites + + if ((comp_dirty || SpriteDirty) && (line > 0)) + { + DoRenderSprites(line); + } + + // if needed, composite the previous screen section + + if (comp_dirty && (line > 0)) + { + RenderScreen(LastLine, line); + LastLine = line; + } + + // update registers + + UnitEnabled = GPU2D.Enabled; + DispCnt = GPU2D.DispCnt; + LayerEnable = GPU2D.LayerEnable; + OBJEnable = GPU2D.OBJEnable; + ForcedBlank = GPU2D.ForcedBlank; + for (int layer = 0; layer < 4; layer++) + BGCnt[layer] = GPU2D.BGCnt[layer]; + BlendCnt = GPU2D.BlendCnt; + EVA = GPU2D.EVA; + EVB = GPU2D.EVB; + EVY = GPU2D.EVY; + + if (layer_pre_dirty || LayerConfigDirty) + UpdateLayerConfig(); + + UpdateScanlineConfig(line); + + // update VRAM and palettes + + int dirtybits = GPU2D.Num ? 256 : 1024; + if (bgDirty.CheckRange(0, dirtybits)) + { + // TODO: only do it for active layers? 
+ // this would require keeping track of the dirty state for areas not included in any layer + + u8 *vram; + u32 vrammask; + GPU2D.GetBGVRAM(vram, vrammask); + + glBindTexture(GL_TEXTURE_2D, VRAMTex_BG); + + int texlen = dirtybits >> 6; + for (int i = 0; i < texlen; ) + { + if (!bgDirty.Data[i]) + { + i++; + continue; + } + + int start = i * 32; + for (;;) + { + i++; + if (i >= texlen) break; + if (!bgDirty.Data[i]) break; + } + int end = i * 32; + + glTexSubImage2D(GL_TEXTURE_2D, 0, + 0, start, + 1024, end - start, + GL_RED_INTEGER, GL_UNSIGNED_BYTE, + &vram[start * 1024]); + } + } + + if ((GPU.PaletteDirty & palmask) || bgExtPalDirty.CheckRange(0, 64)) + { + memcpy(&TempPalBuffer[0], &GPU.Palette[GPU2D.Num ? 0x400 : 0], 256*2); + for (int s = 0; s < 4; s++) + { + for (int p = 0; p < 16; p++) + { + u16 *pal = GPU2D.GetBGExtPal(s, p); + memcpy(&TempPalBuffer[(1 + ((s*16)+p)) * 256], pal, 256*2); + } + } + + glBindTexture(GL_TEXTURE_2D, PalTex_BG); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 1+(4*16), GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, + TempPalBuffer); + } + + GPU.PaletteDirty &= ~palmask; + + if (layer_pre_dirty) + { + // pre-render BG layers with the new settings + + glUseProgram(LayerPreShader); + + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthMask(GL_FALSE); + + glBindBufferBase(GL_UNIFORM_BUFFER, 20, LayerConfigUBO); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, VRAMTex_BG); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, PalTex_BG); + + for (int layer = 0; layer < 4; layer++) + { + if (!(layer_pre_dirty & (1 << layer))) + continue; + + PrerenderLayer(layer); + } + } + + if (SpriteDirty) + { + // OAM and VRAM have already been updated prior + // palette needs to be updated here though + + // TODO make this only do it over the required subsection? 
NumSprites = 0;
        SpriteUseMosaic = false;
        // rebuild the sprite list for the whole screen (lines 0..191)
        UpdateOAM(0, 192);

        glActiveTexture(GL_TEXTURE0);
        glBindTexture(GL_TEXTURE_2D, VRAMTex_OBJ);

        // row 0: standard OBJ palette; rows 1..16: extended OBJ palettes
        memcpy(&TempPalBuffer[0], &GPU.Palette[GPU2D.Num ? 0x600 : 0x200], 256*2);
        {
            u16* pal = GPU2D.GetOBJExtPal();
            memcpy(&TempPalBuffer[256], pal, 256*16*2);
        }

        glActiveTexture(GL_TEXTURE1);
        glBindTexture(GL_TEXTURE_2D, PalTex_OBJ);
        glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 1+16, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, TempPalBuffer);

        PrerenderSprites();

        LastSpriteLine = line;
    }

    LayerConfigDirty = false;
    SpriteDirty = false;
}


// Called for each scanline; all the work is done by UpdateAndRender().
void GLRenderer2D::DrawScanline(u32 line)
{
    UpdateAndRender(line);
}

// Called at VBlank: renders whatever part of the frame has not been rendered
// yet (up to line 192) and resets the line trackers for the next frame.
void GLRenderer2D::VBlank()
{
    DoRenderSprites(192);
    RenderScreen(LastLine, 192);

    LastSpriteLine = 0;
    LastLine = 0;
}

// Nothing to do at the end of VBlank.
void GLRenderer2D::VBlankEnd()
{
}


// Fills in the per-scanline config entry for 'line': BG scroll/reference
// positions and rotscale parameters, backdrop color, mosaic sizes and window
// registers/coordinates.
// NOTE(review): 'line' is used directly as an index into ScanlineConfig.uScanline;
// callers are presumably expected to pass 0..191 -- confirm.
// Side effect: bit 1 of GPU2D.Win0Active / GPU2D.Win1Active is updated here.
void GLRenderer2D::UpdateScanlineConfig(int line)
{
    auto& cfg = ScanlineConfig.uScanline[line];

    // update BG layer coordinates
    // Y coordinates are adjusted to account for vertical mosaic
    // horizontal mosaic will be done during compositing

    u32 bgmode = DispCnt & 0x7;
    bool xmosaic = (GPU2D.BGMosaicSize[0] > 0);

    if (DispCnt & (1<<3))
    {
        // 3D layer
        // the X position is a 9-bit value, sign-extended here
        int xpos = GPU.GPU3D.GetRenderXPos() & 0x1FF;
        cfg.BGOffset[0][0] = xpos - ((xpos & 0x100) << 1);
        cfg.BGOffset[0][1] = line;
        cfg.BGMosaicEnable[0] = false;
    }
    else
    {
        // text layer
        cfg.BGOffset[0][0] = GPU2D.BGXPos[0];
        if (GPU2D.BGCnt[0] & (1<<6))
        {
            // mosaic enabled: use the mosaic-adjusted line
            cfg.BGOffset[0][1] = GPU2D.BGYPos[0] + GPU2D.BGMosaicLine;
            cfg.BGMosaicEnable[0] = xmosaic;
        }
        else
        {
            cfg.BGOffset[0][1] = GPU2D.BGYPos[0] + line;
            cfg.BGMosaicEnable[0] = false;
        }
    }

    // always a text layer
    cfg.BGOffset[1][0] = GPU2D.BGXPos[1];
    if (GPU2D.BGCnt[1] & (1<<6))
    {
        cfg.BGOffset[1][1] = GPU2D.BGYPos[1] + GPU2D.BGMosaicLine;
        cfg.BGMosaicEnable[1] = xmosaic;
    }
    else
    {
        cfg.BGOffset[1][1] = GPU2D.BGYPos[1] + line;
        cfg.BGMosaicEnable[1] = false;
    }

    // BG2: rotscale parameter set 0 in modes 2 and 4..6, text layer otherwise
    if ((bgmode == 2) || (bgmode >= 4 && bgmode <= 6))
    {
        // rotscale layer
        cfg.BGOffset[2][0] = GPU2D.BGXRefInternal[0];
        cfg.BGOffset[2][1] = GPU2D.BGYRefInternal[0];
        cfg.BGRotscale[0][0] = GPU2D.BGRotA[0];
        cfg.BGRotscale[0][1] = GPU2D.BGRotB[0];
        cfg.BGRotscale[0][2] = GPU2D.BGRotC[0];
        cfg.BGRotscale[0][3] = GPU2D.BGRotD[0];
    }
    else
    {
        // text layer
        cfg.BGOffset[2][0] = GPU2D.BGXPos[2];
        if (GPU2D.BGCnt[2] & (1<<6))
            cfg.BGOffset[2][1] = GPU2D.BGYPos[2] + GPU2D.BGMosaicLine;
        else
            cfg.BGOffset[2][1] = GPU2D.BGYPos[2] + line;
    }

    if (GPU2D.BGCnt[2] & (1<<6))
        cfg.BGMosaicEnable[2] = xmosaic;
    else
        cfg.BGMosaicEnable[2] = false;

    // BG3: rotscale parameter set 1 in modes 1..5, text layer otherwise
    if (bgmode >= 1 && bgmode <= 5)
    {
        // rotscale layer
        cfg.BGOffset[3][0] = GPU2D.BGXRefInternal[1];
        cfg.BGOffset[3][1] = GPU2D.BGYRefInternal[1];
        cfg.BGRotscale[1][0] = GPU2D.BGRotA[1];
        cfg.BGRotscale[1][1] = GPU2D.BGRotB[1];
        cfg.BGRotscale[1][2] = GPU2D.BGRotC[1];
        cfg.BGRotscale[1][3] = GPU2D.BGRotD[1];
    }
    else
    {
        // text layer
        cfg.BGOffset[3][0] = GPU2D.BGXPos[3];
        if (GPU2D.BGCnt[3] & (1<<6))
            cfg.BGOffset[3][1] = GPU2D.BGYPos[3] + GPU2D.BGMosaicLine;
        else
            cfg.BGOffset[3][1] = GPU2D.BGYPos[3] + line;
    }

    if (GPU2D.BGCnt[3] & (1<<6))
        cfg.BGMosaicEnable[3] = xmosaic;
    else
        cfg.BGMosaicEnable[3] = false;

    // backdrop: first entry of this unit's standard BG palette
    u16* pal = (u16*)&GPU.Palette[GPU2D.Num ? 0x400 : 0];
    cfg.BackColor = pal[0];

    // mosaic

    cfg.MosaicSize[0] = GPU2D.BGMosaicSize[0];
    cfg.MosaicSize[1] = GPU2D.BGMosaicSize[1];
    cfg.MosaicSize[2] = GPU2D.OBJMosaicSize[0];
    cfg.MosaicSize[3] = GPU2D.OBJMosaicSize[1];

    // windows
    // WinRegs packs four 8-bit window control values: WinCnt[2], WinCnt[3],
    // WinCnt[1], WinCnt[0] from the low byte up. A disabled window's byte is
    // forced to 0xFF.
    // NOTE(review): the shifts below operate on the promoted value of WinCnt[n];
    // '<< 24' relies on bit 7 of WinCnt[0] being clear to stay within int range -- confirm.

    //cfg.WinRegs = GPU2D.WinCnt[2] | (GPU2D.WinCnt[3] << 8) | (GPU2D.WinCnt[1] << 16) | (GPU2D.WinCnt[0] << 24);
    if (GPU2D.DispCnt & 0xE000)
        cfg.WinRegs = GPU2D.WinCnt[2];
    else
        cfg.WinRegs = 0xFF;

    if (GPU2D.DispCnt & (1<<15))
        cfg.WinRegs |= (GPU2D.WinCnt[3] << 8);
    else
        cfg.WinRegs |= 0xFF00;

    if (GPU2D.DispCnt & (1<<14))
        cfg.WinRegs |= (GPU2D.WinCnt[1] << 16);
    else
        cfg.WinRegs |= 0xFF0000;

    if (GPU2D.DispCnt & (1<<13))
        cfg.WinRegs |= (GPU2D.WinCnt[0] << 24);
    else
        cfg.WinRegs |= 0xFF000000;

    cfg.WinMask = 0;

    // window 0: WinPos[0..1] always hold the two X coordinates in ascending
    // order; WinMask bits 0..2 describe the window state for this line
    if ((GPU2D.DispCnt & (1<<13)) && (GPU2D.Win0Active & 0x1))
    {
        int x0 = GPU2D.Win0Coords[0];
        int x1 = GPU2D.Win0Coords[1];

        if (x0 <= x1)
        {
            cfg.WinPos[0] = x0;
            cfg.WinPos[1] = x1;
            if (GPU2D.Win0Active == 0x3)
                cfg.WinMask |= (1<<0);
            cfg.WinMask |= (1<<1);
            GPU2D.Win0Active &= ~0x2;
        }
        else
        {
            // left edge is past the right edge: coordinates are swapped
            cfg.WinPos[0] = x1;
            cfg.WinPos[1] = x0;
            if (GPU2D.Win0Active == 0x3)
                cfg.WinMask |= (1<<0);
            cfg.WinMask |= (1<<2);
            GPU2D.Win0Active |= 0x2;
        }
    }
    else
    {
        // window inactive on this line: both coordinates are set to 256
        cfg.WinPos[0] = 256;
        cfg.WinPos[1] = 256;
    }

    // window 1: same as window 0, using WinPos[2..3] and WinMask bits 3..5
    if ((GPU2D.DispCnt & (1<<14)) && (GPU2D.Win1Active & 0x1))
    {
        int x0 = GPU2D.Win1Coords[0];
        int x1 = GPU2D.Win1Coords[1];

        if (x0 <= x1)
        {
            cfg.WinPos[2] = x0;
            cfg.WinPos[3] = x1;
            if (GPU2D.Win1Active == 0x3)
                cfg.WinMask |= (1<<3);
            cfg.WinMask |= (1<<4);
            GPU2D.Win1Active &= ~0x2;
        }
        else
        {
            cfg.WinPos[2] = x1;
            cfg.WinPos[3] = x0;
            if (GPU2D.Win1Active == 0x3)
                cfg.WinMask |= (1<<3);
            cfg.WinMask |= (1<<5);
            GPU2D.Win1Active |= 0x2;
        }
    }
    else
    {
        cfg.WinPos[2] = 256;
        cfg.WinPos[3] = 256;
    }
};

// Rebuilds the BG layer config from the current registers: layer type, size,
// tile/map/palette offsets, the pre-render target used by each layer, and the
// VRAM ranges used for dirty tracking; then uploads it to LayerConfigUBO.
void GLRenderer2D::UpdateLayerConfig()
{
    // determine which parts of VRAM were used for captures
int capturemask = GPU2D.Num ? 0x7 : 0x1F; + int captureinfo[32]; + GPU2D.GetCaptureInfo_BG(captureinfo); + + u32 tilebase, mapbase; + if (!GPU2D.Num) + { + tilebase = ((GPU2D.DispCnt >> 24) & 0x7) << 16; + mapbase = ((GPU2D.DispCnt >> 27) & 0x7) << 16; + } + else + { + tilebase = 0; + mapbase = 0; + } + + int layertype[4] = {1, 1, 0, 0}; + switch (GPU2D.DispCnt & 0x7) + { + case 0: layertype[2] = 1; layertype[3] = 1; break; + case 1: layertype[2] = 1; layertype[3] = 2; break; + case 2: layertype[2] = 2; layertype[3] = 2; break; + case 3: layertype[2] = 1; layertype[3] = 3; break; + case 4: layertype[2] = 2; layertype[3] = 3; break; + case 5: layertype[2] = 3; layertype[3] = 3; break; + case 6: layertype[0] = 0; layertype[1] = 0; + layertype[2] = 4; layertype[3] = 0; break; + case 7: layertype[2] = 0; layertype[3] = 0; break; + } + + for (int layer = 0; layer < 4; layer++) + { + int type = layertype[layer]; + if (!type) + continue; + + u16 bgcnt = GPU2D.BGCnt[layer]; + auto& cfg = LayerConfig.uBGConfig[layer]; + + cfg.TileOffset = tilebase + (((bgcnt >> 2) & 0xF) << 14); + cfg.MapOffset = mapbase + (((bgcnt >> 8) & 0x1F) << 11); + cfg.PalOffset = 0; + + BGVRAMRange[layer][0] = cfg.TileOffset; + BGVRAMRange[layer][2] = cfg.MapOffset; + + if ((layer == 0) && (GPU2D.DispCnt & (1<<3))) + { + // 3D layer + + cfg.Size[0] = 256; cfg.Size[1] = 192; + cfg.Type = 6; + cfg.Clamp = 1; + + BGVRAMRange[layer][0] = 0xFFFFFFFF; + BGVRAMRange[layer][1] = 0xFFFFFFFF; + BGVRAMRange[layer][2] = 0xFFFFFFFF; + BGVRAMRange[layer][3] = 0xFFFFFFFF; + } + else if (type == 1) + { + // text layer + + u32 tilesz, mapsz; + switch (bgcnt >> 14) + { + case 0: cfg.Size[0] = 256; cfg.Size[1] = 256; mapsz = 0x800; break; + case 1: cfg.Size[0] = 512; cfg.Size[1] = 256; mapsz = 0x1000; break; + case 2: cfg.Size[0] = 256; cfg.Size[1] = 512; mapsz = 0x1000; break; + case 3: cfg.Size[0] = 512; cfg.Size[1] = 512; mapsz = 0x2000; break; + } + + if (bgcnt & (1<<7)) + { + // 256-color + cfg.Type = 1; + if 
(DispCnt & (1<<30)) + { + // extended palette + int paloff = layer; + if ((layer < 2) && (bgcnt & (1<<13))) + paloff += 2; + cfg.PalOffset = 1 + (16 * paloff); + } + + tilesz = 0x10000; + } + else + { + // 16-color + cfg.Type = 0; + + tilesz = 0x8000; + } + + cfg.Clamp = 0; + + int n = BGBaseIndex[0][bgcnt >> 14] + layer; + BGLayerTex[layer] = AllBGLayerTex[n]; + BGLayerFB[layer] = AllBGLayerFB[n]; + + BGVRAMRange[layer][1] = tilesz; + BGVRAMRange[layer][3] = mapsz; + } + else if (type == 2) + { + // affine layer + + u32 mapsz; + switch (bgcnt >> 14) + { + case 0: cfg.Size[0] = 128; cfg.Size[1] = 128; mapsz = 0x100; break; + case 1: cfg.Size[0] = 256; cfg.Size[1] = 256; mapsz = 0x400; break; + case 2: cfg.Size[0] = 512; cfg.Size[1] = 512; mapsz = 0x1000; break; + case 3: cfg.Size[0] = 1024; cfg.Size[1] = 1024; mapsz = 0x4000; break; + } + + cfg.Type = 2; + cfg.Clamp = !(bgcnt & (1<<13)); + + int n = BGBaseIndex[1][bgcnt >> 14] + layer - 2; + BGLayerTex[layer] = AllBGLayerTex[n]; + BGLayerFB[layer] = AllBGLayerFB[n]; + + BGVRAMRange[layer][1] = 0x4000; + BGVRAMRange[layer][3] = mapsz; + } + else if (type == 3) + { + // extended layer + + if (bgcnt & (1<<7)) + { + // bitmap modes + + u32 mapsz; + switch (bgcnt >> 14) + { + case 0: cfg.Size[0] = 128; cfg.Size[1] = 128; mapsz = 0x4000; break; + case 1: cfg.Size[0] = 256; cfg.Size[1] = 256; mapsz = 0x10000; break; + case 2: cfg.Size[0] = 512; cfg.Size[1] = 256; mapsz = 0x20000; break; + case 3: cfg.Size[0] = 512; cfg.Size[1] = 512; mapsz = 0x40000; break; + } + + u32 tileoffset = 0; + u32 mapoffset = ((bgcnt >> 8) & 0x1F) << 14; + + BGVRAMRange[layer][0] = 0xFFFFFFFF; + BGVRAMRange[layer][1] = 0xFFFFFFFF; + BGVRAMRange[layer][2] = mapoffset; + BGVRAMRange[layer][3] = mapsz; + + if (bgcnt & (1<<2)) + { + mapsz <<= 1; + + int capblock = -1; + if ((cfg.Size[0] == 128) || (cfg.Size[0] == 256)) + { + // if this is a direct color bitmap, and the width is 128 or 256 + // then it might be a display capture + u32 startaddr = 
mapoffset; + u32 endaddr = startaddr + mapsz; + + startaddr >>= 14; + endaddr = (endaddr + 0x3FFF) >> 14; + + for (u32 b = startaddr; b < endaddr; b++) + { + int blk = captureinfo[b & capturemask]; + if (blk == -1) continue; + + capblock = blk; + } + } + + if (capblock != -1) + { + if (cfg.Size[0] == 128) + { + cfg.Type = 7; + tileoffset = capblock; + mapoffset = (mapoffset >> 8) & 0x7F; + } + else + { + cfg.Type = 8; + tileoffset = capblock >> 2; + mapoffset = (mapoffset >> 9) & 0xFF; + } + } + else + cfg.Type = 5; + } + else + cfg.Type = 4; + + cfg.TileOffset = tileoffset; + cfg.MapOffset = mapoffset; + + int n = BGBaseIndex[2][bgcnt >> 14] + layer - 2; + BGLayerTex[layer] = AllBGLayerTex[n]; + BGLayerFB[layer] = AllBGLayerFB[n]; + } + else + { + // rotscale w/ tiles + + u32 mapsz; + switch (bgcnt >> 14) + { + case 0: cfg.Size[0] = 128; cfg.Size[1] = 128; mapsz = 0x200; break; + case 1: cfg.Size[0] = 256; cfg.Size[1] = 256; mapsz = 0x800; break; + case 2: cfg.Size[0] = 512; cfg.Size[1] = 512; mapsz = 0x2000; break; + case 3: cfg.Size[0] = 1024; cfg.Size[1] = 1024; mapsz = 0x8000; break; + } + + // this layer type is always 256-color + cfg.Type = 3; + if (DispCnt & (1<<30)) + { + // extended palette + int paloff = layer; + if ((layer < 2) && (bgcnt & (1<<13))) + paloff += 2; + cfg.PalOffset = 1 + (16 * paloff); + } + + int n = BGBaseIndex[1][bgcnt >> 14] + layer - 2; + BGLayerTex[layer] = AllBGLayerTex[n]; + BGLayerFB[layer] = AllBGLayerFB[n]; + + BGVRAMRange[layer][1] = 0x10000; + BGVRAMRange[layer][3] = mapsz; + } + + cfg.Clamp = !(bgcnt & (1<<13)); + } + else //if (type == 4) + { + // large layer + + u32 mapsz; + switch (bgcnt >> 14) + { + case 0: cfg.Size[0] = 512; cfg.Size[1] = 1024; mapsz = 0x80000; break; + case 1: cfg.Size[0] = 1024; cfg.Size[1] = 512; mapsz = 0x80000; break; + case 2: cfg.Size[0] = 512; cfg.Size[1] = 256; mapsz = 0x20000; break; + case 3: cfg.Size[0] = 512; cfg.Size[1] = 512; mapsz = 0x40000; break; + } + + cfg.Type = 4; + cfg.TileOffset 
= 0; + cfg.MapOffset = 0; + cfg.Clamp = !(bgcnt & (1<<13)); + + int n = BGBaseIndex[3][bgcnt >> 14]; + BGLayerTex[layer] = AllBGLayerTex[n]; + BGLayerFB[layer] = AllBGLayerFB[n]; + + BGVRAMRange[layer][0] = 0xFFFFFFFF; + BGVRAMRange[layer][1] = 0xFFFFFFFF; + BGVRAMRange[layer][3] = mapsz; + } + } + + glBindBuffer(GL_UNIFORM_BUFFER, LayerConfigUBO); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(LayerConfig), &LayerConfig); +} + +void GLRenderer2D::UpdateOAM(int ystart, int yend) +{ + auto& cfg = SpriteConfig; + u16* oam = OAM; + + // determine which parts of VRAM were used for captures + int capturemask = GPU2D.Num ? 0x7 : 0xF; + int captureinfo[16]; + GPU2D.GetCaptureInfo_OBJ(captureinfo); + + for (int i = 0; i < 32; i++) + { + s16* rotscale = (s16*)&oam[(i * 16) + 3]; + auto& rotdst = cfg.uRotscale[i]; + + rotdst[0] = rotscale[0]; + rotdst[1] = rotscale[4]; + rotdst[2] = rotscale[8]; + rotdst[3] = rotscale[12]; + } + + const u8 spritewidth[16] = + { + 8, 16, 8, 8, + 16, 32, 8, 8, + 32, 32, 16, 8, + 64, 64, 32, 8 + }; + const u8 spriteheight[16] = + { + 8, 8, 16, 8, + 16, 8, 32, 8, + 32, 16, 32, 8, + 64, 32, 64, 8 + }; + + for (int sprnum = 0; sprnum < 128; sprnum++) + { + u16* attrib = &oam[sprnum * 4]; + + u32 sprtype = (attrib[0] >> 8) & 0x3; + if (sprtype == 2) // sprite disabled + continue; + + // note on sprite position: + // X > 255 is interpreted as negative (-256..-1) + // Y > 127 is interpreted as both positive (128..255) and negative (-128..-1) + + s32 xpos = (s32)(attrib[1] << 23) >> 23; + s32 ypos = (s32)(attrib[0] << 24) >> 24; + + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + s32 boundwidth = width; + s32 boundheight = height; + + if (sprtype == 3) + { + // double-size rotscale sprite + boundwidth <<= 1; + boundheight <<= 1; + } + + if (xpos <= -boundwidth) + continue; + + bool yc0 = ((ypos + boundheight) > ystart) && (ypos < yend); + bool yc1 = 
(((ypos&0xFF) + boundheight) > ystart) && ((ypos&0xFF) < yend); + if (!(yc0 || yc1)) + continue; + + u32 sprmode = (attrib[0] >> 10) & 0x3; + if (sprmode == 3) + { + if ((GPU2D.DispCnt & 0x60) == 0x60) + continue; + if ((attrib[2] >> 12) == 0) + continue; + } + + if (NumSprites >= 128) + { + Log(LogLevel::Error, "GPU2D_OpenGL: SPRITE BUFFER IS FULL!!!!!\n"); + break; + } + + // add this sprite to the OAM array + + auto& sprcfg = cfg.uOAM[NumSprites]; + + sprcfg.Position[0] = (u32)xpos; + sprcfg.Position[1] = (u32)ypos; + sprcfg.Size[0] = width; + sprcfg.Size[1] = height; + sprcfg.BoundSize[0] = boundwidth; + sprcfg.BoundSize[1] = boundheight; + + if (sprtype & 1) + { + sprcfg.Flip[0] = 0; + sprcfg.Flip[1] = 0; + sprcfg.Rotscale = (attrib[1] >> 9) & 0x1F; + } + else + { + sprcfg.Flip[0] = !!(attrib[1] & (1<<12)); + sprcfg.Flip[1] = !!(attrib[1] & (1<<13)); + sprcfg.Rotscale = (u32)-1; + } + + sprcfg.OBJMode = sprmode; + sprcfg.Mosaic = !!(attrib[0] & (1<<12)) && (sprmode != 2); + sprcfg.BGPrio = (attrib[2] >> 10) & 0x3; + + u32 tilenum = attrib[2] & 0x3FF; + + if (sprmode == 3) + { + // bitmap sprite + + sprcfg.Type = 2; + + if (GPU2D.DispCnt & (1<<6)) + { + // 1D mapping + sprcfg.TileOffset = tilenum << (7 + ((GPU2D.DispCnt >> 22) & 0x1)); + sprcfg.TileStride = width * 2; + } + else + { + bool is256 = !!(GPU2D.DispCnt & (1<<5)); + int capblock = -1; + + u32 tileoffset, tilestride; + if (is256) + { + // 2D mapping, 256 pixels + tileoffset = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); + tilestride = 256 * 2; + } + else + { + // 2D mapping, 128 pixels + tileoffset = ((tilenum & 0x00F) << 4) + ((tilenum & 0x3F0) << 7); + tilestride = 128 * 2; + } + + // if this is a direct color bitmap, and the width is 128 or 256 + // then it might be a display capture + u32 startaddr = tileoffset; + u32 endaddr = startaddr + (height * tilestride); + + startaddr >>= 14; + endaddr = (endaddr + 0x3FFF) >> 14; + + for (u32 b = startaddr; b < endaddr; b++) + { + int blk = 
captureinfo[b & capturemask]; + if (blk == -1) continue; + + capblock = blk; + } + + if (capblock != -1) + { + if (!is256) + { + sprcfg.Type = 3; + tilestride = capblock; + tileoffset &= 0x7FFF; + } + else + { + sprcfg.Type = 4; + tilestride = capblock >> 2; + tileoffset &= 0x1FFFF; + } + } + + sprcfg.TileOffset = tileoffset; + sprcfg.TileStride = tilestride; + } + + sprcfg.PalOffset = 1 + (attrib[2] >> 12); // alpha + } + else + { + if (GPU2D.DispCnt & (1<<4)) + { + // 1D mapping + sprcfg.TileOffset = tilenum << (5 + ((GPU2D.DispCnt >> 20) & 0x3)); + sprcfg.TileStride = (width >> 3) * 32; + if (attrib[0] & (1<<13)) + sprcfg.TileStride <<= 1; + } + else + { + // 2D mapping + sprcfg.TileOffset = tilenum << 5; + sprcfg.TileStride = 32 * 32; + } + + if (attrib[0] & (1<<13)) + { + // 256-color sprite + sprcfg.Type = 1; + if (GPU2D.DispCnt & (1<<31)) + sprcfg.PalOffset = 1 + (attrib[2] >> 12); + else + sprcfg.PalOffset = 0; + } + else + { + // 16-color sprite + sprcfg.Type = 0; + sprcfg.PalOffset = (attrib[2] >> 12) << 4; + } + } + + NumSprites++; + + if (sprcfg.Mosaic && (GPU2D.OBJMosaicSize[0] > 0)) + SpriteUseMosaic = true; + } + + glBindBuffer(GL_UNIFORM_BUFFER, SpriteConfigUBO); + glBufferSubData(GL_UNIFORM_BUFFER, + 0, + offsetof(sSpriteConfig, uOAM) + (NumSprites * sizeof(cfg.uOAM[0])), + &cfg); +} + +void GLRenderer2D::UpdateCompositorConfig() +{ + // compositor info buffer + for (int i = 0; i < 4; i++) + CompositorConfig.uBGPrio[i] = -1; + + for (int layer = 0; layer < 4; layer++) + { + if (!(LayerEnable & (1 << layer))) + continue; + + int prio = BGCnt[layer] & 0x3; + CompositorConfig.uBGPrio[layer] = prio; + } + + CompositorConfig.uEnableOBJ = !!(LayerEnable & (1<<4)); + + CompositorConfig.uEnable3D = !!(DispCnt & (1<<3)); + + CompositorConfig.uBlendCnt = BlendCnt; + CompositorConfig.uBlendEffect = (BlendCnt >> 6) & 0x3; + CompositorConfig.uBlendCoef[0] = EVA; + CompositorConfig.uBlendCoef[1] = EVB; + CompositorConfig.uBlendCoef[2] = EVY; + + 
glBindBuffer(GL_UNIFORM_BUFFER, CompositorConfigUBO); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(CompositorConfig), &CompositorConfig); +} + + +void GLRenderer2D::PrerenderSprites() +{ + u16* vtxbuf = SpritePreVtxData; + int vtxnum = 0; + + for (int i = 0; i < NumSprites; i++) + { + auto& sprite = SpriteConfig.uOAM[i]; + if (sprite.Type >= 3) + continue; + + *vtxbuf++ = 0; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = 1; *vtxbuf++ = 0; *vtxbuf++ = i; + *vtxbuf++ = 1; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = 0; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = 0; *vtxbuf++ = 0; *vtxbuf++ = i; + *vtxbuf++ = 1; *vtxbuf++ = 0; *vtxbuf++ = i; + vtxnum += 6; + } + + if (vtxnum == 0) return; + + glUseProgram(SpritePreShader); + + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthMask(GL_FALSE); + + glBindBufferBase(GL_UNIFORM_BUFFER, 21, SpriteConfigUBO); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, SpriteFB); + glViewport(0, 0, 1024, 512); + + glBindBuffer(GL_ARRAY_BUFFER, SpritePreVtxBuffer); + glBufferSubData(GL_ARRAY_BUFFER, 0, vtxnum * 3 * sizeof(u16), SpritePreVtxData); + + glBindVertexArray(SpritePreVtxArray); + glDrawArrays(GL_TRIANGLES, 0, vtxnum); +} + +void GLRenderer2D::PrerenderLayer(int layer) +{ + auto& cfg = LayerConfig.uBGConfig[layer]; + + if (cfg.Type >= 6) + return; + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, BGLayerFB[layer]); + + glUniform1i(LayerPreCurBGULoc, layer); + + // set layer size + glViewport(0, 0, cfg.Size[0], cfg.Size[1]); + + glBindBuffer(GL_ARRAY_BUFFER, Parent.RectVtxBuffer); + glBindVertexArray(Parent.RectVtxArray); + glDrawArrays(GL_TRIANGLES, 0, 2*3); +} + + +void GLRenderer2D::DoRenderSprites(int line) +{ + int ystart = LastSpriteLine; + int yend = line; + + glUseProgram(SpriteShader); + + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + 
glDisable(GL_BLEND); + + glBindBufferBase(GL_UNIFORM_BUFFER, 21, SpriteConfigUBO); + glBindBufferBase(GL_UNIFORM_BUFFER, 24, SpriteScanlineConfigUBO); + + glBindBuffer(GL_UNIFORM_BUFFER, SpriteScanlineConfigUBO); + glBufferSubData(GL_UNIFORM_BUFFER, + ystart * sizeof(s32), + (yend - ystart) * sizeof(s32), + &SpriteScanlineConfig.uMosaicLine[ystart]); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OBJLayerFB); + glViewport(0, 0, ScreenW, ScreenH); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, SpriteTex); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D_ARRAY, Parent.CaptureOutput128Tex); + + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D_ARRAY, Parent.CaptureOutput256Tex); + + glEnable(GL_SCISSOR_TEST); + glScissor(0, ystart * ScaleFactor, ScreenW, (yend-ystart) * ScaleFactor); + + // NOTE + // this requires two passes for mosaic emulation, because mosaic flags get set for + // transparent pixels too, and priority is only checked against opaque pixels + + glClearColor(0, 0, 0, 0); + glClearDepth(1); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthMask(GL_TRUE); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glColorMaski(0, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glDepthMask(GL_FALSE); + + if (SpriteUseMosaic) + { + glUniform1i(SpriteRenderTransULoc, 1); + glColorMaski(1, GL_FALSE, GL_TRUE, GL_FALSE, GL_TRUE); + + RenderSprites(false, ystart, yend); + } + + glUniform1i(SpriteRenderTransULoc, 0); + glColorMaski(1, GL_FALSE, GL_FALSE, GL_TRUE, GL_FALSE); + + RenderSprites(true, ystart, yend); + + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + glDepthMask(GL_TRUE); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_TRUE, GL_TRUE, GL_FALSE, GL_TRUE); + + RenderSprites(false, ystart, yend); + + glDisable(GL_SCISSOR_TEST); +} + +void GLRenderer2D::RenderSprites(bool window, 
int ystart, int yend) +{ + if (window) + { + if (!(GPU2D.DispCnt & (1<<15))) + return; + } + + u16* vtxbuf = SpriteVtxData; + int vtxnum = 0; + + for (int i = 0; i < NumSprites; i++) + { + auto& sprite = SpriteConfig.uOAM[i]; + + bool iswin = (sprite.OBJMode == 2); + if (iswin != window) + continue; + + s32 xpos = sprite.Position[0]; + s32 ypos = sprite.Position[1]; + s32 boundwidth = sprite.BoundSize[0]; + s32 boundheight = sprite.BoundSize[1]; + + bool yc0 = ((ypos + boundheight) > ystart) && (ypos < yend); + bool yc1 = (((ypos&0xFF) + boundheight) > ystart) && ((ypos&0xFF) < yend); + + if (yc0) + { + s32 x0 = xpos, x1 = xpos + boundwidth; + s32 y0 = ypos, y1 = ypos + boundheight; + + *vtxbuf++ = x0; *vtxbuf++ = y1; *vtxbuf++ = 0; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = x1; *vtxbuf++ = y0; *vtxbuf++ = 1; *vtxbuf++ = 0; *vtxbuf++ = i; + *vtxbuf++ = x1; *vtxbuf++ = y1; *vtxbuf++ = 1; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = x0; *vtxbuf++ = y1; *vtxbuf++ = 0; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = x0; *vtxbuf++ = y0; *vtxbuf++ = 0; *vtxbuf++ = 0; *vtxbuf++ = i; + *vtxbuf++ = x1; *vtxbuf++ = y0; *vtxbuf++ = 1; *vtxbuf++ = 0; *vtxbuf++ = i; + vtxnum += 6; + } + + if (yc1) + { + ypos &= 0xFF; + s32 x0 = xpos, x1 = xpos + boundwidth; + s32 y0 = ypos, y1 = ypos + boundheight; + + *vtxbuf++ = x0; *vtxbuf++ = y1; *vtxbuf++ = 0; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = x1; *vtxbuf++ = y0; *vtxbuf++ = 1; *vtxbuf++ = 0; *vtxbuf++ = i; + *vtxbuf++ = x1; *vtxbuf++ = y1; *vtxbuf++ = 1; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = x0; *vtxbuf++ = y1; *vtxbuf++ = 0; *vtxbuf++ = 1; *vtxbuf++ = i; + *vtxbuf++ = x0; *vtxbuf++ = y0; *vtxbuf++ = 0; *vtxbuf++ = 0; *vtxbuf++ = i; + *vtxbuf++ = x1; *vtxbuf++ = y0; *vtxbuf++ = 1; *vtxbuf++ = 0; *vtxbuf++ = i; + vtxnum += 6; + } + } + + if (vtxnum == 0) return; + + glBindBuffer(GL_ARRAY_BUFFER, SpriteVtxBuffer); + glBufferSubData(GL_ARRAY_BUFFER, 0, vtxnum * 5 * sizeof(u16), SpriteVtxData); + + 
glBindVertexArray(SpriteVtxArray); + glDrawArrays(GL_TRIANGLES, 0, vtxnum); +} + +void GLRenderer2D::RenderScreen(int ystart, int yend) +{ + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, OutputFB); + + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_BLEND); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthMask(GL_FALSE); + + glViewport(0, 0, ScreenW, ScreenH); + + glEnable(GL_SCISSOR_TEST); + glScissor(0, ystart * ScaleFactor, ScreenW, (yend-ystart) * ScaleFactor); + + if (ForcedBlank || !UnitEnabled) + { + if (!UnitEnabled) + { + if (GPU2D.Num) + glClearColor(1, 1, 1, 1); + else + glClearColor(0, 0, 0, 1); + } + else + glClearColor(1, 1, 1, 1); + + glClear(GL_COLOR_BUFFER_BIT); + + glDisable(GL_SCISSOR_TEST); + return; + } + + glUseProgram(CompositorShader); + + glBindBufferBase(GL_UNIFORM_BUFFER, 20, LayerConfigUBO); + glBindBufferBase(GL_UNIFORM_BUFFER, 22, ScanlineConfigUBO); + glBindBufferBase(GL_UNIFORM_BUFFER, 23, CompositorConfigUBO); + + glBindBuffer(GL_UNIFORM_BUFFER, ScanlineConfigUBO); + glBufferSubData(GL_UNIFORM_BUFFER, + ystart * sizeof(sScanlineConfig::sScanline), + (yend - ystart) * sizeof(sScanlineConfig::sScanline), + &ScanlineConfig.uScanline[ystart]); + + UpdateCompositorConfig(); + + for (int i = 0; i < 4; i++) + { + glActiveTexture(GL_TEXTURE0 + i); + + if ((i == 0) && (DispCnt & (1<<3))) + glBindTexture(GL_TEXTURE_2D, Parent.OutputTex3D); + else + glBindTexture(GL_TEXTURE_2D, BGLayerTex[i]); + + GLint wrapmode = LayerConfig.uBGConfig[i].Clamp ? 
GL_CLAMP_TO_BORDER : GL_REPEAT; + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, wrapmode); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, wrapmode); + } + + glActiveTexture(GL_TEXTURE4); + glBindTexture(GL_TEXTURE_2D_ARRAY, OBJLayerTex); + + glActiveTexture(GL_TEXTURE5); + glBindTexture(GL_TEXTURE_2D_ARRAY, Parent.CaptureOutput128Tex); + + glActiveTexture(GL_TEXTURE6); + glBindTexture(GL_TEXTURE_2D_ARRAY, Parent.CaptureOutput256Tex); + + glActiveTexture(GL_TEXTURE7); + glBindTexture(GL_TEXTURE_2D, MosaicTex); + + glBindBuffer(GL_ARRAY_BUFFER, Parent.RectVtxBuffer); + glBindVertexArray(Parent.RectVtxArray); + glDrawArrays(GL_TRIANGLES, 0, 2*3); + + glDisable(GL_SCISSOR_TEST); +} + +void GLRenderer2D::DrawSprites(u32 line) +{ + u32 oammask = 1 << GPU2D.Num; + bool dirty = false; + bool screenon = IsScreenOn(); + + SpriteScanlineConfig.uMosaicLine[line] = GPU2D.OBJMosaicLine; + + u32 dispcnt_diff = GPU2D.DispCnt ^ SpriteDispCnt; + SpriteDispCnt = GPU2D.DispCnt; // TODO CHECKME might not be right to do it here + if (dispcnt_diff & 0x80F000F0) + dirty = true; + + static_assert(VRAMDirtyGranularity == 512); + NonStupidBitField<512> objDirty; + + if (screenon) + { + if (GPU2D.Num == 0) + { + objDirty = GPU.VRAMDirty_AOBJ.DeriveState(GPU.VRAMMap_AOBJ, GPU); + GPU.MakeVRAMFlat_AOBJCoherent(objDirty); + } + else + { + auto _objDirty = GPU.VRAMDirty_BOBJ.DeriveState(GPU.VRAMMap_BOBJ, GPU); + GPU.MakeVRAMFlat_BOBJCoherent(_objDirty); + memcpy(objDirty.Data, _objDirty.Data, 256>>3); + } + } + + u8* vram; u32 vrammask; + GPU2D.GetOBJVRAM(vram, vrammask); + + glBindTexture(GL_TEXTURE_2D, VRAMTex_OBJ); + + int texlen = (GPU2D.Num ? 
256 : 512) >> 6; + for (int i = 0; i < texlen; ) + { + if (!objDirty.Data[i]) + { + i++; + continue; + } + + int start = i * 32; + for (;;) + { + i++; + if (i >= texlen) break; + if (!objDirty.Data[i]) break; + } + int end = i * 32; + + glTexSubImage2D(GL_TEXTURE_2D, 0, + 0, start, + 1024, end - start, + GL_RED_INTEGER, GL_UNSIGNED_BYTE, + &vram[start * 1024]); + dirty = true; + } + + if ((GPU.OAMDirty & oammask) || SpriteConfigDirty) + { + memcpy(OAM, &GPU.OAM[GPU2D.Num ? 0x400 : 0], 0x400); + GPU.OAMDirty &= ~oammask; + SpriteConfigDirty = false; + dirty = true; + } + + // DrawScanline() for the next scanline will be called after this + // so it will be able to do the actual sprite rendering + if (dirty) + SpriteDirty = true; +} + +} diff --git a/src/GPU2D_OpenGL.h b/src/GPU2D_OpenGL.h new file mode 100644 index 0000000000..8b9580dbcc --- /dev/null +++ b/src/GPU2D_OpenGL.h @@ -0,0 +1,227 @@ +/* + Copyright 2016-2025 melonDS team + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#pragma once + +#include +#include +#include "OpenGLSupport.h" +#include "GPU2D.h" + +namespace melonDS +{ +class GLRenderer; + +class GLRenderer2D : public Renderer2D +{ +public: + GLRenderer2D(melonDS::GPU2D& gpu2D, GLRenderer& parent); + ~GLRenderer2D() override; + bool Init() override; + void Reset() override; + + void PostSavestate(); + + void SetScaleFactor(int scale); + + void DrawScanline(u32 line) override; + void DrawSprites(u32 line) override; + void VBlank() override; + void VBlankEnd() override; + +private: + friend class GLRenderer; + GLRenderer& Parent; + + int ScaleFactor; + int ScreenW, ScreenH; + + static int ShaderCount; + + static GLuint LayerPreShader; + static GLint LayerPreCurBGULoc; + + GLuint ScanlineConfigUBO; + GLuint SpriteScanlineConfigUBO; + + static GLuint SpritePreShader; + GLuint SpritePreVtxBuffer; + GLuint SpritePreVtxArray; + u16* SpritePreVtxData; + + static GLuint SpriteShader; + static GLint SpriteRenderTransULoc; + GLuint SpriteVtxBuffer; + GLuint SpriteVtxArray; + u16* SpriteVtxData; + + static GLuint CompositorShader; + GLuint CompositorConfigUBO; + static GLint CompositorScaleULoc; + + // base index for a BG layer within the BG texture arrays + // based on BG type and size + const u8 BGBaseIndex[4][4] = { + {2, 10, 6, 14}, // text mode + {0, 4, 16, 20}, // rotscale + {0, 4, 12, 16}, // bitmap + {18, 19, 12, 16}, // large bitmap + }; + + GLuint LayerConfigUBO; + GLuint SpriteConfigUBO; + + GLuint VRAMTex_BG; + GLuint VRAMTex_OBJ; + GLuint PalTex_BG; + GLuint PalTex_OBJ; + + static GLuint MosaicTex; + + GLuint AllBGLayerFB[22]; + GLuint AllBGLayerTex[22]; + + GLuint BGLayerFB[4]; + GLuint BGLayerTex[4]; + + GLuint SpriteFB; + GLuint SpriteTex; + + GLuint OBJLayerFB; + GLuint OBJLayerTex; + GLuint OBJDepthTex; + + GLuint OutputFB; + GLuint OutputTex; + + // std140 compliant config struct for the layer shader + struct sLayerConfig + { + u32 uVRAMMask; + u32 __pad0[3]; + struct sBGConfig + { + u32 Size[2]; + u32 Type; + 
u32 PalOffset; + u32 TileOffset; + u32 MapOffset; + u32 Clamp; + u32 __pad0[1]; + } uBGConfig[4]; + } LayerConfig; + + struct sSpriteConfig + { + u32 uVRAMMask; + u32 __pad0[3]; + s32 uRotscale[32][4]; + struct sOAM + { + s32 Position[2]; + s32 Flip[2]; + s32 Size[2]; + s32 BoundSize[2]; + u32 OBJMode; + u32 Type; + u32 PalOffset; + u32 TileOffset; + u32 TileStride; + u32 Rotscale; + u32 BGPrio; + u32 Mosaic; + } uOAM[128]; + } SpriteConfig; + int NumSprites; + bool SpriteUseMosaic; + + struct sScanlineConfig + { + struct sScanline + { + s32 BGOffset[4][4]; // really [4][2] + s32 BGRotscale[2][4]; + u32 BackColor; // 96 + u32 WinRegs; // 100 + u32 WinMask; // 104 + u32 __pad0[1]; + s32 WinPos[4]; + u32 BGMosaicEnable[4]; + s32 MosaicSize[4]; + } uScanline[192]; + } ScanlineConfig; + + struct sSpriteScanlineConfig + { + s32 uMosaicLine[192]; + } SpriteScanlineConfig; + + struct sCompositorConfig + { + u32 uBGPrio[4]; + u32 uEnableOBJ; + u32 uEnable3D; + u32 uBlendCnt; + u32 uBlendEffect; + u32 uBlendCoef[4]; + } CompositorConfig; + + int LastLine; + + bool UnitEnabled; + + u32 DispCnt; + u8 LayerEnable; + u8 OBJEnable; + u8 ForcedBlank; + u16 BGCnt[4]; + u16 BlendCnt; + u8 EVA, EVB, EVY; + + u32 BGVRAMRange[4][4]; + + bool LayerConfigDirty; + + int LastSpriteLine; + u16 OAM[512]; + + u32 SpriteDispCnt; + bool SpriteConfigDirty; + bool SpriteDirty; + + u16 TempPalBuffer[256 * (1 + (4*16))]; + + bool IsScreenOn(); + + void UpdateAndRender(int line); + + void UpdateScanlineConfig(int line); + void UpdateLayerConfig(); + void UpdateOAM(int ystart, int yend); + void UpdateCompositorConfig(); + + void PrerenderSprites(); + void PrerenderLayer(int layer); + + void DoRenderSprites(int line); + void RenderSprites(bool window, int ystart, int yend); + + void RenderScreen(int ystart, int yend); +}; + +} diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp index 2f8afcba18..86dd057097 100644 --- a/src/GPU2D_Soft.cpp +++ b/src/GPU2D_Soft.cpp @@ -16,21 +16,33 @@ with melonDS. 
If not, see http://www.gnu.org/licenses/. */ -#include "GPU2D_Soft.h" -#include "GPU.h" -#include "GPU3D.h" +#include "GPU_Soft.h" +#include "GPU_ColorOp.h" namespace melonDS { -namespace GPU2D -{ -SoftRenderer::SoftRenderer(melonDS::GPU& gpu) - : Renderer2D(), GPU(gpu) + +SoftRenderer2D::SoftRenderer2D(melonDS::GPU2D& gpu2D, SoftRenderer& parent) + : Renderer2D(gpu2D), Parent(parent) { // mosaic table is initialized at compile-time } -u32 SoftRenderer::ColorComposite(int i, u32 val1, u32 val2) const +SoftRenderer2D::~SoftRenderer2D() +{ +} + +void SoftRenderer2D::Reset() +{ + memset(BGOBJLine, 0, sizeof(BGOBJLine)); + memset(WindowMask, 0, sizeof(WindowMask)); + memset(OBJLine, 0, sizeof(OBJLine)); + memset(OBJWindow, 0, sizeof(OBJWindow)); + + NumSprites = 0; +} + +u32 SoftRenderer2D::ColorComposite(int i, u32 val1, u32 val2) const { u32 coloreffect = 0; u32 eva, evb; @@ -38,7 +50,7 @@ u32 SoftRenderer::ColorComposite(int i, u32 val1, u32 val2) const u32 flag1 = val1 >> 24; u32 flag2 = val2 >> 24; - u32 blendCnt = CurUnit->BlendCnt; + u32 blendCnt = GPU2D.BlendCnt; u32 target2; if (flag2 & 0x80) target2 = 0x1000; @@ -58,8 +70,8 @@ u32 SoftRenderer::ColorComposite(int i, u32 val1, u32 val2) const } else { - eva = CurUnit->EVA; - evb = CurUnit->EVB; + eva = GPU2D.EVA; + evb = GPU2D.EVB; } } else if ((flag1 & 0x40) && (blendCnt & target2)) @@ -81,8 +93,8 @@ u32 SoftRenderer::ColorComposite(int i, u32 val1, u32 val2) const { if (blendCnt & target2) { - eva = CurUnit->EVA; - evb = CurUnit->EVB; + eva = GPU2D.EVA; + evb = GPU2D.EVB; } else coloreffect = 0; @@ -92,27 +104,41 @@ u32 SoftRenderer::ColorComposite(int i, u32 val1, u32 val2) const switch (coloreffect) { - case 0: return val1; - case 1: return ColorBlend4(val1, val2, eva, evb); - case 2: return ColorBrightnessUp(val1, CurUnit->EVY, 0x8); - case 3: return ColorBrightnessDown(val1, CurUnit->EVY, 0x7); - case 4: return ColorBlend5(val1, val2); + case 0: return val1; + case 1: return ColorBlend4(val1, val2, eva, 
evb); + case 2: return ColorBrightnessUp(val1, GPU2D.EVY, 0x8); + case 3: return ColorBrightnessDown(val1, GPU2D.EVY, 0x7); + case 4: return ColorBlend5(val1, val2); } return val1; } -void SoftRenderer::DrawScanline(u32 line, Unit* unit) +void SoftRenderer2D::DrawScanline(u32 line) { - CurUnit = unit; + u32* dst = Parent.Output2D[GPU2D.Num]; - int stride = GPU.GPU3D.IsRendererAccelerated() ? (256*3 + 1) : 256; - u32* dst = &Framebuffer[CurUnit->Num][stride * line]; + if (!GPU2D.Enabled) + { + // if this 2D unit is disabled in POWCNT, the output is a fixed color + // (black for unit A, white for unit B) + u32 fillcolor = (GPU2D.Num == 0) ? 0xFF000000 : 0xFF3F3F3F; + for (int i = 0; i < 256; i++) + dst[i] = fillcolor; - int n3dline = line; - line = GPU.VCount; + return; + } - if (CurUnit->Num == 0) + if (GPU2D.ForcedBlank) + { + // forced blank + for (int i = 0; i < 256; i++) + dst[i] = 0xFF3F3F3F; + + return; + } + + if (GPU2D.Num == 0) { auto bgDirty = GPU.VRAMDirty_ABG.DeriveState(GPU.VRAMMap_ABG, GPU); GPU.MakeVRAMFlat_ABGCoherent(bgDirty); @@ -131,458 +157,46 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit) GPU.MakeVRAMFlat_BOBJExtPalCoherent(objExtPalDirty); } - bool forceblank = false; - - // scanlines that end up outside of the GPU drawing range - // (as a result of writing to VCount) are filled white - if (line > 192) forceblank = true; - - // GPU B can be completely disabled by POWCNT1 - // oddly that's not the case for GPU A - if (CurUnit->Num && !CurUnit->Enabled) forceblank = true; - - if (line == 0 && CurUnit->CaptureCnt & (1 << 31) && !forceblank) - CurUnit->CaptureLatch = true; - - if (CurUnit->Num == 0) - { - if (!GPU.GPU3D.IsRendererAccelerated()) - _3DLine = GPU.GPU3D.GetLine(n3dline); - else if (CurUnit->CaptureLatch && (((CurUnit->CaptureCnt >> 29) & 0x3) != 1)) - { - _3DLine = GPU.GPU3D.GetLine(n3dline); - //GPU3D::GLRenderer::PrepareCaptureFrame(); - } - } - - if (forceblank) - { - for (int i = 0; i < 256; i++) - dst[i] = 0xFFFFFFFF; - 
- if (GPU.GPU3D.IsRendererAccelerated()) - { - dst[256*3] = 0; - } - return; - } - - u32 dispmode = CurUnit->DispCnt >> 16; - dispmode &= (CurUnit->Num ? 0x1 : 0x3); - - // always render regular graphics - DrawScanline_BGOBJ(line); - CurUnit->UpdateMosaicCounters(line); - - switch (dispmode) - { - case 0: // screen off - { - for (int i = 0; i < 256; i++) - dst[i] = 0x003F3F3F; - } - break; - - case 1: // regular display - { - int i = 0; - for (; i < (stride & ~1); i+=2) - *(u64*)&dst[i] = *(u64*)&BGOBJLine[i]; - } - break; - - case 2: // VRAM display - { - u32 vrambank = (CurUnit->DispCnt >> 18) & 0x3; - if (GPU.VRAMMap_LCDC & (1<> 4; - u8 b = (color & 0x7C00) >> 9; - - dst[i] = r | (g << 8) | (b << 16); - } - } - else - { - for (int i = 0; i < 256; i++) - { - dst[i] = 0; - } - } - } - break; - - case 3: // FIFO display - { - for (int i = 0; i < 256; i++) - { - u16 color = CurUnit->DispFIFOBuffer[i]; - u8 r = (color & 0x001F) << 1; - u8 g = (color & 0x03E0) >> 4; - u8 b = (color & 0x7C00) >> 9; - - dst[i] = r | (g << 8) | (b << 16); - } - } - break; - } - - // capture - if ((CurUnit->Num == 0) && CurUnit->CaptureLatch) - { - u32 capwidth, capheight; - switch ((CurUnit->CaptureCnt >> 20) & 0x3) - { - case 0: capwidth = 128; capheight = 128; break; - case 1: capwidth = 256; capheight = 64; break; - case 2: capwidth = 256; capheight = 128; break; - case 3: capwidth = 256; capheight = 192; break; - } - - if (line < capheight) - DoCapture(line, capwidth); - } - - u32 masterBrightness = CurUnit->MasterBrightness; - - if (GPU.GPU3D.IsRendererAccelerated()) - { - u32 xpos = GPU.GPU3D.GetRenderXPos(); - - dst[256*3] = masterBrightness | - (CurUnit->DispCnt & 0x30000) | - (xpos << 24) | ((xpos & 0x100) << 15); - return; - } - - // master brightness - if (dispmode != 0) - { - if ((masterBrightness >> 14) == 1) - { - // up - u32 factor = masterBrightness & 0x1F; - if (factor > 16) factor = 16; - - for (int i = 0; i < 256; i++) - { - dst[i] = ColorBrightnessUp(dst[i], factor, 
0x0); - } - } - else if ((masterBrightness >> 14) == 2) - { - // down - u32 factor = masterBrightness & 0x1F; - if (factor > 16) factor = 16; - - for (int i = 0; i < 256; i++) - { - dst[i] = ColorBrightnessDown(dst[i], factor, 0xF); - } - } - } - - // convert to 32-bit BGRA - // note: 32-bit RGBA would be more straightforward, but - // BGRA seems to be more compatible (Direct2D soft, cairo...) - for (int i = 0; i < 256; i+=2) - { - u64 c = *(u64*)&dst[i]; - - u64 r = (c << 18) & 0xFC000000FC0000; - u64 g = (c << 2) & 0xFC000000FC00; - u64 b = (c >> 14) & 0xFC000000FC; - c = r | g | b; - - *(u64*)&dst[i] = c | ((c & 0x00C0C0C000C0C0C0) >> 6) | 0xFF000000FF000000; - } -} - -void SoftRenderer::VBlankEnd(Unit* unitA, Unit* unitB) -{ -#ifdef OGLRENDERER_ENABLED - if (Renderer3D& renderer3d = GPU.GPU3D.GetCurrentRenderer(); renderer3d.Accelerated) - { - if ((unitA->CaptureCnt & (1<<31)) && (((unitA->CaptureCnt >> 29) & 0x3) != 1)) - { - renderer3d.PrepareCaptureFrame(); - } - } -#endif -} - -void SoftRenderer::DoCapture(u32 line, u32 width) -{ - u32 captureCnt = CurUnit->CaptureCnt; - u32 dstvram = (captureCnt >> 16) & 0x3; - - // TODO: confirm this - // it should work like VRAM display mode, which requires VRAM to be mapped to LCDC - if (!(GPU.VRAMMap_LCDC & (1<> 18) & 0x3) << 14) + (line * width); - - // TODO: handle 3D in GPU3D::CurrentRenderer->Accelerated mode!! 
- - u32* srcA; - if (captureCnt & (1<<24)) - { - srcA = _3DLine; - } - else - { - srcA = BGOBJLine; - if (GPU.GPU3D.IsRendererAccelerated()) - { - // in GPU3D::CurrentRenderer->Accelerated mode, compositing is normally done on the GPU - // but when doing display capture, we do need the composited output - // so we do it here - - for (int i = 0; i < 256; i++) - { - u32 val1 = BGOBJLine[i]; - u32 val2 = BGOBJLine[256+i]; - u32 val3 = BGOBJLine[512+i]; - - u32 compmode = (val3 >> 24) & 0xF; - - if (compmode == 4) - { - // 3D on top, blending - - u32 _3dval = _3DLine[i]; - if ((_3dval >> 24) > 0) - val1 = ColorBlend5(_3dval, val1); - else - val1 = val2; - } - else if (compmode == 1) - { - // 3D on bottom, blending - - u32 _3dval = _3DLine[i]; - if ((_3dval >> 24) > 0) - { - u32 eva = (val3 >> 8) & 0x1F; - u32 evb = (val3 >> 16) & 0x1F; - - val1 = ColorBlend4(val1, _3dval, eva, evb); - } - else - val1 = val2; - } - else if (compmode <= 3) - { - // 3D on top, normal/fade - - u32 _3dval = _3DLine[i]; - if ((_3dval >> 24) > 0) - { - u32 evy = (val3 >> 8) & 0x1F; - - val1 = _3dval; - if (compmode == 2) val1 = ColorBrightnessUp(val1, evy, 0x8); - else if (compmode == 3) val1 = ColorBrightnessDown(val1, evy, 0x7); - } - else - val1 = val2; - } - - BGOBJLine[i] = val1; - } - } - } - - u16* srcB = NULL; - u32 srcBaddr = line * 256; - - if (captureCnt & (1<<25)) - { - srcB = &CurUnit->DispFIFOBuffer[0]; - srcBaddr = 0; - } - else - { - u32 srcvram = (CurUnit->DispCnt >> 18) & 0x3; - if (GPU.VRAMMap_LCDC & (1<DispCnt >> 16) & 0x3) != 2) - srcBaddr += ((captureCnt >> 26) & 0x3) << 14; - } - - dstaddr &= 0xFFFF; - srcBaddr &= 0xFFFF; - - static_assert(VRAMDirtyGranularity == 512); - GPU.VRAMDirty[dstvram][(dstaddr * 2) / VRAMDirtyGranularity] = true; - - switch ((captureCnt >> 29) & 0x3) - { - case 0: // source A - { - for (u32 i = 0; i < width; i++) - { - u32 val = srcA[i]; - - // TODO: check what happens when alpha=0 - - u32 r = (val >> 1) & 0x1F; - u32 g = (val >> 9) & 0x1F; - 
u32 b = (val >> 17) & 0x1F; - u32 a = ((val >> 24) != 0) ? 0x8000 : 0; - - dst[dstaddr] = r | (g << 5) | (b << 10) | a; - dstaddr = (dstaddr + 1) & 0xFFFF; - } - } - break; - - case 1: // source B - { - if (srcB) - { - for (u32 i = 0; i < width; i++) - { - dst[dstaddr] = srcB[srcBaddr]; - srcBaddr = (srcBaddr + 1) & 0xFFFF; - dstaddr = (dstaddr + 1) & 0xFFFF; - } - } - else - { - for (u32 i = 0; i < width; i++) - { - dst[dstaddr] = 0; - dstaddr = (dstaddr + 1) & 0xFFFF; - } - } - } - break; - - case 2: // sources A+B - case 3: - { - u32 eva = captureCnt & 0x1F; - u32 evb = (captureCnt >> 8) & 0x1F; - - // checkme - if (eva > 16) eva = 16; - if (evb > 16) evb = 16; - - if (srcB) - { - for (u32 i = 0; i < width; i++) - { - u32 val = srcA[i]; - - // TODO: check what happens when alpha=0 - - u32 rA = (val >> 1) & 0x1F; - u32 gA = (val >> 9) & 0x1F; - u32 bA = (val >> 17) & 0x1F; - u32 aA = ((val >> 24) != 0) ? 1 : 0; - - val = srcB[srcBaddr]; - - u32 rB = val & 0x1F; - u32 gB = (val >> 5) & 0x1F; - u32 bB = (val >> 10) & 0x1F; - u32 aB = val >> 15; - - u32 rD = ((rA * aA * eva) + (rB * aB * evb) + 8) >> 4; - u32 gD = ((gA * aA * eva) + (gB * aB * evb) + 8) >> 4; - u32 bD = ((bA * aA * eva) + (bB * aB * evb) + 8) >> 4; - u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0); - - if (rD > 0x1F) rD = 0x1F; - if (gD > 0x1F) gD = 0x1F; - if (bD > 0x1F) bD = 0x1F; - - dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15); - srcBaddr = (srcBaddr + 1) & 0xFFFF; - dstaddr = (dstaddr + 1) & 0xFFFF; - } - } - else - { - for (u32 i = 0; i < width; i++) - { - u32 val = srcA[i]; - - // TODO: check what happens when alpha=0 - - u32 rA = (val >> 1) & 0x1F; - u32 gA = (val >> 9) & 0x1F; - u32 bA = (val >> 17) & 0x1F; - u32 aA = ((val >> 24) != 0) ? 1 : 0; - - u32 rD = ((rA * aA * eva) + 8) >> 4; - u32 gD = ((gA * aA * eva) + 8) >> 4; - u32 bD = ((bA * aA * eva) + 8) >> 4; - u32 aD = (eva>0 ? 
aA : 0); - - dst[dstaddr] = rD | (gD << 5) | (bD << 10) | (aD << 15); - dstaddr = (dstaddr + 1) & 0xFFFF; - } - } - } - break; - } + // render BG layers and sprites + DrawScanline_BGOBJ(line, dst); } #define DoDrawBG(type, line, num) \ do \ { \ - if ((bgCnt[num] & 0x0040) && (CurUnit->BGMosaicSize[0] > 0)) \ + if ((bgCnt[num] & (1<<6)) && (GPU2D.BGMosaicSize[0] > 0)) \ { \ - if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_##type(line, num); \ - else DrawBG_##type(line, num); \ + DrawBG_##type(line, num); \ } \ else \ { \ - if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_##type(line, num); \ - else DrawBG_##type(line, num); \ + DrawBG_##type(line, num); \ } \ } while (false) #define DoDrawBG_Large(line) \ do \ { \ - if ((bgCnt[2] & 0x0040) && (CurUnit->BGMosaicSize[0] > 0)) \ + if ((bgCnt[2] & (1<<6)) && (GPU2D.BGMosaicSize[0] > 0)) \ { \ - if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_Large(line); \ - else DrawBG_Large(line); \ + DrawBG_Large(line); \ } \ else \ { \ - if (GPU.GPU3D.IsRendererAccelerated()) DrawBG_Large(line); \ - else DrawBG_Large(line); \ + DrawBG_Large(line); \ } \ } while (false) -#define DoInterleaveSprites(prio) \ - if (GPU.GPU3D.IsRendererAccelerated()) InterleaveSprites(prio); else InterleaveSprites(prio); - template -void SoftRenderer::DrawScanlineBGMode(u32 line) +void SoftRenderer2D::DrawScanlineBGMode(u32 line) { - u32 dispCnt = CurUnit->DispCnt; - u16* bgCnt = CurUnit->BGCnt; + u32 dispCnt = GPU2D.DispCnt; + u16* bgCnt = GPU2D.BGCnt; for (int i = 3; i >= 0; i--) { if ((bgCnt[3] & 0x3) == i) { - if (dispCnt & 0x0800) + if (GPU2D.LayerEnable & (1<<3)) { if (bgmode >= 3) DoDrawBG(Extended, line, 3); @@ -594,7 +208,7 @@ void SoftRenderer::DrawScanlineBGMode(u32 line) } if ((bgCnt[2] & 0x3) == i) { - if (dispCnt & 0x0400) + if (GPU2D.LayerEnable & (1<<2)) { if (bgmode == 5) DoDrawBG(Extended, line, 2); @@ -606,107 +220,100 @@ void SoftRenderer::DrawScanlineBGMode(u32 line) } if ((bgCnt[1] & 0x3) == i) { - if (dispCnt & 0x0200) + if 
(GPU2D.LayerEnable & (1<<1)) { DoDrawBG(Text, line, 1); } } if ((bgCnt[0] & 0x3) == i) { - if (dispCnt & 0x0100) + if (GPU2D.LayerEnable & (1<<0)) { - if (!CurUnit->Num && (dispCnt & 0x8)) + if (!GPU2D.Num && (dispCnt & 0x8)) DrawBG_3D(); else DoDrawBG(Text, line, 0); } } - if ((dispCnt & 0x1000) && NumSprites[CurUnit->Num]) + if ((GPU2D.LayerEnable & (1<<4)) && NumSprites) { - DoInterleaveSprites(0x40000 | (i<<16)); + InterleaveSprites(i); } } } -void SoftRenderer::DrawScanlineBGMode6(u32 line) +void SoftRenderer2D::DrawScanlineBGMode6(u32 line) { - u32 dispCnt = CurUnit->DispCnt; - u16* bgCnt = CurUnit->BGCnt; + u32 dispCnt = GPU2D.DispCnt; + u16* bgCnt = GPU2D.BGCnt; for (int i = 3; i >= 0; i--) { if ((bgCnt[2] & 0x3) == i) { - if (dispCnt & 0x0400) + if (GPU2D.LayerEnable & (1<<2)) { DoDrawBG_Large(line); } } if ((bgCnt[0] & 0x3) == i) { - if (dispCnt & 0x0100) + if (GPU2D.LayerEnable & (1<<0)) { - if ((!CurUnit->Num) && (dispCnt & 0x8)) + if ((!GPU2D.Num) && (dispCnt & 0x8)) DrawBG_3D(); } } - if ((dispCnt & 0x1000) && NumSprites[CurUnit->Num]) + if ((GPU2D.LayerEnable & (1<<4)) && NumSprites) { - DoInterleaveSprites(0x40000 | (i<<16)) + InterleaveSprites(i); } } } -void SoftRenderer::DrawScanlineBGMode7(u32 line) +void SoftRenderer2D::DrawScanlineBGMode7(u32 line) { - u32 dispCnt = CurUnit->DispCnt; - u16* bgCnt = CurUnit->BGCnt; + u32 dispCnt = GPU2D.DispCnt; + u16* bgCnt = GPU2D.BGCnt; // mode 7 only has text-mode BG0 and BG1 for (int i = 3; i >= 0; i--) { if ((bgCnt[1] & 0x3) == i) { - if (dispCnt & 0x0200) + if (GPU2D.LayerEnable & (1<<1)) { DoDrawBG(Text, line, 1); } } if ((bgCnt[0] & 0x3) == i) { - if (dispCnt & 0x0100) + if (GPU2D.LayerEnable & (1<<0)) { - if (!CurUnit->Num && (dispCnt & 0x8)) + if (!GPU2D.Num && (dispCnt & 0x8)) DrawBG_3D(); else DoDrawBG(Text, line, 0); } } - if ((dispCnt & 0x1000) && NumSprites[CurUnit->Num]) + if ((GPU2D.LayerEnable & (1<<4)) && NumSprites) { - DoInterleaveSprites(0x40000 | (i<<16)) + InterleaveSprites(i); } } } 
-void SoftRenderer::DrawScanline_BGOBJ(u32 line) +void SoftRenderer2D::DrawScanline_BGOBJ(u32 line, u32* dst) { - // forced blank disables BG/OBJ compositing - if (CurUnit->DispCnt & (1<<7)) - { - for (int i = 0; i < 256; i++) - BGOBJLine[i] = 0xFF3F3F3F; - - return; - } - u64 backdrop; - if (CurUnit->Num) backdrop = *(u16*)&GPU.Palette[0x400]; - else backdrop = *(u16*)&GPU.Palette[0]; + if (GPU2D.Num) + backdrop = *(u16*)&GPU.Palette[0x400]; + else + backdrop = *(u16*)&GPU.Palette[0]; { u8 r = (backdrop & 0x001F) << 1; - u8 g = (backdrop & 0x03E0) >> 4; + u8 g = ((backdrop & 0x03E0) >> 4) | ((backdrop & 0x8000) >> 15); u8 b = (backdrop & 0x7C00) >> 9; backdrop = r | (g << 8) | (b << 16) | 0x20000000; @@ -714,233 +321,103 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line) for (int i = 0; i < 256; i+=2) *(u64*)&BGOBJLine[i] = backdrop; + for (int i = 256; i < 512; i+=2) + *(u64*)&BGOBJLine[i] = 0; } - if (CurUnit->DispCnt & 0xE000) - CurUnit->CalculateWindowMask(line, WindowMask, OBJWindow[CurUnit->Num]); + if (GPU2D.DispCnt & 0xE000) + GPU2D.CalculateWindowMask(WindowMask, OBJWindow); else memset(WindowMask, 0xFF, 256); ApplySpriteMosaicX(); - CurBGXMosaicTable = MosaicTable[CurUnit->BGMosaicSize[0]].data(); + CurBGXMosaicTable = MosaicTable[GPU2D.BGMosaicSize[0]].data(); - switch (CurUnit->DispCnt & 0x7) + switch (GPU2D.DispCnt & 0x7) { - case 0: DrawScanlineBGMode<0>(line); break; - case 1: DrawScanlineBGMode<1>(line); break; - case 2: DrawScanlineBGMode<2>(line); break; - case 3: DrawScanlineBGMode<3>(line); break; - case 4: DrawScanlineBGMode<4>(line); break; - case 5: DrawScanlineBGMode<5>(line); break; - case 6: DrawScanlineBGMode6(line); break; - case 7: DrawScanlineBGMode7(line); break; + case 0: DrawScanlineBGMode<0>(line); break; + case 1: DrawScanlineBGMode<1>(line); break; + case 2: DrawScanlineBGMode<2>(line); break; + case 3: DrawScanlineBGMode<3>(line); break; + case 4: DrawScanlineBGMode<4>(line); break; + case 5: DrawScanlineBGMode<5>(line); break; 
+ case 6: DrawScanlineBGMode6(line); break; + case 7: DrawScanlineBGMode7(line); break; } // color special effects // can likely be optimized - if (!GPU.GPU3D.IsRendererAccelerated()) - { - for (int i = 0; i < 256; i++) - { - u32 val1 = BGOBJLine[i]; - u32 val2 = BGOBJLine[256+i]; - - BGOBJLine[i] = ColorComposite(i, val1, val2); - } - } - else - { - if (CurUnit->Num == 0) - { - for (int i = 0; i < 256; i++) - { - u32 val1 = BGOBJLine[i]; - u32 val2 = BGOBJLine[256+i]; - u32 val3 = BGOBJLine[512+i]; - - u32 flag1 = val1 >> 24; - u32 flag2 = val2 >> 24; - - u32 bldcnteffect = (CurUnit->BlendCnt >> 6) & 0x3; - - u32 target1; - if (flag1 & 0x80) target1 = 0x0010; - else if (flag1 & 0x40) target1 = 0x0001; - else target1 = flag1; - - u32 target2; - if (flag2 & 0x80) target2 = 0x1000; - else if (flag2 & 0x40) target2 = 0x0100; - else target2 = flag2 << 8; - - if (((flag1 & 0xC0) == 0x40) && (CurUnit->BlendCnt & target2)) - { - // 3D on top, blending - - BGOBJLine[i] = val2; - BGOBJLine[256+i] = ColorComposite(i, val2, val3); - BGOBJLine[512+i] = 0x04000000; - } - else if ((flag1 & 0xC0) == 0x40) - { - // 3D on top, normal/fade - - if (bldcnteffect == 1) bldcnteffect = 0; - if (!(CurUnit->BlendCnt & 0x0001)) bldcnteffect = 0; - if (!(WindowMask[i] & 0x20)) bldcnteffect = 0; - - BGOBJLine[i] = val2; - BGOBJLine[256+i] = ColorComposite(i, val2, val3); - BGOBJLine[512+i] = (bldcnteffect << 24) | (CurUnit->EVY << 8); - } - else if (((flag2 & 0xC0) == 0x40) && ((CurUnit->BlendCnt & 0x01C0) == 0x0140)) - { - // 3D on bottom, blending - - u32 eva, evb; - if ((flag1 & 0xC0) == 0xC0) - { - eva = flag1 & 0x1F; - evb = 16 - eva; - } - else if (((CurUnit->BlendCnt & target1) && (WindowMask[i] & 0x20)) || - ((flag1 & 0xC0) == 0x80)) - { - eva = CurUnit->EVA; - evb = CurUnit->EVB; - } - else - bldcnteffect = 7; - - BGOBJLine[i] = val1; - BGOBJLine[256+i] = ColorComposite(i, val1, val3); - BGOBJLine[512+i] = (bldcnteffect << 24) | (CurUnit->EVB << 16) | (CurUnit->EVA << 8); - } - else 
- { - // no potential 3D pixel involved - - BGOBJLine[i] = ColorComposite(i, val1, val2); - BGOBJLine[256+i] = 0; - BGOBJLine[512+i] = 0x07000000; - } - } - } - else - { - for (int i = 0; i < 256; i++) - { - u32 val1 = BGOBJLine[i]; - u32 val2 = BGOBJLine[256+i]; - - BGOBJLine[i] = ColorComposite(i, val1, val2); - BGOBJLine[256+i] = 0; - BGOBJLine[512+i] = 0x07000000; - } - } - } - - if (CurUnit->BGMosaicY >= CurUnit->BGMosaicYMax) + for (int i = 0; i < 256; i++) { - CurUnit->BGMosaicY = 0; - CurUnit->BGMosaicYMax = CurUnit->BGMosaicSize[1]; - } - else - CurUnit->BGMosaicY++; + u32 val1 = BGOBJLine[i]; + u32 val2 = BGOBJLine[256+i]; - /*if (OBJMosaicY >= OBJMosaicYMax) - { - OBJMosaicY = 0; - OBJMosaicYMax = OBJMosaicSize[1]; + dst[i] = ColorComposite(i, val1, val2); } - else - OBJMosaicY++;*/ } -void SoftRenderer::DrawPixel_Normal(u32* dst, u16 color, u32 flag) -{ - u8 r = (color & 0x001F) << 1; - u8 g = (color & 0x03E0) >> 4; - u8 b = (color & 0x7C00) >> 9; - //g |= ((color & 0x8000) >> 15); - - *(dst+256) = *dst; - *dst = r | (g << 8) | (b << 16) | flag; -} - -void SoftRenderer::DrawPixel_Accel(u32* dst, u16 color, u32 flag) +void SoftRenderer2D::DrawPixel(u32* dst, u16 color, u32 flag) { u8 r = (color & 0x001F) << 1; - u8 g = (color & 0x03E0) >> 4; + u8 g = ((color & 0x03E0) >> 4) | ((color & 0x8000) >> 15); u8 b = (color & 0x7C00) >> 9; - *(dst+512) = *(dst+256); *(dst+256) = *dst; *dst = r | (g << 8) | (b << 16) | flag; } -void SoftRenderer::DrawBG_3D() +void SoftRenderer2D::DrawBG_3D() { - int i = 0; - - if (GPU.GPU3D.IsRendererAccelerated()) - { - for (i = 0; i < 256; i++) - { - if (!(WindowMask[i] & 0x01)) continue; - - BGOBJLine[i+512] = BGOBJLine[i+256]; - BGOBJLine[i+256] = BGOBJLine[i]; - BGOBJLine[i] = 0x40000000; // 3D-layer placeholder - } - } - else + for (int i = 0; i < 256; i++) { - for (i = 0; i < 256; i++) - { - u32 c = _3DLine[i]; + u32 c = Parent.Output3D[i]; - if ((c >> 24) == 0) continue; - if (!(WindowMask[i] & 0x01)) continue; + if ((c >> 
24) == 0) continue; + if (!(WindowMask[i] & 0x01)) continue; - BGOBJLine[i+256] = BGOBJLine[i]; - BGOBJLine[i] = c | 0x40000000; - } + BGOBJLine[i+256] = BGOBJLine[i]; + BGOBJLine[i] = c | 0x40000000; } } -template -void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) +template +void SoftRenderer2D::DrawBG_Text(u32 line, u32 bgnum) { // workaround for backgrounds missing on aarch64 with lto build asm volatile ("" : : : "memory"); - u16 bgcnt = CurUnit->BGCnt[bgnum]; + u16 bgcnt = GPU2D.BGCnt[bgnum]; u32 tilesetaddr, tilemapaddr; u16* pal; u32 extpal, extpalslot; - u16 xoff = CurUnit->BGXPos[bgnum]; - u16 yoff = CurUnit->BGYPos[bgnum] + line; + u16 xoff = GPU2D.BGXPos[bgnum]; + u16 yoff = GPU2D.BGYPos[bgnum]; - if (bgcnt & 0x0040) + if (bgcnt & (1<<6)) + yoff += GPU2D.BGMosaicLine; + else + yoff += line; + /*u16 yoff = GPU2D.BGYPos[bgnum] + line; + + if (bgcnt & (1<<6)) { // vertical mosaic - yoff -= CurUnit->BGMosaicY; - } + yoff -= GPU2D.BGMosaicY; + }*/ - u32 widexmask = (bgcnt & 0x4000) ? 0x100 : 0; + u32 widexmask = (bgcnt & (1<<14)) ? 0x100 : 0; - extpal = (CurUnit->DispCnt & 0x40000000); + extpal = (GPU2D.DispCnt & (1<<30)); if (extpal) extpalslot = ((bgnum<2) && (bgcnt&0x2000)) ? 
(2+bgnum) : bgnum; u8* bgvram; u32 bgvrammask; - CurUnit->GetBGVRAM(bgvram, bgvrammask); - if (CurUnit->Num) + GPU2D.GetBGVRAM(bgvram, bgvrammask); + if (GPU2D.Num) { tilesetaddr = ((bgcnt & 0x003C) << 12); tilemapaddr = ((bgcnt & 0x1F00) << 3); @@ -949,17 +426,17 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) } else { - tilesetaddr = ((CurUnit->DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = ((CurUnit->DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((GPU2D.DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((GPU2D.DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU.Palette[0]; } // adjust Y position in tilemap - if (bgcnt & 0x8000) + if (bgcnt & (1<<15)) { tilemapaddr += ((yoff & 0x1F8) << 3); - if (bgcnt & 0x4000) + if (bgcnt & (1<<14)) tilemapaddr += ((yoff & 0x100) << 3); } else @@ -971,7 +448,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) u8 color; u32 lastxpos; - if (bgcnt & 0x0080) + if (bgcnt & (1<<7)) { // 256-color @@ -980,11 +457,11 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) { curtile = *(u16*)&bgvram[(tilemapaddr + ((xoff & 0xF8) >> 2) + ((xoff & widexmask) << 3)) & bgvrammask]; - if (extpal) curpal = CurUnit->GetBGExtPal(extpalslot, curtile>>12); + if (extpal) curpal = GPU2D.GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6) - + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); + + (((curtile & (1<<11)) ? 
(7-(yoff&0x7)) : (yoff&0x7)) << 3); } if (mosaic) lastxpos = xoff; @@ -1001,11 +478,11 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) // load a new tile curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; - if (extpal) curpal = CurUnit->GetBGExtPal(extpalslot, curtile>>12); + if (extpal) curpal = GPU2D.GetBGExtPal(extpalslot, curtile>>12); else curpal = pal; pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 6) - + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); + + (((curtile & (1<<11)) ? (7-(yoff&0x7)) : (yoff&0x7)) << 3); if (mosaic) lastxpos = xpos; } @@ -1013,11 +490,11 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) // draw pixel if (WindowMask[i] & (1<> 2) + ((xoff & widexmask) << 3))) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) - + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + + (((curtile & (1<<11)) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); } if (mosaic) lastxpos = xoff; @@ -1051,7 +528,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) curtile = *(u16*)&bgvram[(tilemapaddr + ((xpos & 0xF8) >> 2) + ((xpos & widexmask) << 3)) & bgvrammask]; curpal = pal + ((curtile & 0xF000) >> 8); pixelsaddr = tilesetaddr + ((curtile & 0x03FF) << 5) - + (((curtile & 0x0800) ? (7-(yoff&0x7)) : (yoff&0x7)) << 2); + + (((curtile & (1<<11)) ? 
(7-(yoff&0x7)) : (yoff&0x7)) << 2); if (mosaic) lastxpos = xpos; } @@ -1059,7 +536,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) // draw pixel if (WindowMask[i] & (1<> 1)) & bgvrammask] >> 4; @@ -1070,7 +547,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum) } if (color) - drawPixel(&BGOBJLine[i], curpal[color], 0x01000000< -void SoftRenderer::DrawBG_Affine(u32 line, u32 bgnum) +template +void SoftRenderer2D::DrawBG_Affine(u32 line, u32 bgnum) { - u16 bgcnt = CurUnit->BGCnt[bgnum]; + u16 bgcnt = GPU2D.BGCnt[bgnum]; u32 tilesetaddr, tilemapaddr; u16* pal; u32 coordmask; u32 yshift; - switch (bgcnt & 0xC000) + switch ((bgcnt >> 14) & 0x3) { - case 0x0000: coordmask = 0x07800; yshift = 7; break; - case 0x4000: coordmask = 0x0F800; yshift = 8; break; - case 0x8000: coordmask = 0x1F800; yshift = 9; break; - case 0xC000: coordmask = 0x3F800; yshift = 10; break; + case 0: coordmask = 0x07800; yshift = 7; break; + case 1: coordmask = 0x0F800; yshift = 8; break; + case 2: coordmask = 0x1F800; yshift = 9; break; + case 3: coordmask = 0x3F800; yshift = 10; break; } u32 overflowmask; - if (bgcnt & 0x2000) overflowmask = 0; - else overflowmask = ~(coordmask | 0x7FF); + if (bgcnt & (1<<13)) overflowmask = 0; + else overflowmask = ~(coordmask | 0x7FF); - s16 rotA = CurUnit->BGRotA[bgnum-2]; - s16 rotB = CurUnit->BGRotB[bgnum-2]; - s16 rotC = CurUnit->BGRotC[bgnum-2]; - s16 rotD = CurUnit->BGRotD[bgnum-2]; + s16 rotA = GPU2D.BGRotA[bgnum-2]; + s16 rotC = GPU2D.BGRotC[bgnum-2]; - s32 rotX = CurUnit->BGXRefInternal[bgnum-2]; - s32 rotY = CurUnit->BGYRefInternal[bgnum-2]; - - if (bgcnt & 0x0040) - { - // vertical mosaic - rotX -= (CurUnit->BGMosaicY * rotB); - rotY -= (CurUnit->BGMosaicY * rotD); - } + s32 rotX = GPU2D.BGXRefInternal[bgnum-2]; + s32 rotY = GPU2D.BGYRefInternal[bgnum-2]; u8* bgvram; u32 bgvrammask; - CurUnit->GetBGVRAM(bgvram, bgvrammask); + GPU2D.GetBGVRAM(bgvram, bgvrammask); - if (CurUnit->Num) + if (GPU2D.Num) { tilesetaddr = ((bgcnt & 0x003C) << 
12); tilemapaddr = ((bgcnt & 0x1F00) << 3); @@ -1128,8 +596,8 @@ void SoftRenderer::DrawBG_Affine(u32 line, u32 bgnum) } else { - tilesetaddr = ((CurUnit->DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = ((CurUnit->DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((GPU2D.DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((GPU2D.DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU.Palette[0]; } @@ -1167,22 +635,19 @@ void SoftRenderer::DrawBG_Affine(u32 line, u32 bgnum) color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) - drawPixel(&BGOBJLine[i], pal[color], 0x01000000<BGXRefInternal[bgnum-2] += rotB; - CurUnit->BGYRefInternal[bgnum-2] += rotD; } -template -void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) +template +void SoftRenderer2D::DrawBG_Extended(u32 line, u32 bgnum) { - u16 bgcnt = CurUnit->BGCnt[bgnum]; + u16 bgcnt = GPU2D.BGCnt[bgnum]; u32 tilesetaddr, tilemapaddr; u16* pal; @@ -1190,41 +655,32 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) u8* bgvram; u32 bgvrammask; - CurUnit->GetBGVRAM(bgvram, bgvrammask); - - extpal = (CurUnit->DispCnt & 0x40000000); + GPU2D.GetBGVRAM(bgvram, bgvrammask); - s16 rotA = CurUnit->BGRotA[bgnum-2]; - s16 rotB = CurUnit->BGRotB[bgnum-2]; - s16 rotC = CurUnit->BGRotC[bgnum-2]; - s16 rotD = CurUnit->BGRotD[bgnum-2]; + extpal = (GPU2D.DispCnt & (1<<30)); - s32 rotX = CurUnit->BGXRefInternal[bgnum-2]; - s32 rotY = CurUnit->BGYRefInternal[bgnum-2]; + s16 rotA = GPU2D.BGRotA[bgnum-2]; + s16 rotC = GPU2D.BGRotC[bgnum-2]; - if (bgcnt & 0x0040) - { - // vertical mosaic - rotX -= (CurUnit->BGMosaicY * rotB); - rotY -= (CurUnit->BGMosaicY * rotD); - } + s32 rotX = GPU2D.BGXRefInternal[bgnum-2]; + s32 rotY = GPU2D.BGYRefInternal[bgnum-2]; - if (bgcnt & 0x0080) + if (bgcnt & (1<<7)) { // bitmap modes u32 xmask, ymask; u32 yshift; - switch (bgcnt & 0xC000) + switch ((bgcnt >> 14) & 
0x3) { - case 0x0000: xmask = 0x07FFF; ymask = 0x07FFF; yshift = 7; break; - case 0x4000: xmask = 0x0FFFF; ymask = 0x0FFFF; yshift = 8; break; - case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break; - case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break; + case 0: xmask = 0x07FFF; ymask = 0x07FFF; yshift = 7; break; + case 1: xmask = 0x0FFFF; ymask = 0x0FFFF; yshift = 8; break; + case 2: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break; + case 3: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break; } u32 ofxmask, ofymask; - if (bgcnt & 0x2000) + if (bgcnt & (1<<13)) { ofxmask = 0; ofymask = 0; @@ -1235,10 +691,9 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) ofymask = ~ymask; } - if (CurUnit->Num) tilemapaddr = ((bgcnt & 0x1F00) << 6); - else tilemapaddr = ((bgcnt & 0x1F00) << 6); + tilemapaddr = ((bgcnt & 0x1F00) << 6); - if (bgcnt & 0x0004) + if (bgcnt & (1<<2)) { // direct color bitmap @@ -1266,7 +721,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask]; if (color & 0x8000) - drawPixel(&BGOBJLine[i], color, 0x01000000<Num) pal = (u16*)&GPU.Palette[0x400]; - else pal = (u16*)&GPU.Palette[0]; + if (GPU2D.Num) pal = (u16*)&GPU.Palette[0x400]; + else pal = (u16*)&GPU.Palette[0]; u8 color; @@ -1305,7 +760,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) - drawPixel(&BGOBJLine[i], pal[color], 0x01000000<> 14) & 0x3) { - case 0x0000: coordmask = 0x07800; yshift = 7; break; - case 0x4000: coordmask = 0x0F800; yshift = 8; break; - case 0x8000: coordmask = 0x1F800; yshift = 9; break; - case 0xC000: coordmask = 0x3F800; yshift = 10; break; + case 0: coordmask = 0x07800; yshift = 7; break; + case 1: coordmask = 0x0F800; yshift = 8; break; + case 2: coordmask = 0x1F800; yshift = 
9; break; + case 3: coordmask = 0x3F800; yshift = 10; break; } u32 overflowmask; - if (bgcnt & 0x2000) overflowmask = 0; - else overflowmask = ~(coordmask | 0x7FF); + if (bgcnt & (1<<13)) overflowmask = 0; + else overflowmask = ~(coordmask | 0x7FF); - if (CurUnit->Num) + if (GPU2D.Num) { tilesetaddr = ((bgcnt & 0x003C) << 12); tilemapaddr = ((bgcnt & 0x1F00) << 3); @@ -1341,8 +796,8 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) } else { - tilesetaddr = ((CurUnit->DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); - tilemapaddr = ((CurUnit->DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); + tilesetaddr = ((GPU2D.DispCnt & 0x07000000) >> 8) + ((bgcnt & 0x003C) << 12); + tilemapaddr = ((GPU2D.DispCnt & 0x38000000) >> 11) + ((bgcnt & 0x1F00) << 3); pal = (u16*)&GPU.Palette[0]; } @@ -1374,20 +829,20 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum) { curtile = *(u16*)&bgvram[(tilemapaddr + (((((finalY & coordmask) >> 11) << yshift) + ((finalX & coordmask) >> 11)) << 1)) & bgvrammask]; - if (extpal) curpal = CurUnit->GetBGExtPal(bgnum, curtile>>12); + if (extpal) curpal = GPU2D.GetBGExtPal(bgnum, curtile>>12); else curpal = pal; // draw pixel u32 tilexoff = (finalX >> 8) & 0x7; u32 tileyoff = (finalY >> 8) & 0x7; - if (curtile & 0x0400) tilexoff = 7-tilexoff; - if (curtile & 0x0800) tileyoff = 7-tileyoff; + if (curtile & (1<<10)) tilexoff = 7-tilexoff; + if (curtile & (1<<11)) tileyoff = 7-tileyoff; color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask]; if (color) - drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<BGXRefInternal[bgnum-2] += rotB; - CurUnit->BGYRefInternal[bgnum-2] += rotD; } -template -void SoftRenderer::DrawBG_Large(u32 line) // BG is always BG2 +template +void SoftRenderer2D::DrawBG_Large(u32 line) // BG is always BG2 { - u16 bgcnt = CurUnit->BGCnt[2]; + u16 bgcnt = GPU2D.BGCnt[2]; u16* pal; @@ -1414,16 +866,16 @@ void SoftRenderer::DrawBG_Large(u32 line) // BG is always 
BG2 // 3: 512x512 u32 xmask, ymask; u32 yshift; - switch (bgcnt & 0xC000) + switch ((bgcnt >> 14) & 0x3) { - case 0x0000: xmask = 0x1FFFF; ymask = 0x3FFFF; yshift = 9; break; - case 0x4000: xmask = 0x3FFFF; ymask = 0x1FFFF; yshift = 10; break; - case 0x8000: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break; - case 0xC000: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break; + case 0: xmask = 0x1FFFF; ymask = 0x3FFFF; yshift = 9; break; + case 1: xmask = 0x3FFFF; ymask = 0x1FFFF; yshift = 10; break; + case 2: xmask = 0x1FFFF; ymask = 0x0FFFF; yshift = 9; break; + case 3: xmask = 0x1FFFF; ymask = 0x1FFFF; yshift = 9; break; } u32 ofxmask, ofymask; - if (bgcnt & 0x2000) + if (bgcnt & (1<<13)) { ofxmask = 0; ofymask = 0; @@ -1434,29 +886,20 @@ void SoftRenderer::DrawBG_Large(u32 line) // BG is always BG2 ofymask = ~ymask; } - s16 rotA = CurUnit->BGRotA[0]; - s16 rotB = CurUnit->BGRotB[0]; - s16 rotC = CurUnit->BGRotC[0]; - s16 rotD = CurUnit->BGRotD[0]; - - s32 rotX = CurUnit->BGXRefInternal[0]; - s32 rotY = CurUnit->BGYRefInternal[0]; + s16 rotA = GPU2D.BGRotA[0]; + s16 rotC = GPU2D.BGRotC[0]; - if (bgcnt & 0x0040) - { - // vertical mosaic - rotX -= (CurUnit->BGMosaicY * rotB); - rotY -= (CurUnit->BGMosaicY * rotD); - } + s32 rotX = GPU2D.BGXRefInternal[0]; + s32 rotY = GPU2D.BGYRefInternal[0]; u8* bgvram; u32 bgvrammask; - CurUnit->GetBGVRAM(bgvram, bgvrammask); + GPU2D.GetBGVRAM(bgvram, bgvrammask); // 256-color bitmap - if (CurUnit->Num) pal = (u16*)&GPU.Palette[0x400]; - else pal = (u16*)&GPU.Palette[0]; + if (GPU2D.Num) pal = (u16*)&GPU.Palette[0x400]; + else pal = (u16*)&GPU.Palette[0]; u8 color; @@ -1482,126 +925,109 @@ void SoftRenderer::DrawBG_Large(u32 line) // BG is always BG2 color = bgvram[((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask]; if (color) - drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); + DrawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2); } } rotX += rotA; rotY += rotC; } - - CurUnit->BGXRefInternal[0] += 
rotB; - CurUnit->BGYRefInternal[0] += rotD; } -// OBJ line buffer: -// * bit0-15: color (bit15=1: direct color, bit15=0: palette index, bit12=0 to indicate extpal) -// * bit16-17: BG-relative priority -// * bit18: non-transparent sprite pixel exists here -// * bit19: X mosaic should be applied here -// * bit24-31: compositor flags -void SoftRenderer::ApplySpriteMosaicX() +void SoftRenderer2D::ApplySpriteMosaicX() { - // apply X mosaic if needed - // X mosaic for sprites is applied after all sprites are rendered - - if (CurUnit->OBJMosaicSize[0] == 0) return; + /* + * apply X mosaic if needed + * X mosaic for sprites is applied after all sprites are rendered + * + * rules: + * pixels are processed from left to right + * current pixel value is latched if: + * - the X mosaic counter is 0 + * - the current pixel doesn't receive sprite mosaic + * - the current pixel receives sprite mosaic and the previous one didn't, or vice versa + * - the current BG-relative priority value is lower than the previous one + */ + + u8 mosw = GPU2D.OBJMosaicSize[0]; + if (mosw == 0) return; + + u8 mosx = 0; + u32 latchcolor; + for (int i = 0; i < 256; i++) + { + u32 curcolor = OBJLine[i]; + bool latch = false; - u32* objLine = OBJLine[CurUnit->Num]; + if (mosx == 0) + latch = true; + else if (!(curcolor & OBJ_Mosaic)) + latch = true; + else if (!(latchcolor & OBJ_Mosaic)) + latch = true; + else if ((curcolor & OBJ_BGPrioMask) < (latchcolor & OBJ_BGPrioMask)) + latch = true; - u8* curOBJXMosaicTable = MosaicTable[CurUnit->OBJMosaicSize[0]].data(); + if (latch) + latchcolor = curcolor; - u32 lastcolor = objLine[0]; + OBJLine[i] = latchcolor; - for (u32 i = 1; i < 256; i++) - { - u32 currentcolor = objLine[i]; - - if (!(lastcolor & currentcolor & 0x100000) || curOBJXMosaicTable[i] == 0) - lastcolor = currentcolor; + if (mosx == mosw) + mosx = 0; else - objLine[i] = lastcolor; + mosx++; } } -template -void SoftRenderer::InterleaveSprites(u32 prio) +void SoftRenderer2D::InterleaveSprites(u32 
prio) { - u32* objLine = OBJLine[CurUnit->Num]; - u16* pal = (u16*)&GPU.Palette[CurUnit->Num ? 0x600 : 0x200]; + u32 attrmask = (prio << 16) | OBJ_IsOpaque; + u16* pal = (u16*)&GPU.Palette[GPU2D.Num ? 0x600 : 0x200]; + u16* extpal = GPU2D.GetOBJExtPal(); - if (CurUnit->DispCnt & 0x80000000) + for (u32 i = 0; i < 256; i++) { - u16* extpal = CurUnit->GetOBJExtPal(); + if ((OBJLine[i] & OBJ_OpaPrioMask) != attrmask) + continue; + if (!(WindowMask[i] & 0x10)) + continue; - for (u32 i = 0; i < 256; i++) - { - if ((objLine[i] & 0x70000) != prio) continue; - if (!(WindowMask[i] & 0x10)) continue; + u16 color; + u32 pixel = OBJLine[i]; - u16 color; - u32 pixel = objLine[i]; - - if (pixel & 0x8000) - color = pixel & 0x7FFF; - else if (pixel & 0x1000) - color = pal[pixel & 0xFF]; - else - color = extpal[pixel & 0xFFF]; - - drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); - } - } - else - { - // optimized no-extpal version - - for (u32 i = 0; i < 256; i++) - { - if ((objLine[i] & 0x70000) != prio) continue; - if (!(WindowMask[i] & 0x10)) continue; - - u16 color; - u32 pixel = objLine[i]; - - if (pixel & 0x8000) - color = pixel & 0x7FFF; - else - color = pal[pixel & 0xFF]; + if (pixel & OBJ_DirectColor) + color = pixel & 0x7FFF; + else if (pixel & OBJ_StandardPal) + color = pal[pixel & 0xFF]; + else + color = extpal[pixel & 0xFFF]; - drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); - } + DrawPixel(&BGOBJLine[i], color, pixel & 0xFF000000); } } #define DoDrawSprite(type, ...) 
\ - if (iswin) \ - { \ - DrawSprite_##type(__VA_ARGS__); \ - } \ - else \ + do \ { \ - DrawSprite_##type(__VA_ARGS__); \ - } + if (iswin) \ + { \ + DrawSprite_##type(__VA_ARGS__); \ + } \ + else \ + { \ + DrawSprite_##type(__VA_ARGS__); \ + } \ + } while (0) -void SoftRenderer::DrawSprites(u32 line, Unit* unit) +void SoftRenderer2D::DrawSprites(u32 line) { - CurUnit = unit; - - if (line == 0) - { - // reset those counters here - // TODO: find out when those are supposed to be reset - // it would make sense to reset them at the end of VBlank - // however, sprites are rendered one scanline in advance - // so they need to be reset a bit earlier - - CurUnit->OBJMosaicY = 0; - CurUnit->OBJMosaicYCount = 0; - } + // the OBJ buffers don't get updated at all if the 2D engine is disabled + if (!GPU2D.Enabled) + return; - if (CurUnit->Num == 0) + if (GPU2D.Num == 0) { auto objDirty = GPU.VRAMDirty_AOBJ.DeriveState(GPU.VRAMMap_AOBJ, GPU); GPU.MakeVRAMFlat_AOBJCoherent(objDirty); @@ -1612,12 +1038,14 @@ void SoftRenderer::DrawSprites(u32 line, Unit* unit) GPU.MakeVRAMFlat_BOBJCoherent(objDirty); } - NumSprites[CurUnit->Num] = 0; - memset(OBJLine[CurUnit->Num], 0, 256*4); - memset(OBJWindow[CurUnit->Num], 0, 256); - if (!(CurUnit->DispCnt & 0x1000)) return; + NumSprites = 0; + memset(OBJLine, 0, sizeof(OBJLine)); + memset(OBJWindow, 0, sizeof(OBJWindow)); + + if (!GPU2D.OBJEnable) + return; - u16* oam = (u16*)&GPU.OAM[CurUnit->Num ? 0x400 : 0]; + u16* oam = (u16*)&GPU.OAM[GPU2D.Num ? 
0x400 : 0]; const s32 spritewidth[16] = { @@ -1634,89 +1062,93 @@ void SoftRenderer::DrawSprites(u32 line, Unit* unit) 64, 32, 64, 8 }; - for (int bgnum = 0x0C00; bgnum >= 0x0000; bgnum -= 0x0400) + for (int sprnum = 0; sprnum < 128; sprnum++) { - for (int sprnum = 127; sprnum >= 0; sprnum--) - { - u16* attrib = &oam[sprnum*4]; + u16* attrib = &oam[sprnum*4]; - if ((attrib[2] & 0x0C00) != bgnum) - continue; + u16 sprtype = (attrib[0] >> 8) & 0x3; + if (sprtype == 2) // disabled + continue; - bool iswin = (((attrib[0] >> 10) & 0x3) == 2); + bool iswin = (((attrib[0] >> 10) & 0x3) == 2); - u32 sprline; - if ((attrib[0] & 0x1000) && !iswin) - { - // apply Y mosaic - sprline = CurUnit->OBJMosaicY; - } - else - sprline = line; - - if (attrib[0] & 0x0100) - { - u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); - s32 width = spritewidth[sizeparam]; - s32 height = spriteheight[sizeparam]; - s32 boundwidth = width; - s32 boundheight = height; - - if (attrib[0] & 0x0200) - { - boundwidth <<= 1; - boundheight <<= 1; - } - - u32 ypos = attrib[0] & 0xFF; - if (((line - ypos) & 0xFF) >= (u32)boundheight) - continue; - ypos = (sprline - ypos) & 0xFF; + u32 sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); + s32 width = spritewidth[sizeparam]; + s32 height = spriteheight[sizeparam]; + s32 boundwidth = width; + s32 boundheight = height; - s32 xpos = (s32)(attrib[1] << 23) >> 23; - if (xpos <= -boundwidth) - continue; + if (sprtype == 3) // double-size rotscale sprite + { + boundwidth <<= 1; + boundheight <<= 1; + } - u32 rotparamgroup = (attrib[1] >> 9) & 0x1F; + // TODO checkme (128-tall sprite overflow thing) + s32 ypos = attrib[0] & 0xFF; + if (((line - ypos) & 0xFF) >= boundheight) + continue; - DoDrawSprite(Rotscale, sprnum, boundwidth, boundheight, width, height, xpos, ypos); + s32 xpos = (s32)(attrib[1] << 23) >> 23; + if (xpos <= -boundwidth) + continue; - NumSprites[CurUnit->Num]++; - } - else - { - if (attrib[0] & 0x0200) - continue; - - u32 
sizeparam = (attrib[0] >> 14) | ((attrib[1] & 0xC000) >> 12); - s32 width = spritewidth[sizeparam]; - s32 height = spriteheight[sizeparam]; + if ((attrib[0] & (1<<12)) && (!iswin)) + { + // adjust Y position for sprite mosaic + // (sprite mosaic does not apply to OBJ-window sprites) + // a ypos greater than the sprite height means we underflowed, due to OBJMosaicLine being + // latched before the sprite's top, so we clamp it to 0 + ypos = (GPU2D.OBJMosaicLine - ypos) & 0xFF; + if (ypos >= boundheight) ypos = 0; + } + else + ypos = (line - ypos) & 0xFF; - u32 ypos = attrib[0] & 0xFF; - if (((line - ypos) & 0xFF) >= (u32)height) - continue; - ypos = (sprline - ypos) & 0xFF; + if (sprtype & 1) + DoDrawSprite(Rotscale, sprnum, boundwidth, boundheight, width, height, xpos, ypos); + else + DoDrawSprite(Normal, sprnum, width, height, xpos, ypos); - s32 xpos = (s32)(attrib[1] << 23) >> 23; - if (xpos <= -width) - continue; + NumSprites++; + } +} - DoDrawSprite(Normal, sprnum, width, height, xpos, ypos); +template +void SoftRenderer2D::DrawSpritePixel(int color, u32 pixelattr, s32 xpos) +{ + if (window) + { + if (color != -1) + OBJWindow[xpos] = 1; + } + else + { + u32 oldpixel = OBJLine[xpos]; + bool oldisopaque = !!(oldpixel & OBJ_IsOpaque); + bool newisopaque = (color != -1); + bool priocheck = (pixelattr & OBJ_BGPrioMask) < (oldpixel & OBJ_BGPrioMask); - NumSprites[CurUnit->Num]++; - } + if (newisopaque && (!oldisopaque || priocheck)) + { + OBJLine[xpos] = color | pixelattr; + } + else if (!newisopaque && !oldisopaque) + { + OBJLine[xpos] &= ~(OBJ_Mosaic | OBJ_BGPrioMask); + OBJLine[xpos] |= (pixelattr & (OBJ_IsSprite | OBJ_Mosaic | OBJ_BGPrioMask)); } } } template -void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos) +void SoftRenderer2D::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos) { - u16* oam = (u16*)&GPU.OAM[CurUnit->Num ? 
0x400 : 0]; + u16* oam = (u16*)&GPU.OAM[GPU2D.Num ? 0x400 : 0]; u16* attrib = &oam[num * 4]; u16* rotparams = &oam[(((attrib[1] >> 9) & 0x1F) * 16) + 3]; - u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000; + u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | OBJ_IsSprite | OBJ_IsOpaque; u32 tilenum = attrib[2] & 0x03FF; u32 spritemode = window ? 0 : ((attrib[0] >> 10) & 0x3); @@ -1724,18 +1156,15 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u8* objvram; u32 objvrammask; - CurUnit->GetOBJVRAM(objvram, objvrammask); - - u32* objLine = OBJLine[CurUnit->Num]; - u8* objWindow = OBJWindow[CurUnit->Num]; + GPU2D.GetOBJVRAM(objvram, objvrammask); s32 centerX = boundwidth >> 1; s32 centerY = boundheight >> 1; - if ((attrib[0] & 0x1000) && !window) + if ((attrib[0] & (1<<12)) && !window) { // apply Y mosaic - pixelattr |= 0x100000; + pixelattr |= OBJ_Mosaic; } u32 xoff; @@ -1773,9 +1202,9 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, pixelattr |= (0xC0000000 | (alpha << 24)); u32 pixelsaddr; - if (CurUnit->DispCnt & 0x40) + if (GPU2D.DispCnt & 0x40) { - if (CurUnit->DispCnt & 0x20) + if (GPU2D.DispCnt & 0x20) { // 'reserved' // draws nothing @@ -1784,13 +1213,13 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, } else { - pixelsaddr = tilenum << (7 + ((CurUnit->DispCnt >> 22) & 0x1)); + pixelsaddr = tilenum << (7 + ((GPU2D.DispCnt >> 22) & 0x1)); ytilefactor = ((width >> 8) * 2); } } else { - if (CurUnit->DispCnt & 0x20) + if (GPU2D.DispCnt & 0x20) { pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); ytilefactor = (256 * 2); @@ -1808,16 +1237,7 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, { color = *(u16*)&objvram[(pixelsaddr + ((rotY >> 8) * ytilefactor) + ((rotX >> 8) << 1)) & objvrammask]; - if (color & 0x8000) - { - if (window) objWindow[xpos] = 1; - else objLine[xpos] = color | pixelattr; - } - else if 
(!window) - { - if (objLine[xpos] == 0) - objLine[xpos] = pixelattr & 0x180000; - } + DrawSpritePixel((color&0x8000) ? color : -1, pixelattr, xpos); } rotX += rotA; @@ -1829,9 +1249,9 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, else { u32 pixelsaddr = tilenum; - if (CurUnit->DispCnt & 0x10) + if (GPU2D.DispCnt & (1<<4)) { - pixelsaddr <<= ((CurUnit->DispCnt >> 20) & 0x3); + pixelsaddr <<= ((GPU2D.DispCnt >> 20) & 0x3); ytilefactor = (width >> 11) << ((attrib[0] & 0x2000) ? 1:0); } else @@ -1845,14 +1265,14 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, ytilefactor <<= 5; pixelsaddr <<= 5; - if (attrib[0] & 0x2000) + if (attrib[0] & (1<<13)) { // 256-color if (!window) { - if (!(CurUnit->DispCnt & 0x80000000)) - pixelattr |= 0x1000; + if (!(GPU2D.DispCnt & (1<<31))) + pixelattr |= OBJ_StandardPal; else pixelattr |= ((attrib[2] & 0xF000) >> 4); } @@ -1863,16 +1283,7 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, { color = objvram[(pixelsaddr + ((rotY>>11)*ytilefactor) + ((rotY&0x700)>>5) + ((rotX>>11)*64) + ((rotX&0x700)>>8)) & objvrammask]; - if (color) - { - if (window) objWindow[xpos] = 1; - else objLine[xpos] = color | pixelattr; - } - else if (!window) - { - if (objLine[xpos] == 0) - objLine[xpos] = pixelattr & 0x180000; - } + DrawSpritePixel(color ? 
color : -1, pixelattr, xpos); } rotX += rotA; @@ -1886,7 +1297,7 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, // 16-color if (!window) { - pixelattr |= 0x1000; + pixelattr |= OBJ_StandardPal; pixelattr |= ((attrib[2] & 0xF000) >> 8); } @@ -1900,16 +1311,7 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, else color &= 0x0F; - if (color) - { - if (window) objWindow[xpos] = 1; - else objLine[xpos] = color | pixelattr; - } - else if (!window) - { - if (objLine[xpos] == 0) - objLine[xpos] = pixelattr & 0x180000; - } + DrawSpritePixel(color ? color : -1, pixelattr, xpos); } rotX += rotA; @@ -1922,32 +1324,29 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, } template -void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos) +void SoftRenderer2D::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos) { - u16* oam = (u16*)&GPU.OAM[CurUnit->Num ? 0x400 : 0]; + u16* oam = (u16*)&GPU.OAM[GPU2D.Num ? 0x400 : 0]; u16* attrib = &oam[num * 4]; - u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | 0xC0000; + u32 pixelattr = ((attrib[2] & 0x0C00) << 6) | OBJ_IsSprite | OBJ_IsOpaque; u32 tilenum = attrib[2] & 0x03FF; u32 spritemode = window ? 
0 : ((attrib[0] >> 10) & 0x3); u32 wmask = width - 8; // really ((width - 1) & ~0x7) - if ((attrib[0] & 0x1000) && !window) + if ((attrib[0] & (1<<12)) && !window) { // apply Y mosaic - pixelattr |= 0x100000; + pixelattr |= OBJ_Mosaic; } u8* objvram; u32 objvrammask; - CurUnit->GetOBJVRAM(objvram, objvrammask); - - u32* objLine = OBJLine[CurUnit->Num]; - u8* objWindow = OBJWindow[CurUnit->Num]; + GPU2D.GetOBJVRAM(objvram, objvrammask); // yflip - if (attrib[1] & 0x2000) + if (attrib[1] & (1<<13)) ypos = height-1 - ypos; u32 xoff; @@ -1977,9 +1376,9 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s pixelattr |= (0xC0000000 | (alpha << 24)); u32 pixelsaddr = tilenum; - if (CurUnit->DispCnt & 0x40) + if (GPU2D.DispCnt & 0x40) { - if (CurUnit->DispCnt & 0x20) + if (GPU2D.DispCnt & 0x20) { // 'reserved' // draws nothing @@ -1988,13 +1387,13 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s } else { - pixelsaddr <<= (7 + ((CurUnit->DispCnt >> 22) & 0x1)); + pixelsaddr <<= (7 + ((GPU2D.DispCnt >> 22) & 0x1)); pixelsaddr += (ypos * width * 2); } } else { - if (CurUnit->DispCnt & 0x20) + if (GPU2D.DispCnt & 0x20) { pixelsaddr = ((tilenum & 0x01F) << 4) + ((tilenum & 0x3E0) << 7); pixelsaddr += (ypos * 256 * 2); @@ -2008,7 +1407,7 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s s32 pixelstride; - if (attrib[1] & 0x1000) // xflip + if (attrib[1] & (1<<12)) // xflip { pixelsaddr += ((width-1) << 1); pixelsaddr -= (xoff << 1); @@ -2026,16 +1425,7 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s pixelsaddr += pixelstride; - if (color & 0x8000) - { - if (window) objWindow[xpos] = 1; - else objLine[xpos] = color | pixelattr; - } - else if (!window) - { - if (objLine[xpos] == 0) - objLine[xpos] = pixelattr & 0x180000; - } + DrawSpritePixel((color&0x8000) ? 
color : -1, pixelattr, xpos); xoff++; xpos++; @@ -2044,9 +1434,9 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s else { u32 pixelsaddr = tilenum; - if (CurUnit->DispCnt & 0x10) + if (GPU2D.DispCnt & (1<<4)) { - pixelsaddr <<= ((CurUnit->DispCnt >> 20) & 0x3); + pixelsaddr <<= ((GPU2D.DispCnt >> 20) & 0x3); pixelsaddr += ((ypos >> 3) * (width >> 3)) << ((attrib[0] & 0x2000) ? 1:0); } else @@ -2057,7 +1447,7 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s if (spritemode == 1) pixelattr |= 0x80000000; else pixelattr |= 0x10000000; - if (attrib[0] & 0x2000) + if (attrib[0] & (1<<13)) { // 256-color pixelsaddr <<= 5; @@ -2066,13 +1456,13 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s if (!window) { - if (!(CurUnit->DispCnt & 0x80000000)) - pixelattr |= 0x1000; + if (!(GPU2D.DispCnt & (1<<31))) + pixelattr |= OBJ_StandardPal; else pixelattr |= ((attrib[2] & 0xF000) >> 4); } - if (attrib[1] & 0x1000) // xflip + if (attrib[1] & (1<<12)) // xflip { pixelsaddr += (((width-1) & wmask) << 3); pixelsaddr += ((width-1) & 0x7); @@ -2093,16 +1483,7 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s pixelsaddr += pixelstride; - if (color) - { - if (window) objWindow[xpos] = 1; - else objLine[xpos] = color | pixelattr; - } - else if (!window) - { - if (objLine[xpos] == 0) - objLine[xpos] = pixelattr & 0x180000; - } + DrawSpritePixel(color ? color : -1, pixelattr, xpos); xoff++; xpos++; @@ -2118,14 +1499,14 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s if (!window) { - pixelattr |= 0x1000; + pixelattr |= OBJ_StandardPal; pixelattr |= ((attrib[2] & 0xF000) >> 8); } // TODO: optimize VRAM access!! // TODO: do xflip better? 
the 'two pixels per byte' thing makes it a bit shitty - if (attrib[1] & 0x1000) // xflip + if (attrib[1] & (1<<12)) // xflip { pixelsaddr += (((width-1) & wmask) << 2); pixelsaddr += (((width-1) & 0x7) >> 1); @@ -2142,7 +1523,7 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s for (; xoff < xend;) { - if (attrib[1] & 0x1000) + if (attrib[1] & (1<<12)) { if (xoff & 0x1) { color = objvram[pixelsaddr & objvrammask] & 0x0F; pixelsaddr--; } else color = objvram[pixelsaddr & objvrammask] >> 4; @@ -2153,16 +1534,7 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s else color = objvram[pixelsaddr & objvrammask] & 0x0F; } - if (color) - { - if (window) objWindow[xpos] = 1; - else objLine[xpos] = color | pixelattr; - } - else if (!window) - { - if (objLine[xpos] == 0) - objLine[xpos] = pixelattr & 0x180000; - } + DrawSpritePixel(color ? color : -1, pixelattr, xpos); xoff++; xpos++; @@ -2173,4 +1545,3 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s } } -} diff --git a/src/GPU2D_Soft.h b/src/GPU2D_Soft.h index 4beaf8f99f..1e7be2b597 100644 --- a/src/GPU2D_Soft.h +++ b/src/GPU2D_Soft.h @@ -22,31 +22,43 @@ namespace melonDS { -class GPU; +class SoftRenderer; -namespace GPU2D -{ - -class SoftRenderer : public Renderer2D +class SoftRenderer2D : public Renderer2D { public: - SoftRenderer(melonDS::GPU& gpu); - ~SoftRenderer() override {} + SoftRenderer2D(melonDS::GPU2D& gpu2D, SoftRenderer& parent); + ~SoftRenderer2D() override; + bool Init() override { return true; } + void Reset() override; + + void DrawScanline(u32 line) override; + void DrawSprites(u32 line) override; + void VBlank() override {} + void VBlankEnd() override {}; - void DrawScanline(u32 line, Unit* unit) override; - void DrawSprites(u32 line, Unit* unit) override; - void VBlankEnd(Unit* unitA, Unit* unitB) override; private: - melonDS::GPU& GPU; - alignas(8) u32 BGOBJLine[256*3]; - u32* _3DLine; + SoftRenderer& 
Parent; + + enum + { + OBJ_StandardPal = (1<<12), + OBJ_DirectColor = (1<<15), + OBJ_BGPrioMask = (0x3<<16), + OBJ_IsOpaque = (1<<18), + OBJ_OpaPrioMask = (OBJ_BGPrioMask | OBJ_IsOpaque), + OBJ_IsSprite = (1<<19), + OBJ_Mosaic = (1<<20), + }; + + alignas(8) u32 BGOBJLine[256*2]; alignas(8) u8 WindowMask[256]; - alignas(8) u32 OBJLine[2][256]; - alignas(8) u8 OBJWindow[2][256]; + alignas(8) u32 OBJLine[256]; + alignas(8) u8 OBJWindow[256]; - u32 NumSprites[2]; + u32 NumSprites; u8* CurBGXMosaicTable; array2d MosaicTable = []() constexpr @@ -65,85 +77,26 @@ class SoftRenderer : public Renderer2D return table; }(); - static constexpr u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) noexcept - { - u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb) + 0x000008) >> 4; - u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb) + 0x000800) >> 4) & 0x007F00; - u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb) + 0x080000) >> 4) & 0x7F0000; - - if (r > 0x00003F) r = 0x00003F; - if (g > 0x003F00) g = 0x003F00; - if (b > 0x3F0000) b = 0x3F0000; - - return r | g | b | 0xFF000000; - } - - static constexpr u32 ColorBlend5(u32 val1, u32 val2) noexcept - { - u32 eva = ((val1 >> 24) & 0x1F) + 1; - u32 evb = 32 - eva; - - if (eva == 32) return val1; - - u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb) + 0x000010) >> 5; - u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb) + 0x001000) >> 5) & 0x007F00; - u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb) + 0x100000) >> 5) & 0x7F0000; - - if (r > 0x00003F) r = 0x00003F; - if (g > 0x003F00) g = 0x003F00; - if (b > 0x3F0000) b = 0x3F0000; - - return r | g | b | 0xFF000000; - } - - static constexpr u32 ColorBrightnessUp(u32 val, u32 factor, u32 bias) noexcept - { - u32 rb = val & 0x3F003F; - u32 g = val & 0x003F00; - - rb += (((((0x3F003F - rb) * factor) + (bias*0x010001)) >> 4) & 0x3F003F); - g += (((((0x003F00 - g ) * factor) + (bias*0x000100)) >> 4) & 0x003F00); 
- - return rb | g | 0xFF000000; - } - - static constexpr u32 ColorBrightnessDown(u32 val, u32 factor, u32 bias) noexcept - { - u32 rb = val & 0x3F003F; - u32 g = val & 0x003F00; - - rb -= ((((rb * factor) + (bias*0x010001)) >> 4) & 0x3F003F); - g -= ((((g * factor) + (bias*0x000100)) >> 4) & 0x003F00); - - return rb | g | 0xFF000000; - } u32 ColorComposite(int i, u32 val1, u32 val2) const; template void DrawScanlineBGMode(u32 line); void DrawScanlineBGMode6(u32 line); void DrawScanlineBGMode7(u32 line); - void DrawScanline_BGOBJ(u32 line); + void DrawScanline_BGOBJ(u32 line, u32* dst); - static void DrawPixel_Normal(u32* dst, u16 color, u32 flag); - static void DrawPixel_Accel(u32* dst, u16 color, u32 flag); - - typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag); + static void DrawPixel(u32* dst, u16 color, u32 flag); void DrawBG_3D(); - template void DrawBG_Text(u32 line, u32 bgnum); - template void DrawBG_Affine(u32 line, u32 bgnum); - template void DrawBG_Extended(u32 line, u32 bgnum); - template void DrawBG_Large(u32 line); + template void DrawBG_Text(u32 line, u32 bgnum); + template void DrawBG_Affine(u32 line, u32 bgnum); + template void DrawBG_Extended(u32 line, u32 bgnum); + template void DrawBG_Large(u32 line); void ApplySpriteMosaicX(); - template void InterleaveSprites(u32 prio); + template void DrawSpritePixel(int color, u32 pixelattr, s32 xpos); template void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos); template void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos); - - void DoCapture(u32 line, u32 width); }; } - -} \ No newline at end of file diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp index 1204741d40..f72c5a3425 100644 --- a/src/GPU3D.cpp +++ b/src/GPU3D.cpp @@ -141,9 +141,9 @@ const u8 CmdNumParams[256] = void MatrixLoadIdentity(s32* m); -GPU3D::GPU3D(melonDS::NDS& nds, std::unique_ptr&& renderer) noexcept : - NDS(nds), - CurrentRenderer(renderer ? 
std::move(renderer) : std::make_unique()) +GPU3D::GPU3D(melonDS::GPU& gpu) noexcept : + NDS(gpu.NDS), + GPU(gpu) { } @@ -160,12 +160,6 @@ void Vertex::DoSavestate(Savestate* file) noexcept file->VarArray(HiresPosition, sizeof(HiresPosition)); } -void GPU3D::SetCurrentRenderer(std::unique_ptr&& renderer) noexcept -{ - CurrentRenderer = std::move(renderer); - CurrentRenderer->Reset(NDS.GPU); -} - void GPU3D::ResetRenderingState() noexcept { RenderNumPolygons = 0; @@ -304,21 +298,12 @@ void GPU3D::Reset() noexcept FlushAttributes = 0; RenderXPos = 0; - - if (CurrentRenderer) - CurrentRenderer->Reset(NDS.GPU); } void GPU3D::DoSavestate(Savestate* file) noexcept { file->Section("GP3D"); - SoftRenderer* softRenderer = dynamic_cast(CurrentRenderer.get()); - if (softRenderer && softRenderer->IsThreaded()) - { - softRenderer->SetupRenderThread(NDS.GPU); - } - CmdFIFO.DoSavestate(file); CmdPIPE.DoSavestate(file); @@ -555,11 +540,8 @@ void GPU3D::DoSavestate(Savestate* file) noexcept file->Var32(&CurPolygonAttr); file->Var32(&TexParam); file->Var32(&TexPalette); + RenderFrameIdentical = false; - if (softRenderer && softRenderer->IsThreaded()) - { - softRenderer->EnableRenderThread(); - } } @@ -2427,22 +2409,6 @@ void GPU3D::CheckFIFODMA() noexcept NDS.CheckDMAs(0, 0x07); } -void GPU3D::VCount144(GPU& gpu) noexcept -{ - CurrentRenderer->VCount144(gpu); -} - -void GPU3D::RestartFrame(GPU& gpu) noexcept -{ - CurrentRenderer->RestartFrame(gpu); -} - -void GPU3D::Stop(const GPU& gpu) noexcept -{ - if (CurrentRenderer) - CurrentRenderer->Stop(gpu); -} - bool YSort(Polygon* a, Polygon* b) { @@ -2533,59 +2499,15 @@ void GPU3D::VBlank() noexcept } } -void GPU3D::VCount215(GPU& gpu) noexcept -{ - CurrentRenderer->RenderFrame(gpu); -} -void GPU3D::SetRenderXPos(u16 xpos) noexcept +void GPU3D::SetRenderXPos(u16 xpos, u16 mask) noexcept { if (!RenderingEnabled) return; - RenderXPos = xpos & 0x01FF; + RenderXPos = (RenderXPos & ~mask) | (xpos & mask & 0x01FF); } -u32* GPU3D::GetLine(int 
line) noexcept -{ - if (!AbortFrame) - { - u32* rawline = CurrentRenderer->GetLine(line); - - if (RenderXPos == 0) return rawline; - - // apply X scroll - - if (RenderXPos & 0x100) - { - int i = 0, j = RenderXPos; - for (; j < 512; i++, j++) - ScrolledLine[i] = 0; - for (j = 0; i < 256; i++, j++) - ScrolledLine[i] = rawline[j]; - } - else - { - int i = 0, j = RenderXPos; - for (; j < 256; i++, j++) - ScrolledLine[i] = rawline[j]; - for (; i < 256; i++) - ScrolledLine[i] = 0; - } - } - else - { - memset(ScrolledLine, 0, 256*4); - } - - return ScrolledLine; -} - -bool GPU3D::IsRendererAccelerated() const noexcept -{ - return CurrentRenderer && CurrentRenderer->Accelerated; -} - void GPU3D::WriteToGXFIFO(u32 val) noexcept { if (NumCommands == 0) @@ -2995,15 +2917,5 @@ void GPU3D::Write32(u32 addr, u32 val) noexcept Log(LogLevel::Debug, "unknown GPU3D write32 %08X %08X\n", addr, val); } -void GPU3D::Blit(const GPU& gpu) noexcept -{ - if (CurrentRenderer) - CurrentRenderer->Blit(gpu); -} - -Renderer3D::Renderer3D(bool Accelerated) -: Accelerated(Accelerated) -{ } - } diff --git a/src/GPU3D.h b/src/GPU3D.h index 08c2208c47..2a86e022fe 100644 --- a/src/GPU3D.h +++ b/src/GPU3D.h @@ -88,7 +88,7 @@ class NDS; class GPU3D { public: - GPU3D(melonDS::NDS& nds, std::unique_ptr&& renderer = nullptr) noexcept; + GPU3D(melonDS::GPU& gpu) noexcept; ~GPU3D() noexcept = default; void Reset() noexcept; @@ -103,33 +103,22 @@ class GPU3D void CheckFIFOIRQ() noexcept; void CheckFIFODMA() noexcept; - void VCount144(GPU& gpu) noexcept; void VBlank() noexcept; - void VCount215(GPU& gpu) noexcept; - void RestartFrame(GPU& gpu) noexcept; - void Stop(const GPU& gpu) noexcept; - - void SetRenderXPos(u16 xpos) noexcept; + void SetRenderXPos(u16 xpos, u16 mask) noexcept; [[nodiscard]] u16 GetRenderXPos() const noexcept { return RenderXPos; } - u32* GetLine(int line) noexcept; void WriteToGXFIFO(u32 val) noexcept; - [[nodiscard]] bool IsRendererAccelerated() const noexcept; - [[nodiscard]] 
Renderer3D& GetCurrentRenderer() noexcept { return *CurrentRenderer; } - [[nodiscard]] const Renderer3D& GetCurrentRenderer() const noexcept { return *CurrentRenderer; } - void SetCurrentRenderer(std::unique_ptr&& renderer) noexcept; - u8 Read8(u32 addr) noexcept; u16 Read16(u32 addr) noexcept; u32 Read32(u32 addr) noexcept; void Write8(u32 addr, u8 val) noexcept; void Write16(u32 addr, u16 val) noexcept; void Write32(u32 addr, u32 val) noexcept; - void Blit(const GPU& gpu) noexcept; + private: - melonDS::NDS& NDS; + typedef union { u64 _contents; @@ -187,11 +176,10 @@ class GPU3D NormalPipeline = 0; } - std::unique_ptr CurrentRenderer = nullptr; - - u16 RenderXPos = 0; - public: + melonDS::NDS& NDS; + melonDS::GPU& GPU; + FIFO CmdFIFO {}; FIFO CmdPIPE {}; @@ -273,6 +261,8 @@ class GPU3D bool RenderFrameIdentical = false; // not part of the hardware state, don't serialize + u16 RenderXPos = 0; + bool AbortFrame = false; u64 Timestamp = 0; @@ -326,40 +316,33 @@ class GPU3D u32 FlushRequest = 0; u32 FlushAttributes = 0; - u32 ScrolledLine[256]; // not part of the hardware state, don't serialize }; class Renderer3D { public: + explicit Renderer3D(melonDS::GPU3D& gpu3D) : GPU(gpu3D.GPU), GPU3D(gpu3D) {} virtual ~Renderer3D() = default; Renderer3D(const Renderer3D&) = delete; Renderer3D& operator=(const Renderer3D&) = delete; + virtual bool Init() { return true; } + virtual void Reset() = 0; - virtual void Reset(GPU& gpu) = 0; - - // This "Accelerated" flag currently communicates if the framebuffer should - // be allocated differently and other little misc handlers. 
Ideally there - // are more detailed "traits" that we can ask of the Renderer3D type - const bool Accelerated; + virtual void RenderFrame() = 0; + virtual void FinishRendering() {} + virtual void RestartFrame() {}; - virtual void VCount144(GPU& gpu) {}; - virtual void Stop(const GPU& gpu) {} - virtual void RenderFrame(GPU& gpu) = 0; - virtual void RestartFrame(GPU& gpu) {}; + // return one scanline of the framebuffer, with X scroll applied + // this is used in software renderers virtual u32* GetLine(int line) = 0; - virtual void Blit(const GPU& gpu) {}; - - virtual void SetupAccelFrame() {} - virtual void PrepareCaptureFrame() {} - virtual void BindOutputTexture(int buffer) {} virtual bool NeedsShaderCompile() { return false; } virtual void ShaderCompileStep(int& current, int& count) {} protected: - Renderer3D(bool Accelerated); + melonDS::GPU& GPU; + melonDS::GPU3D& GPU3D; }; } diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 7af6e2b7ac..50b1ecbb3b 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -16,7 +16,7 @@ with melonDS. If not, see http://www.gnu.org/licenses/. 
*/ -#include "GPU3D_Compute.h" +#include "GPU_OpenGL.h" #include #include @@ -30,11 +30,14 @@ namespace melonDS { -ComputeRenderer::ComputeRenderer(GLCompositor&& compositor) - : Renderer3D(true), Texcache(TexcacheOpenGLLoader()), CurGLCompositor(std::move(compositor)) -{} +ComputeRenderer3D::ComputeRenderer3D(melonDS::GPU3D& gpu3D, GLRenderer& parent) + : Renderer3D(gpu3D), Parent(parent), Texcache(gpu3D.GPU, TexcacheOpenGLLoader(true)) +{ + ScaleFactor = 0; + HiresCoordinates = false; +} -bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, const std::initializer_list& defines) +bool ComputeRenderer3D::CompileShader(GLuint& shader, const std::string& source, const std::initializer_list& defines) { std::string shaderName; std::string shaderSource; @@ -68,7 +71,7 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c return OpenGL::CompileComputeProgram(shader, shaderSource.c_str(), shaderName.c_str()); } -void ComputeRenderer::ShaderCompileStep(int& current, int& count) +void ComputeRenderer3D::ShaderCompileStep(int& current, int& count) { current = ShaderStepIdx; ShaderStepIdx++; @@ -185,61 +188,68 @@ void blah(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length printf("%s\n", message); } -std::unique_ptr ComputeRenderer::New() +bool ComputeRenderer3D::Init() { - std::optional compositor = GLCompositor::New(); - if (!compositor) - return nullptr; - - std::unique_ptr result = std::unique_ptr(new ComputeRenderer(std::move(*compositor))); - //glDebugMessageCallback(blah, NULL); //glEnable(GL_DEBUG_OUTPUT); - glGenBuffers(1, &result->YSpanSetupMemory); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, result->YSpanSetupMemory); + glGenBuffers(1, &YSpanSetupMemory); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, YSpanSetupMemory); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(SpanSetupY)*MaxYSpanSetups, nullptr, GL_DYNAMIC_DRAW); - glGenBuffers(1, &result->RenderPolygonMemory); - 
glBindBuffer(GL_SHADER_STORAGE_BUFFER, result->RenderPolygonMemory); + glGenBuffers(1, &RenderPolygonMemory); + glBindBuffer(GL_SHADER_STORAGE_BUFFER, RenderPolygonMemory); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(RenderPolygon)*2048, nullptr, GL_DYNAMIC_DRAW); - glGenBuffers(1, &result->XSpanSetupMemory); - glGenBuffers(1, &result->BinResultMemory); - glGenBuffers(1, &result->FinalTileMemory); - glGenBuffers(1, &result->YSpanIndicesTextureMemory); - glGenBuffers(tilememoryLayer_Num, result->TileMemory); - glGenBuffers(1, &result->WorkDescMemory); + glGenBuffers(1, &XSpanSetupMemory); + glGenBuffers(1, &BinResultMemory); + glGenBuffers(1, &FinalTileMemory); + glGenBuffers(1, &YSpanIndicesTextureMemory); + glGenBuffers(tilememoryLayer_Num, TileMemory); + glGenBuffers(1, &WorkDescMemory); - glGenTextures(1, &result->YSpanIndicesTexture); - glGenTextures(1, &result->LowResFramebuffer); - glBindTexture(GL_TEXTURE_2D, result->LowResFramebuffer); - glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8UI, 256, 192); + glGenTextures(1, &YSpanIndicesTexture); - glGenBuffers(1, &result->MetaUniformMemory); - glBindBuffer(GL_UNIFORM_BUFFER, result->MetaUniformMemory); + glGenBuffers(1, &MetaUniformMemory); + glBindBuffer(GL_UNIFORM_BUFFER, MetaUniformMemory); glBufferData(GL_UNIFORM_BUFFER, sizeof(MetaUniform), nullptr, GL_DYNAMIC_DRAW); - glGenSamplers(9, result->Samplers); + glGenSamplers(9, Samplers); for (u32 j = 0; j < 3; j++) { for (u32 i = 0; i < 3; i++) { const GLenum translateWrapMode[3] = {GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT}; - glSamplerParameteri(result->Samplers[i+j*3], GL_TEXTURE_WRAP_S, translateWrapMode[i]); - glSamplerParameteri(result->Samplers[i+j*3], GL_TEXTURE_WRAP_T, translateWrapMode[j]); - glSamplerParameteri(result->Samplers[i+j*3], GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameterf(result->Samplers[i+j*3], GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_WRAP_S, translateWrapMode[i]); + 
glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_WRAP_T, translateWrapMode[j]); + glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(Samplers[i+j*3], GL_TEXTURE_MAG_FILTER, GL_NEAREST); } } - glGenBuffers(1, &result->PixelBuffer); - glBindBuffer(GL_PIXEL_PACK_BUFFER, result->PixelBuffer); - glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, NULL, GL_DYNAMIC_READ); + // init textures for the clear bitmap + glGenTextures(2, ClearBitmapTex); + + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[0]); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 256, 256, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); - return result; + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[1]); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 256, 256, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); + + ClearBitmap[0] = new u32[256*256]; + ClearBitmap[1] = new u32[256*256]; + + return true; } -ComputeRenderer::~ComputeRenderer() +ComputeRenderer3D::~ComputeRenderer3D() { Texcache.Reset(); @@ -256,10 +266,13 @@ ComputeRenderer::~ComputeRenderer() glDeleteBuffers(1, &MetaUniformMemory); glDeleteSamplers(9, Samplers); - glDeleteBuffers(1, &PixelBuffer); + + glDeleteTextures(2, ClearBitmapTex); + delete[] ClearBitmap[0]; + delete[] ClearBitmap[1]; } -void ComputeRenderer::DeleteShaders() +void ComputeRenderer3D::DeleteShaders() { std::initializer_list allPrograms = { @@ -301,17 +314,16 @@ void ComputeRenderer::DeleteShaders() glDeleteProgram(program); } 
-void ComputeRenderer::Reset(GPU& gpu) +void ComputeRenderer3D::Reset() { Texcache.Reset(); + ClearBitmapDirty = 0x3; } -void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates) +void ComputeRenderer3D::SetRenderSettings(int scale, bool highResolutionCoordinates) { u8 TileScale; - CurGLCompositor.SetScaleFactor(scale); - if (ScaleFactor != -1) { DeleteShaders(); @@ -331,7 +343,7 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate std::printf("Scale: %d\n", ScaleFactor); std::printf("TileScale: %d\n", TileScale); - + TileSize = std::min(8 * TileScale, 32); CoarseTileCountY = TileSize < 32 ? 4 : 6; ClearCoarseBinMaskLocalSize = TileSize < 32 ? 64 : 48; @@ -371,6 +383,8 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate glBindTexture(GL_TEXTURE_2D, Framebuffer); glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, ScreenWidth, ScreenHeight); + Parent.OutputTex3D = Framebuffer; + // eh those are pretty bad guesses // though real hw shouldn't be eable to render all 2048 polygons on every line either int maxYSpanIndices = 64*2048 * ScaleFactor; @@ -386,12 +400,8 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA16UI, YSpanIndicesTextureMemory); } -void ComputeRenderer::VCount144(GPU& gpu) -{ - -} -void ComputeRenderer::SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to) +void ComputeRenderer3D::SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int to) { span->Z0 = poly->FinalZ[from]; span->W0 = poly->FinalW[from]; @@ -409,7 +419,7 @@ void ComputeRenderer::SetupAttrs(SpanSetupY* span, Polygon* poly, int from, int span->TexcoordV1 = poly->Vertices[to]->TexCoords[1]; } -void ComputeRenderer::SetupYSpanDummy(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int vertex, int side, s32 positions[10][2]) +void ComputeRenderer3D::SetupYSpanDummy(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int vertex, int 
side, s32 positions[10][2]) { s32 x0 = positions[vertex][0]; if (side) @@ -450,7 +460,7 @@ void ComputeRenderer::SetupYSpanDummy(RenderPolygon* rp, SpanSetupY* span, Polyg SetupAttrs(span, poly, vertex, vertex); } -void ComputeRenderer::SetupYSpan(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int from, int to, int side, s32 positions[10][2]) +void ComputeRenderer3D::SetupYSpan(RenderPolygon* rp, SpanSetupY* span, Polygon* poly, int from, int to, int side, s32 positions[10][2]) { span->X0 = positions[from][0]; span->X1 = positions[to][0]; @@ -597,10 +607,12 @@ struct Variant GLuint Texture, Sampler; u16 Width, Height; u8 BlendMode; + int CaptureYOffset; bool operator==(const Variant& other) { - return Texture == other.Texture && Sampler == other.Sampler && BlendMode == other.BlendMode; + return Texture == other.Texture && Sampler == other.Sampler && BlendMode == other.BlendMode && + CaptureYOffset == other.CaptureYOffset; } }; @@ -618,14 +630,60 @@ struct Variant => 20 Shader + 1x Shadow Mask */ -void ComputeRenderer::RenderFrame(GPU& gpu) +void ComputeRenderer3D::RenderFrame() { assert(!NeedsShaderCompile()); - if (!Texcache.Update(gpu) && gpu.GPU3D.RenderFrameIdentical) + u8 clrBitmapDirty; + if (!Texcache.Update(clrBitmapDirty) && GPU3D.RenderFrameIdentical) { return; } + // figure out which chunks of texture memory contain display captures + int captureinfo[16]; + GPU.GetCaptureInfo_Texture(captureinfo); + + // if we're using a clear bitmap, set that up + ClearBitmapDirty |= clrBitmapDirty; + if (GPU3D.RenderDispCnt & (1<<14)) + { + if (ClearBitmapDirty & (1<<0)) + { + u16* vram = (u16*)&GPU.VRAMFlat_Texture[0x40000]; + for (int i = 0; i < 256*256; i++) + { + u16 color = vram[i]; + u32 r = (color << 1) & 0x3E; if (r) r++; + u32 g = (color >> 4) & 0x3E; if (g) g++; + u32 b = (color >> 9) & 0x3E; if (b) b++; + u32 a = (color & 0x8000) ? 
31 : 0; + + ClearBitmap[0][i] = r | (g << 8) | (b << 16) | (a << 24); + } + + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[0]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 256, GL_RED_INTEGER, GL_UNSIGNED_INT, ClearBitmap[0]); + } + + if (ClearBitmapDirty & (1<<1)) + { + u16* vram = (u16*)&GPU.VRAMFlat_Texture[0x60000]; + for (int i = 0; i < 256*256; i++) + { + u16 val = vram[i]; + u32 depth = ((val & 0x7FFF) * 0x200) + 0x1FF; + u32 fog = (val & 0x8000) << 9; + + ClearBitmap[1][i] = depth | fog; + } + + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[1]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 256, GL_RED_INTEGER, GL_UNSIGNED_INT, ClearBitmap[1]); + } + + ClearBitmapDirty = 0; + } + int numYSpans = 0; int numSetupIndices = 0; @@ -643,12 +701,13 @@ void ComputeRenderer::RenderFrame(GPU& gpu) */ u32 numVariants = 0, prevVariant, prevTexLayer; Variant variants[MaxVariants]; + u32 capLastVariant[16] = {0}; - bool enableTextureMaps = gpu.GPU3D.RenderDispCnt & (1<<0); + bool enableTextureMaps = GPU3D.RenderDispCnt & (1<<0); - for (int i = 0; i < gpu.GPU3D.RenderNumPolygons; i++) + for (int i = 0; i < GPU3D.RenderNumPolygons; i++) { - Polygon* polygon = gpu.GPU3D.RenderPolygonRAM[i]; + Polygon* polygon = GPU3D.RenderPolygonRAM[i]; u32 nverts = polygon->NumVertices; u32 vtop = polygon->VTop, vbot = polygon->VBottom; @@ -664,7 +723,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) { // if the whole texture attribute matches // the texture layer will also match - Polygon* prevPolygon = gpu.GPU3D.RenderPolygonRAM[i - 1]; + Polygon* prevPolygon = GPU3D.RenderPolygonRAM[i - 1]; foundVariant = prevPolygon->TexParam == polygon->TexParam && prevPolygon->TexPalette == polygon->TexPalette && (prevPolygon->Attr & 0x30) == (polygon->Attr & 0x30) @@ -679,9 +738,55 @@ void ComputeRenderer::RenderFrame(GPU& gpu) variant.Sampler = 0; u32* textureLastVariant = nullptr; // we always need to look up the texture to get the layer of the array texture - if (enableTextureMaps && 
(polygon->TexParam >> 26) & 0x7) + u32 textype = (polygon->TexParam >> 26) & 0x7; + if (enableTextureMaps && textype) { - Texcache.GetTexture(gpu, polygon->TexParam, polygon->TexPalette, variant.Texture, prevTexLayer, textureLastVariant); + u32 texaddr = polygon->TexParam & 0xFFFF; + u32 texwidth = TextureWidth(polygon->TexParam); + u32 texheight = TextureHeight(polygon->TexParam); + int capblock = -1; + if ((textype == 7) && ((texwidth == 128) || (texwidth == 256))) + { + // if this is a direct color texture, and the width is 128 or 256 + // then it might be a display capture + u32 startaddr = texaddr << 3; + u32 endaddr = startaddr + (texheight * texwidth * 2); + + startaddr >>= 15; + endaddr = (endaddr + 0x7FFF) >> 15; + + for (u32 b = startaddr; b < endaddr; b++) + { + int blk = captureinfo[b]; + if (blk == -1) continue; + + capblock = blk; + } + } + + if (capblock != -1) + { + if (texwidth == 128) + { + variant.Texture = -1; + variant.CaptureYOffset = (int)((texaddr >> 5) & 0x7F); + prevTexLayer = capblock; + } + else + { + variant.Texture = -2; + variant.CaptureYOffset = (int)((texaddr >> 6) & 0xFF); + prevTexLayer = capblock >> 2; + } + + textureLastVariant = &capLastVariant[capblock]; + } + else + { + Texcache.GetTexture(polygon->TexParam, polygon->TexPalette, variant.Texture, prevTexLayer, textureLastVariant); + variant.CaptureYOffset = -1; + } + bool wrapS = (polygon->TexParam >> 16) & 1; bool wrapT = (polygon->TexParam >> 17) & 1; bool mirrorS = (polygon->TexParam >> 18) & 1; @@ -874,7 +979,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glBufferSubData(GL_TEXTURE_BUFFER, 0, numSetupIndices*4*2, YSpanIndices.data()); glBindBuffer(GL_SHADER_STORAGE_BUFFER, RenderPolygonMemory); - glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, gpu.GPU3D.RenderNumPolygons*sizeof(RenderPolygon), RenderPolygons); + glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, GPU3D.RenderNumPolygons*sizeof(RenderPolygon), RenderPolygons); // we haven't accessed image data yet, so we don't need 
to invalidate anything } @@ -891,22 +996,27 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 7, WorkDescMemory); MetaUniform meta; - meta.DispCnt = gpu.GPU3D.RenderDispCnt; - meta.NumPolygons = gpu.GPU3D.RenderNumPolygons; + meta.DispCnt = GPU3D.RenderDispCnt; + meta.NumPolygons = GPU3D.RenderNumPolygons; meta.NumVariants = numVariants; - meta.AlphaRef = gpu.GPU3D.RenderAlphaRef; + meta.AlphaRef = GPU3D.RenderAlphaRef; { - u32 r = (gpu.GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; - u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; - u32 b = (gpu.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; - u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F; + u32 r = (GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; + u32 g = (GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; + u32 b = (GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; + u32 a = (GPU3D.RenderClearAttr1 >> 16) & 0x1F; meta.ClearColor = r | (g << 8) | (b << 16) | (a << 24); - meta.ClearDepth = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; - meta.ClearAttr = gpu.GPU3D.RenderClearAttr1 & 0x3F008000; + meta.ClearDepth = ((GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + meta.ClearAttr = GPU3D.RenderClearAttr1 & 0x3F008000; + + u8 xoff = (GPU3D.RenderClearAttr2 >> 16) & 0xFF; + u8 yoff = (GPU3D.RenderClearAttr2 >> 24) & 0xFF; + meta.ClearBitmapOffset[0] = (float)xoff / 256.0; + meta.ClearBitmapOffset[1] = (float)yoff / 256.0; } for (u32 i = 0; i < 32; i++) { - u32 color = gpu.GPU3D.RenderToonTable[i]; + u32 color = GPU3D.RenderToonTable[i]; u32 r = (color << 1) & 0x3E; u32 g = (color >> 4) & 0x3E; u32 b = (color >> 9) & 0x3E; @@ -918,11 +1028,11 @@ void ComputeRenderer::RenderFrame(GPU& gpu) } for (u32 i = 0; i < 34; i++) { - meta.ToonTable[i*4+1] = gpu.GPU3D.RenderFogDensityTable[i]; + meta.ToonTable[i*4+1] = GPU3D.RenderFogDensityTable[i]; } for (u32 i = 0; i < 8; i++) { - u32 color = gpu.GPU3D.RenderEdgeTable[i]; + u32 color = 
GPU3D.RenderEdgeTable[i]; u32 r = (color << 1) & 0x3E; u32 g = (color >> 4) & 0x3E; u32 b = (color >> 9) & 0x3E; @@ -932,13 +1042,13 @@ void ComputeRenderer::RenderFrame(GPU& gpu) meta.ToonTable[i*4+2] = r | (g << 8) | (b << 16); } - meta.FogOffset = gpu.GPU3D.RenderFogOffset; - meta.FogShift = gpu.GPU3D.RenderFogShift; + meta.FogOffset = GPU3D.RenderFogOffset; + meta.FogShift = GPU3D.RenderFogShift; { - u32 fogR = (gpu.GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; - u32 fogG = (gpu.GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; - u32 fogB = (gpu.GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; - u32 fogA = (gpu.GPU3D.RenderFogColor >> 16) & 0x1F; + u32 fogR = (GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; + u32 fogG = (GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; + u32 fogB = (GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; + u32 fogA = (GPU3D.RenderFogColor >> 16) & 0x1F; meta.FogColor = fogR | (fogG << 8) | (fogB << 16) | (fogA << 24); } @@ -952,7 +1062,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) bool wbuffer = false; if (numYSpans > 0) { - wbuffer = gpu.GPU3D.RenderPolygonRAM[0]->WBuffer; + wbuffer = GPU3D.RenderPolygonRAM[0]->WBuffer; glUseProgram(ShaderClearIndirectWorkCount); glDispatchCompute((numVariants+31)/32, 1, 1); @@ -965,7 +1075,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) // bin polygons glUseProgram(ShaderBinCombined); - glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); + glDispatchCompute(((GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); // calculate list offsets @@ -979,6 +1089,11 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glDispatchComputeIndirect(offsetof(BinResultHeader, SortWorkWorkCount)); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); + glActiveTexture(GL_TEXTURE1); + 
glBindTexture(GL_TEXTURE_2D_ARRAY, Parent.CaptureOutput128Tex); + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D_ARRAY, Parent.CaptureOutput256Tex); + glActiveTexture(GL_TEXTURE0); for (int i = 0; i < tilememoryLayer_Num; i++) @@ -986,7 +1101,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) // rasterise { - bool highLightMode = gpu.GPU3D.RenderDispCnt & (1<<1); + bool highLightMode = GPU3D.RenderDispCnt & (1<<1); GLuint shadersNoTexture[] = { @@ -1021,14 +1136,31 @@ void ComputeRenderer::RenderFrame(GPU& gpu) else { shader = shadersUseTexture[variants[i].BlendMode]; + + GLuint texunit = 0; + bool unitchange = false; if (variants[i].Texture != prevTexture) { - glBindTexture(GL_TEXTURE_2D_ARRAY, variants[i].Texture); + bool iscap = (variants[i].Texture == (GLuint)-1 || variants[i].Texture == (GLuint)-2); + bool previscap = (prevTexture == (GLuint)-1 || prevTexture == (GLuint)-2); + if (iscap) + { + unitchange = true; + if (variants[i].Texture == (GLuint)-1) + texunit = 1; + else + texunit = 2; + } + else if (previscap) + unitchange = true; + + if (texunit == 0) + glBindTexture(GL_TEXTURE_2D_ARRAY, variants[i].Texture); prevTexture = variants[i].Texture; } - if (variants[i].Sampler != prevSampler) + if ((variants[i].Sampler != prevSampler) || unitchange) { - glBindSampler(0, variants[i].Sampler); + glBindSampler(texunit, variants[i].Sampler); prevSampler = variants[i].Sampler; } } @@ -1041,6 +1173,16 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glUniform1ui(UniformIdxCurVariant, i); glUniform2f(UniformIdxTextureSize, 1.f / variants[i].Width, 1.f / variants[i].Height); + if (variants[i].CaptureYOffset != -1) + { + if (variants[i].Width == 128) + glUniform1i(UniformIdxTexIsCapture, 1); + else + glUniform1i(UniformIdxTexIsCapture, 2); + glUniform1f(UniformIdxCaptureYOffset, (float)variants[i].CaptureYOffset / (float)variants[i].Height); + } + else + glUniform1i(UniformIdxTexIsCapture, 0); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory); 
glDispatchComputeIndirect(offsetof(BinResultHeader, VariantWorkCount) + i*4*4); } @@ -1048,19 +1190,25 @@ void ComputeRenderer::RenderFrame(GPU& gpu) } glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + glBindSampler(0, 0); + glBindSampler(1, 0); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[0]); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[1]); + // compose final image glUseProgram(ShaderDepthBlend[wbuffer]); glDispatchCompute(ScreenWidth/TileSize, ScreenHeight/TileSize, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glBindImageTexture(0, Framebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); - glBindImageTexture(1, LowResFramebuffer, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI); u32 finalPassShader = 0; - if (gpu.GPU3D.RenderDispCnt & (1<<4)) + if (GPU3D.RenderDispCnt & (1<<4)) finalPassShader |= 0x4; - if (gpu.GPU3D.RenderDispCnt & (1<<7)) + if (GPU3D.RenderDispCnt & (1<<7)) finalPassShader |= 0x2; - if (gpu.GPU3D.RenderDispCnt & (1<<5)) + if (GPU3D.RenderDispCnt & (1<<5)) finalPassShader |= 0x1; glUseProgram(ShaderFinalPass[finalPassShader]); @@ -1068,6 +1216,8 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); glBindSampler(0, 0); + glBindSampler(1, 0); + glBindSampler(2, 0); /*u64 starttime = armGetSystemTick(); EmuQueue.waitIdle(); @@ -1116,51 +1266,13 @@ void ComputeRenderer::RenderFrame(GPU& gpu) }*/ } -void ComputeRenderer::RestartFrame(GPU& gpu) -{ - -} - -u32* ComputeRenderer::GetLine(int line) -{ - int stride = 256; - - if (line == 0) - { - glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelBuffer); - u8* data = (u8*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - if (data) memcpy(&FramebufferCPU[0], data, 4*stride*192); - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - } - - return &FramebufferCPU[stride * line]; -} - -void ComputeRenderer::SetupAccelFrame() -{ - glBindTexture(GL_TEXTURE_2D, Framebuffer); -} - -void ComputeRenderer::PrepareCaptureFrame() 
-{ - glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelBuffer); - glBindTexture(GL_TEXTURE_2D, LowResFramebuffer); - glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, nullptr); -} - -void ComputeRenderer::BindOutputTexture(int buffer) -{ - CurGLCompositor.BindOutputTexture(buffer); -} - -void ComputeRenderer::Blit(const GPU &gpu) +void ComputeRenderer3D::RestartFrame() { - CurGLCompositor.RenderFrame(gpu, *this); } -void ComputeRenderer::Stop(const GPU &gpu) +u32* ComputeRenderer3D::GetLine(int line) { - CurGLCompositor.Stop(gpu); + return nullptr; } } diff --git a/src/GPU3D_Compute.h b/src/GPU3D_Compute.h index 0dcaeb3f08..8405ecf42c 100644 --- a/src/GPU3D_Compute.h +++ b/src/GPU3D_Compute.h @@ -26,7 +26,6 @@ #include "GPU3D.h" #include "OpenGLSupport.h" -#include "GPU_OpenGL.h" #include "GPU3D_TexcacheOpenGL.h" @@ -34,35 +33,27 @@ namespace melonDS { +class GLRenderer; -class ComputeRenderer : public Renderer3D +class ComputeRenderer3D : public Renderer3D { public: - static std::unique_ptr New(); - ~ComputeRenderer() override; - - void Reset(GPU& gpu) override; + ComputeRenderer3D(melonDS::GPU3D& gpu3D, GLRenderer& parent); + ~ComputeRenderer3D() override; + bool Init() override; + void Reset() override; void SetRenderSettings(int scale, bool highResolutionCoordinates); - void VCount144(GPU& gpu) override; - - void RenderFrame(GPU& gpu) override; - void RestartFrame(GPU& gpu) override; + void RenderFrame() override; + void RestartFrame() override; u32* GetLine(int line) override; - void SetupAccelFrame() override; - void PrepareCaptureFrame() override; - - void BindOutputTexture(int buffer) override; - - void Blit(const GPU& gpu) override; - void Stop(const GPU& gpu) override; - bool NeedsShaderCompile() override { return ShaderStepIdx != 33; } void ShaderCompileStep(int& current, int& count) override; + private: - ComputeRenderer(GLCompositor&& compositor); + GLRenderer& Parent; GLuint ShaderInterpXSpans[2]; GLuint ShaderBinCombined; @@ -100,8 +91,6 @@ 
class ComputeRenderer : public Renderer3D GLuint TileMemory[tilememoryLayer_Num]; GLuint FinalTileMemory; - u32 DummyLine[256] = {}; - struct SpanSetupY { // Attributes @@ -178,6 +167,8 @@ class ComputeRenderer : public Renderer3D static constexpr int UniformIdxCurVariant = 0; static constexpr int UniformIdxTextureSize = 1; + static constexpr int UniformIdxTexIsCapture = 2; + static constexpr int UniformIdxCaptureYOffset = 3; static constexpr int MaxFullscreenLayers = 16; @@ -209,16 +200,18 @@ class ComputeRenderer : public Renderer3D u32 ClearColor, ClearDepth, ClearAttr; u32 FogOffset, FogShift, FogColor; + + float ClearBitmapOffset[2]; }; GLuint MetaUniformMemory; GLuint Samplers[9]; - GLuint Framebuffer = 0; - GLuint LowResFramebuffer; - GLuint PixelBuffer; + GLuint ClearBitmapTex[2]; + u32* ClearBitmap[2]; + u8 ClearBitmapDirty; - u32 FramebufferCPU[256*192]; + GLuint Framebuffer = 0; int ScreenWidth, ScreenHeight; int TilesPerLine, TileLines; @@ -226,8 +219,6 @@ class ComputeRenderer : public Renderer3D int MaxWorkTiles; bool HiresCoordinates; - GLCompositor CurGLCompositor; - int ShaderStepIdx = 0; void DeleteShaders(); diff --git a/src/GPU3D_Compute_shaders.h b/src/GPU3D_Compute_shaders.h index dab28a6ea4..daaa5fb430 100644 --- a/src/GPU3D_Compute_shaders.h +++ b/src/GPU3D_Compute_shaders.h @@ -369,6 +369,8 @@ layout (std140, binding = 0) uniform MetaUniform uint ClearColor, ClearDepth, ClearAttr; uint FogOffset, FogShift, FogColor; + + vec2 ClearBitmapOffset; }; #ifdef InterpSpans @@ -1061,9 +1063,13 @@ const std::string Rasterise = layout (local_size_x = TileSize, local_size_y = TileSize) in; layout (binding = 0) uniform usampler2DArray CurrentTexture; +layout (binding = 1) uniform sampler2DArray Capture128Texture; +layout (binding = 2) uniform sampler2DArray Capture256Texture; layout (location = 0) uniform uint CurVariant; layout (location = 1) uniform vec2 InvTextureSize; +layout (location = 2) uniform int TexIsCapture; +layout (location = 3) uniform 
float CaptureYOffset; void main() { @@ -1192,7 +1198,20 @@ void main() #ifdef UseTexture vec2 uvf = vec2(ivec2(u, v)) * vec2(1.0 / 16.0) * InvTextureSize; - uvec4 texcolor = texture(CurrentTexture, vec3(uvf, polygon.TextureLayer)); + // TODO: if they use a capture as a texture and make it repeat, or use a nonstandard height, + // it may require custom handling of texcoord wraparound + uvec4 texcolor; + if (TexIsCapture != 0) + { + uvf.y += CaptureYOffset; + if (TexIsCapture == 1) + texcolor = uvec4(texture(Capture128Texture, vec3(uvf, polygon.TextureLayer)) * vec4(63,63,63,31)); + else + texcolor = uvec4(texture(Capture256Texture, vec3(uvf, polygon.TextureLayer)) * vec4(63,63,63,31)); + } + else + texcolor = texture(CurrentTexture, vec3(uvf, polygon.TextureLayer)); + #ifdef Decal if (texcolor.a == 31) { @@ -1252,6 +1271,9 @@ const std::string DepthBlend = ResultBuffer + BinningBuffer + R"( +layout (binding = 0) uniform usampler2D ClearBitmapColor; +layout (binding = 1) uniform usampler2D ClearBitmapDepth; + layout (local_size_x = TileSize, local_size_y = TileSize) in; void PlotTranslucent(inout uint color, inout uint depth, inout uint attr, bool isShadow, uint tileColor, uint srcA, uint tileDepth, uint srcAttr, bool writeDepth) @@ -1453,9 +1475,23 @@ void main() uint coarseMaskLo = BinningMaskAndOffset[BinningCoarseMaskStart + linearTile*CoarseBinStride + 0]; uint coarseMaskHi = BinningMaskAndOffset[BinningCoarseMaskStart + linearTile*CoarseBinStride + 1]; - uvec2 color = uvec2(ClearColor, 0U); - uvec2 depth = uvec2(ClearDepth, 0U); + uvec2 color, depth; uvec2 attr = uvec2(ClearAttr, 0U); + if ((DispCnt & (1<<14)) != 0U) + { + float scale = 1.0 / ScreenWidth; + vec2 pos = (vec2(gl_GlobalInvocationID.xy) * scale) + ClearBitmapOffset; + color = uvec2(texture(ClearBitmapColor, pos).r, 0U); + depth = uvec2(texture(ClearBitmapDepth, pos).r, 0U); + attr.x = (attr.x & ~0x8000U) | ((depth.x >> 9) & 0x8000U); + depth.x &= 0xFFFFFFU; + } + else + { + color = 
uvec2(ClearColor, 0U); + depth = uvec2(ClearDepth, 0U); + } + uint stencil = 0U; bool prevIsShadowMask = false; @@ -1478,8 +1514,7 @@ const std::string FinalPass = layout (local_size_x = 32) in; -layout (binding = 0, rgba8) writeonly uniform image2D FinalFB; -layout (binding = 1, rgba8ui) writeonly uniform uimage2D LowResFB; +layout (binding = 0, rgba8) writeonly uniform image2D FinalFB; uint BlendFog(uint color, uint depth) { @@ -1635,23 +1670,9 @@ void main() //if ((gl_GlobalInvocationID.y % 8) == 7 || (gl_GlobalInvocationID.y % 8) == 7) // color.x = 0x1F00001FU | 0x40000000U; - vec4 result = vec4(bitfieldExtract(color.x, 16, 8), bitfieldExtract(color.x, 8, 8), color.x & 0x3FU, bitfieldExtract(color.x, 24, 8)); + vec4 result = vec4(color.x & 0x3FU, bitfieldExtract(color.x, 8, 8), bitfieldExtract(color.x, 16, 8), bitfieldExtract(color.x, 24, 8)); result /= vec4(63.0, 63.0, 63.0, 31.0); imageStore(FinalFB, ivec2(gl_GlobalInvocationID.xy), result); - - // It's a division by constant, so using the builtin division is fine - const int scale = ScreenWidth/256; - ivec2 lowresCoordinate = ivec2(gl_GlobalInvocationID.xy) / scale; - ivec2 lowresCoordinateRest = ivec2(gl_GlobalInvocationID.xy) % scale; - if (lowresCoordinateRest == ivec2(0, 0)) - { - uvec4 color8; - color8.x = bitfieldExtract(color.x, 0, 8); - color8.y = bitfieldExtract(color.x, 8, 8); - color8.z = bitfieldExtract(color.x, 16, 8); - color8.w = bitfieldExtract(color.x, 24, 8); - imageStore(LowResFB, lowresCoordinate, color8); - } } )"; diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp index 706c75e238..c322878bcd 100644 --- a/src/GPU3D_OpenGL.cpp +++ b/src/GPU3D_OpenGL.cpp @@ -16,34 +16,47 @@ with melonDS. If not, see http://www.gnu.org/licenses/. 
*/ -#include "GPU3D_OpenGL.h" +#include "GPU_OpenGL.h" #include #include #include #include "NDS.h" #include "GPU.h" -#include "GPU3D_OpenGL_shaders.h" namespace melonDS { -bool GLRenderer::BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs) +#include "OpenGL_shaders/3DClearVS.h" +#include "OpenGL_shaders/3DClearFS.h" +#include "OpenGL_shaders/3DClearBitmapVS.h" +#include "OpenGL_shaders/3DClearBitmapFS.h" +#include "OpenGL_shaders/3DRenderVS.h" +#include "OpenGL_shaders/3DRenderFS.h" +#include "OpenGL_shaders/3DFinalPassVS.h" +#include "OpenGL_shaders/3DFinalPassEdgeFS.h" +#include "OpenGL_shaders/3DFinalPassFogFS.h" + +bool GLRenderer3D::BuildRenderShader(bool wbuffer) { - char shadername[32]; - snprintf(shadername, sizeof(shadername), "RenderShader%02X", flags); + std::string wbufdef = "#define WBuffer\n"; - int headerlen = strlen(kShaderHeader); + char shadername[32]; + snprintf(shadername, sizeof(shadername), "RenderShader%c", wbuffer?'W':'Z'); - std::string vsbuf; - vsbuf += kShaderHeader; - vsbuf += kRenderVSCommon; - vsbuf += vs; + std::string vsbuf = k3DRenderVS; + if (wbuffer) + { + auto pos = vsbuf.find('\n') + 1; + vsbuf = vsbuf.substr(0, pos) + wbufdef + vsbuf.substr(pos); + } - std::string fsbuf; - fsbuf += kShaderHeader; - fsbuf += kRenderFSCommon; - fsbuf += fs; + std::string fsbuf = k3DRenderFS; + if (wbuffer) + { + auto pos = fsbuf.find('\n') + 1; + fsbuf = fsbuf.substr(0, pos) + wbufdef + fsbuf.substr(pos); + } GLuint prog; bool ret = OpenGL::CompileVertexFragmentProgram(prog, @@ -59,21 +72,26 @@ bool GLRenderer::BuildRenderShader(u32 flags, const std::string& vs, const std:: glUseProgram(prog); - uni_id = glGetUniformLocation(prog, "TexMem"); + uni_id = glGetUniformLocation(prog, "CurTexture"); glUniform1i(uni_id, 0); - uni_id = glGetUniformLocation(prog, "TexPalMem"); + uni_id = glGetUniformLocation(prog, "Capture128Texture"); glUniform1i(uni_id, 1); + uni_id = glGetUniformLocation(prog, "Capture256Texture"); + 
glUniform1i(uni_id, 2); - RenderShader[flags] = prog; + RenderShader[(int)wbuffer] = prog; return true; } -void GLRenderer::UseRenderShader(u32 flags) +void GLRenderer3D::UseRenderShader(bool wbuffer) { + int flags = (int)wbuffer; if (CurShaderID == flags) return; glUseProgram(RenderShader[flags]); CurShaderID = flags; + + RenderModeULoc = glGetUniformLocation(RenderShader[flags], "uRenderMode"); } void SetupDefaultTexParams(GLuint tex) @@ -85,27 +103,23 @@ void SetupDefaultTexParams(GLuint tex) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); } -GLRenderer::GLRenderer(GLCompositor&& compositor) noexcept : - Renderer3D(true), - CurGLCompositor(std::move(compositor)) +GLRenderer3D::GLRenderer3D(melonDS::GPU3D& gpu3D, GLRenderer& parent) noexcept : + Renderer3D(gpu3D), Parent(parent), Texcache(gpu3D.GPU, TexcacheOpenGLLoader(false)) { - // GLRenderer::New() will be used to actually initialize the renderer; + ClearBitmap[0] = new u32[256*256]; + ClearBitmap[1] = new u32[256*256]; + + ScaleFactor = 0; + BetterPolygons = false; + + // GLRenderer3D::Init() will be used to actually initialize the renderer; // The various glDelete* functions silently ignore invalid IDs, // so we can just let the destructor clean up a half-initialized renderer. } -std::unique_ptr GLRenderer::New() noexcept +bool GLRenderer3D::Init() { - assert(glEnable != nullptr); - - std::optional compositor = GLCompositor::New(); - if (!compositor) - return nullptr; - - // Will be returned if the initialization succeeds, - // or cleaned up via RAII if it fails. 
- std::unique_ptr result = std::unique_ptr(new GLRenderer(std::move(*compositor))); - compositor = std::nullopt; + GLint uni_id; glEnable(GL_DEPTH_TEST); glEnable(GL_STENCIL_TEST); @@ -113,83 +127,81 @@ std::unique_ptr GLRenderer::New() noexcept glDepthRange(0, 1); glClearDepth(1.0); - if (!OpenGL::CompileVertexFragmentProgram(result->ClearShaderPlain, - kClearVS, kClearFS, - "ClearShader", + if (!OpenGL::CompileVertexFragmentProgram(ClearShaderPlain, + k3DClearVS, k3DClearFS, + "ClearShaderPlain", {{"vPosition", 0}}, {{"oColor", 0}, {"oAttr", 1}})) - return nullptr; + return false; - result->ClearUniformLoc[0] = glGetUniformLocation(result->ClearShaderPlain, "uColor"); - result->ClearUniformLoc[1] = glGetUniformLocation(result->ClearShaderPlain, "uDepth"); - result->ClearUniformLoc[2] = glGetUniformLocation(result->ClearShaderPlain, "uOpaquePolyID"); - result->ClearUniformLoc[3] = glGetUniformLocation(result->ClearShaderPlain, "uFogFlag"); + ClearUniformLoc[0] = glGetUniformLocation(ClearShaderPlain, "uColor"); + ClearUniformLoc[1] = glGetUniformLocation(ClearShaderPlain, "uDepth"); + ClearUniformLoc[2] = glGetUniformLocation(ClearShaderPlain, "uOpaquePolyID"); + ClearUniformLoc[3] = glGetUniformLocation(ClearShaderPlain, "uFogFlag"); - memset(result->RenderShader, 0, sizeof(RenderShader)); + if (!OpenGL::CompileVertexFragmentProgram(ClearShaderBitmap, + k3DClearBitmapVS, k3DClearBitmapFS, + "ClearShaderBitmap", + {{"vPosition", 0}}, + {{"oColor", 0}, {"oAttr", 1}})) + return false; - if (!result->BuildRenderShader(0, kRenderVS_Z, kRenderFS_ZO)) - return nullptr; + ClearBitmapULoc[0] = glGetUniformLocation(ClearShaderBitmap, "uClearBitmapOffset"); + ClearBitmapULoc[1] = glGetUniformLocation(ClearShaderBitmap, "uOpaquePolyID"); - if (!result->BuildRenderShader(RenderFlag_WBuffer, kRenderVS_W, kRenderFS_WO)) - return nullptr; - - if (!result->BuildRenderShader(RenderFlag_Edge, kRenderVS_Z, kRenderFS_ZE)) - return nullptr; - - if 
(!result->BuildRenderShader(RenderFlag_Edge | RenderFlag_WBuffer, kRenderVS_W, kRenderFS_WE)) - return nullptr; - - if (!result->BuildRenderShader(RenderFlag_Trans, kRenderVS_Z, kRenderFS_ZT)) - return nullptr; + glUseProgram(ClearShaderBitmap); + uni_id = glGetUniformLocation(ClearShaderBitmap, "ClearBitmapColor"); + glUniform1i(uni_id, 0); + uni_id = glGetUniformLocation(ClearShaderBitmap, "ClearBitmapDepth"); + glUniform1i(uni_id, 1); - if (!result->BuildRenderShader(RenderFlag_Trans | RenderFlag_WBuffer, kRenderVS_W, kRenderFS_WT)) - return nullptr; + memset(RenderShader, 0, sizeof(RenderShader)); - if (!result->BuildRenderShader(RenderFlag_ShadowMask, kRenderVS_Z, kRenderFS_ZSM)) - return nullptr; + if (!BuildRenderShader(false)) + return false; - if (!result->BuildRenderShader(RenderFlag_ShadowMask | RenderFlag_WBuffer, kRenderVS_W, kRenderFS_WSM)) - return nullptr; + if (!BuildRenderShader(true)) + return false; - if (!OpenGL::CompileVertexFragmentProgram(result->FinalPassEdgeShader, - kFinalPassVS, kFinalPassEdgeFS, + if (!OpenGL::CompileVertexFragmentProgram(FinalPassEdgeShader, + k3DFinalPassVS, k3DFinalPassEdgeFS, "FinalPassEdgeShader", {{"vPosition", 0}}, {{"oColor", 0}})) - return nullptr; - if (!OpenGL::CompileVertexFragmentProgram(result->FinalPassFogShader, - kFinalPassVS, kFinalPassFogFS, + return false; + if (!OpenGL::CompileVertexFragmentProgram(FinalPassFogShader, + k3DFinalPassVS, k3DFinalPassFogFS, "FinalPassFogShader", {{"vPosition", 0}}, {{"oColor", 0}})) - return nullptr; + return false; - GLuint uni_id = glGetUniformBlockIndex(result->FinalPassEdgeShader, "uConfig"); - glUniformBlockBinding(result->FinalPassEdgeShader, uni_id, 0); + uni_id = glGetUniformBlockIndex(FinalPassEdgeShader, "uConfig"); + glUniformBlockBinding(FinalPassEdgeShader, uni_id, 0); - glUseProgram(result->FinalPassEdgeShader); - uni_id = glGetUniformLocation(result->FinalPassEdgeShader, "DepthBuffer"); + glUseProgram(FinalPassEdgeShader); + uni_id = 
glGetUniformLocation(FinalPassEdgeShader, "DepthBuffer"); glUniform1i(uni_id, 0); - uni_id = glGetUniformLocation(result->FinalPassEdgeShader, "AttrBuffer"); + uni_id = glGetUniformLocation(FinalPassEdgeShader, "AttrBuffer"); glUniform1i(uni_id, 1); - uni_id = glGetUniformBlockIndex(result->FinalPassFogShader, "uConfig"); - glUniformBlockBinding(result->FinalPassFogShader, uni_id, 0); + uni_id = glGetUniformBlockIndex(FinalPassFogShader, "uConfig"); + glUniformBlockBinding(FinalPassFogShader, uni_id, 0); - glUseProgram(result->FinalPassFogShader); - uni_id = glGetUniformLocation(result->FinalPassFogShader, "DepthBuffer"); + glUseProgram(FinalPassFogShader); + uni_id = glGetUniformLocation(FinalPassFogShader, "DepthBuffer"); glUniform1i(uni_id, 0); - uni_id = glGetUniformLocation(result->FinalPassFogShader, "AttrBuffer"); + uni_id = glGetUniformLocation(FinalPassFogShader, "AttrBuffer"); glUniform1i(uni_id, 1); - memset(&result->ShaderConfig, 0, sizeof(ShaderConfig)); + memset(&ShaderConfig, 0, sizeof(ShaderConfig)); - glGenBuffers(1, &result->ShaderConfigUBO); - glBindBuffer(GL_UNIFORM_BUFFER, result->ShaderConfigUBO); + glGenBuffers(1, &ShaderConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO); static_assert((sizeof(ShaderConfig) & 15) == 0); - glBufferData(GL_UNIFORM_BUFFER, sizeof(ShaderConfig), &result->ShaderConfig, GL_STATIC_DRAW); - glBindBufferBase(GL_UNIFORM_BUFFER, 0, result->ShaderConfigUBO); + glBufferData(GL_UNIFORM_BUFFER, sizeof(ShaderConfig), &ShaderConfig, GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ShaderConfigUBO); float clearvtx[6*2] = @@ -203,22 +215,39 @@ std::unique_ptr GLRenderer::New() noexcept 1.0, 1.0 }; - glGenBuffers(1, &result->ClearVertexBufferID); - glBindBuffer(GL_ARRAY_BUFFER, result->ClearVertexBufferID); + glGenBuffers(1, &ClearVertexBufferID); + glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); glBufferData(GL_ARRAY_BUFFER, sizeof(clearvtx), clearvtx, GL_STATIC_DRAW); - glGenVertexArrays(1, 
&result->ClearVertexArrayID); - glBindVertexArray(result->ClearVertexArrayID); + glGenVertexArrays(1, &ClearVertexArrayID); + glBindVertexArray(ClearVertexArrayID); glEnableVertexAttribArray(0); // position glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (void*)(0)); + // init textures for the clear bitmap + glGenTextures(2, ClearBitmapTex); - glGenBuffers(1, &result->VertexBufferID); - glBindBuffer(GL_ARRAY_BUFFER, result->VertexBufferID); + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[0]); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 256, 256, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, nullptr); + + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[1]); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, 256, 256, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); + + + glGenBuffers(1, &VertexBufferID); + glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID); glBufferData(GL_ARRAY_BUFFER, sizeof(VertexBuffer), nullptr, GL_DYNAMIC_DRAW); - glGenVertexArrays(1, &result->VertexArrayID); - glBindVertexArray(result->VertexArrayID); + glGenVertexArrays(1, &VertexArrayID); + glBindVertexArray(VertexArrayID); glEnableVertexAttribArray(0); // position glVertexAttribIPointer(0, 4, GL_UNSIGNED_SHORT, 7*4, (void*)(0)); glEnableVertexAttribArray(1); // color @@ -228,112 +257,89 @@ std::unique_ptr GLRenderer::New() noexcept glEnableVertexAttribArray(3); // attrib glVertexAttribIPointer(3, 3, GL_UNSIGNED_INT, 7*4, (void*)(4*4)); - glGenBuffers(1, &result->IndexBufferID); - 
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, result->IndexBufferID); + glGenBuffers(1, &IndexBufferID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, IndexBufferID); glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(IndexBuffer), nullptr, GL_DYNAMIC_DRAW); - glGenFramebuffers(1, &result->MainFramebuffer); - glGenFramebuffers(1, &result->DownscaleFramebuffer); + glGenFramebuffers(1, &MainFramebuffer); // color buffers - glGenTextures(1, &result->ColorBufferTex); - SetupDefaultTexParams(result->ColorBufferTex); + glGenTextures(1, &ColorBufferTex); + SetupDefaultTexParams(ColorBufferTex); // depth/stencil buffer - glGenTextures(1, &result->DepthBufferTex); - SetupDefaultTexParams(result->DepthBufferTex); + glGenTextures(1, &DepthBufferTex); + SetupDefaultTexParams(DepthBufferTex); // attribute buffer // R: opaque polyID (for edgemarking) // G: edge flag // B: fog flag - glGenTextures(1, &result->AttrBufferTex); - SetupDefaultTexParams(result->AttrBufferTex); + glGenTextures(1, &AttrBufferTex); + SetupDefaultTexParams(AttrBufferTex); - // downscale framebuffer for display capture (always 256x192) - glGenTextures(1, &result->DownScaleBufferTex); - SetupDefaultTexParams(result->DownScaleBufferTex); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, 192, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + Parent.OutputTex3D = ColorBufferTex; glEnable(GL_BLEND); glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX); - glGenBuffers(1, &result->PixelbufferID); - - glActiveTexture(GL_TEXTURE0); - glGenTextures(1, &result->TexMemID); - glBindTexture(GL_TEXTURE_2D, result->TexMemID); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_R8UI, 1024, 512, 0, GL_RED_INTEGER, GL_UNSIGNED_BYTE, NULL); - - glActiveTexture(GL_TEXTURE1); - glGenTextures(1, 
&result->TexPalMemID); - glBindTexture(GL_TEXTURE_2D, result->TexPalMemID); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB5_A1, 1024, 48, 0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, NULL); - glBindFramebuffer(GL_FRAMEBUFFER, 0); - return result; + return true; } -GLRenderer::~GLRenderer() +GLRenderer3D::~GLRenderer3D() { assert(glDeleteTextures != nullptr); - glDeleteTextures(1, &TexMemID); - glDeleteTextures(1, &TexPalMemID); + Texcache.Reset(); glDeleteFramebuffers(1, &MainFramebuffer); - glDeleteFramebuffers(1, &DownscaleFramebuffer); glDeleteTextures(1, &ColorBufferTex); glDeleteTextures(1, &DepthBufferTex); glDeleteTextures(1, &AttrBufferTex); - glDeleteTextures(1, &DownScaleBufferTex); glDeleteVertexArrays(1, &VertexArrayID); glDeleteBuffers(1, &VertexBufferID); glDeleteVertexArrays(1, &ClearVertexArrayID); glDeleteBuffers(1, &ClearVertexBufferID); + glDeleteTextures(2, ClearBitmapTex); + delete[] ClearBitmap[0]; + delete[] ClearBitmap[1]; glDeleteBuffers(1, &ShaderConfigUBO); - for (int i = 0; i < 16; i++) + for (int i = 0; i < 2; i++) { if (!RenderShader[i]) continue; glDeleteProgram(RenderShader[i]); } } -void GLRenderer::Reset(GPU& gpu) +void GLRenderer3D::Reset() { - // This is where the compositor's Reset() method would be called, - // except there's no such method right now. 
+ Texcache.Reset(); + ClearBitmapDirty = 0x3; } -void GLRenderer::SetBetterPolygons(bool betterpolygons) noexcept +void GLRenderer3D::SetBetterPolygons(bool betterpolygons) noexcept { - SetRenderSettings(betterpolygons, ScaleFactor); + SetRenderSettings(ScaleFactor, betterpolygons); } -void GLRenderer::SetScaleFactor(int scale) noexcept +void GLRenderer3D::SetScaleFactor(int scale) noexcept { - SetRenderSettings(BetterPolygons, scale); + SetRenderSettings(scale, BetterPolygons); } -void GLRenderer::SetRenderSettings(bool betterpolygons, int scale) noexcept +void GLRenderer3D::SetRenderSettings(int scale, bool betterpolygons) noexcept { if (betterpolygons == BetterPolygons && scale == ScaleFactor) return; - CurGLCompositor.SetScaleFactor(scale); + // TODO set it for 2D renderer + //CurGLCompositor.SetScaleFactor(scale); ScaleFactor = scale; BetterPolygons = betterpolygons; @@ -348,9 +354,6 @@ void GLRenderer::SetRenderSettings(bool betterpolygons, int scale) noexcept glBindTexture(GL_TEXTURE_2D, AttrBufferTex); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, ScreenW, ScreenH, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL); - glBindFramebuffer(GL_FRAMEBUFFER, DownscaleFramebuffer); - glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, DownScaleBufferTex, 0); - GLenum fbassign[2] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1}; glBindFramebuffer(GL_FRAMEBUFFER, MainFramebuffer); @@ -359,9 +362,6 @@ void GLRenderer::SetRenderSettings(bool betterpolygons, int scale) noexcept glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, AttrBufferTex, 0); glDrawBuffers(2, fbassign); - glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID); - glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, NULL, GL_DYNAMIC_READ); - glBindFramebuffer(GL_FRAMEBUFFER, 0); //glLineWidth(scale); @@ -369,7 +369,7 @@ void GLRenderer::SetRenderSettings(bool betterpolygons, int scale) noexcept } -void GLRenderer::SetupPolygon(GLRenderer::RendererPolygon* rp, Polygon* polygon) const +void 
GLRenderer3D::SetupPolygon(GLRenderer3D::RendererPolygon* rp, Polygon* polygon) const { rp->PolyData = polygon; @@ -413,9 +413,14 @@ void GLRenderer::SetupPolygon(GLRenderer::RendererPolygon* rp, Polygon* polygon) { rp->RenderKey |= 0x30000; } + + u32 textype = (polygon->TexParam >> 26) & 0x7; + u32 texattr = (polygon->TexParam >> 16) & 0x3FF; + if (TexEnable && (textype != 0)) + rp->RenderKey |= (0x80000 | (texattr << 20)); } -u32* GLRenderer::SetupVertex(const Polygon* poly, int vid, const Vertex* vtx, u32 vtxattr, u32* vptr) const +u32* GLRenderer3D::SetupVertex(const Polygon* poly, int vid, const Vertex* vtx, u32 vtxattr, u32 texlayer, u32* vptr) const { u32 z = poly->FinalZ[vid]; u32 w = poly->FinalW[vid]; @@ -470,13 +475,13 @@ u32* GLRenderer::SetupVertex(const Polygon* poly, int vid, const Vertex* vtx, u3 *vptr++ = (u16)vtx->TexCoords[0] | ((u16)vtx->TexCoords[1] << 16); *vptr++ = vtxattr | (zshift << 16); - *vptr++ = poly->TexParam; - *vptr++ = poly->TexPalette; + *vptr++ = texlayer; + *vptr++ = TextureWidth(poly->TexParam) | (TextureHeight(poly->TexParam) << 16); return vptr; } -void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys) +void GLRenderer3D::BuildPolygons(GLRenderer3D::RendererPolygon* polygons, int npolys, int captureinfo[16]) { u32* vptr = &VertexBuffer[0]; u32 vidx = 0; @@ -484,6 +489,11 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys u32 iidx = 0; u32 eidx = EdgeIndicesOffset; + u32 curtexparam = 0; + u32 curtexpal = 0; + GLuint curtexid = 0; + u32 curtexlayer = (u32)-1; + for (int i = 0; i < npolys; i++) { RendererPolygon* rp = &polygons[i]; @@ -495,6 +505,8 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys u32 vidx_first = vidx; u32 polyattr = poly->Attr; + u32 texparam = poly->TexParam & ~0xC00F0000; + u32 texpal = poly->TexPalette; u32 alpha = (polyattr >> 16) & 0x1F; @@ -502,6 +514,70 @@ void 
GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys if (poly->FacingView) vtxattr |= (1<<8); if (poly->WBuffer) vtxattr |= (1<<9); + if ((texparam != curtexparam) || (texpal != curtexpal)) + { + u32 textype = (texparam >> 26) & 0x7; + if (TexEnable && (textype != 0)) + { + // figure out which texture this polygon is going to use + + u32 texaddr = texparam & 0xFFFF; + u32 texwidth = TextureWidth(texparam); + u32 texheight = TextureHeight(texparam); + int capblock = -1; + if ((textype == 7) && ((texwidth == 128) || (texwidth == 256))) + { + // if this is a direct color texture, and the width is 128 or 256 + // then it might be a display capture + u32 startaddr = texaddr << 3; + u32 endaddr = startaddr + (texheight * texwidth * 2); + + startaddr >>= 15; + endaddr = (endaddr + 0x7FFF) >> 15; + + for (u32 b = startaddr; b < endaddr; b++) + { + int blk = captureinfo[b]; + if (blk == -1) continue; + + capblock = blk; + } + } + + if (capblock != -1) + { + if (texwidth == 128) + { + curtexid = -1; + curtexlayer = capblock | (((texaddr >> 5) & 0x7F) << 20); + } + else + { + curtexid = -2; + curtexlayer = (capblock >> 2) | (((texaddr >> 6) & 0xFF) << 20); + } + } + else + { + u32* halp; + Texcache.GetTexture(texparam, texpal, curtexid, curtexlayer, halp); + curtexlayer |= 0xFFFF0000; + } + } + else + { + // no texture + curtexid = 0; + curtexlayer = (u32)-1; + } + + curtexparam = texparam; + curtexpal = texpal; + } + + rp->TexID = curtexid; + rp->TexRepeat = (poly->TexParam >> 16) & 0xF; + // assemble vertices if (poly->Type == 1) // line { @@ -522,7 +598,7 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys lastx = vtx->FinalPosition[0]; lasty = vtx->FinalPosition[1]; - vptr = SetupVertex(poly, j, vtx, vtxattr, vptr); + vptr = SetupVertex(poly, j, vtx, vtxattr, curtexlayer, vptr); IndexBuffer[iidx++] = vidx; rp->NumIndices++; @@ -540,7 +616,7 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int 
npolys { Vertex* vtx = poly->Vertices[j]; - vptr = SetupVertex(poly, j, vtx, vtxattr, vptr); + vptr = SetupVertex(poly, j, vtx, vtxattr, curtexlayer, vptr); vidx++; } @@ -562,7 +638,7 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys { Vertex* vtx = poly->Vertices[j]; - vptr = SetupVertex(poly, j, vtx, vtxattr, vptr); + vptr = SetupVertex(poly, j, vtx, vtxattr, curtexlayer, vptr); if (j >= 2) { @@ -646,8 +722,8 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys *vptr++ = (u16)cS | ((u16)cT << 16); *vptr++ = vtxattr | (zshift << 16); - *vptr++ = poly->TexParam; - *vptr++ = poly->TexPalette; + *vptr++ = curtexlayer; + *vptr++ = TextureWidth(texparam) | (TextureHeight(texparam) << 16); vidx++; @@ -656,7 +732,7 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys { Vertex* vtx = poly->Vertices[j]; - vptr = SetupVertex(poly, j, vtx, vtxattr, vptr); + vptr = SetupVertex(poly, j, vtx, vtxattr, curtexlayer, vptr); if (j >= 1) { @@ -698,20 +774,56 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys NumEdgeIndices = eidx - EdgeIndicesOffset; } -int GLRenderer::RenderSinglePolygon(int i) const +void GLRenderer3D::SetupPolygonTexture(const RendererPolygon* poly) const +{ + bool iscap = (poly->TexID == (GLuint)-1 || poly->TexID == (GLuint)-2); + + if (iscap) + { + if (poly->TexID == (GLuint)-1) + glActiveTexture(GL_TEXTURE1); + else + glActiveTexture(GL_TEXTURE2); + } + else + { + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D_ARRAY, poly->TexID); + } + + GLint repeatS, repeatT; + + if (poly->TexRepeat & (1<<0)) + repeatS = (poly->TexRepeat & (1<<2)) ? GL_MIRRORED_REPEAT : GL_REPEAT; + else + repeatS = GL_CLAMP_TO_EDGE; + + if (poly->TexRepeat & (1<<1)) + repeatT = (poly->TexRepeat & (1<<3)) ? 
GL_MIRRORED_REPEAT : GL_REPEAT; + else + repeatT = GL_CLAMP_TO_EDGE; + + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, repeatS); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, repeatT); +} + +int GLRenderer3D::RenderSinglePolygon(int i) const { const RendererPolygon* rp = &PolygonList[i]; + SetupPolygonTexture(rp); glDrawElements(rp->PrimType, rp->NumIndices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2)); return 1; } -int GLRenderer::RenderPolygonBatch(int i) const +int GLRenderer3D::RenderPolygonBatch(int i) const { const RendererPolygon* rp = &PolygonList[i]; GLuint primtype = rp->PrimType; - u32 key = rp->RenderKey; + u32 renderkey = rp->RenderKey; + GLuint texid = rp->TexID; + u32 texrepeat = rp->TexRepeat; int numpolys = 0; u32 numindices = 0; @@ -719,52 +831,74 @@ int GLRenderer::RenderPolygonBatch(int i) const { const RendererPolygon* cur_rp = &PolygonList[iend]; if (cur_rp->PrimType != primtype) break; - if (cur_rp->RenderKey != key) break; + if (cur_rp->RenderKey != renderkey) break; + if (cur_rp->TexID != texid) break; + if (cur_rp->TexRepeat != texrepeat) break; numpolys++; numindices += cur_rp->NumIndices; } + SetupPolygonTexture(rp); glDrawElements(primtype, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->IndicesOffset * 2)); return numpolys; } -int GLRenderer::RenderPolygonEdgeBatch(int i) const +int GLRenderer3D::RenderPolygonEdgeBatch(int i) const { const RendererPolygon* rp = &PolygonList[i]; - u32 key = rp->RenderKey; + u32 renderkey = rp->RenderKey; + GLuint texid = rp->TexID; + u32 texrepeat = rp->TexRepeat; int numpolys = 0; u32 numindices = 0; for (int iend = i; iend < NumFinalPolys; iend++) { const RendererPolygon* cur_rp = &PolygonList[iend]; - if (cur_rp->RenderKey != key) break; + if (cur_rp->RenderKey != renderkey) break; + if (cur_rp->TexID != texid) break; + if (cur_rp->TexRepeat != texrepeat) break; numpolys++; numindices += cur_rp->NumEdgeIndices; } + SetupPolygonTexture(rp); 
glDrawElements(GL_LINES, numindices, GL_UNSIGNED_SHORT, (void*)(uintptr_t)(rp->EdgeIndicesOffset * 2)); return numpolys; } -void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) +void GLRenderer3D::RenderSceneChunk(int y, int h) { - u32 flags = 0; - if (gpu3d.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + bool flags = GPU3D.RenderPolygonRAM[0]->WBuffer; + UseRenderShader(flags); - if (h != 192) glScissor(0, y<PolyData->IsShadow) { // shadow against clear-plane will only pass if its polyID matches that of the clear plane - u32 clrpolyid = (gpu3d.RenderClearAttr1 >> 24) & 0x3F; + u32 clrpolyid = (GPU3D.RenderClearAttr1 >> 24) & 0x3F; if (polyid != clrpolyid) { i++; continue; } glEnable(GL_BLEND); glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + // draw where shadow mask has previously been rendered (stencil=0xFE) + // when passing, set it to (polyID | 0x40) + // TODO might break bit0 of polyID glStencilFunc(GL_EQUAL, 0xFE, 0xFF); glStencilOp(GL_KEEP, GL_KEEP, GL_INVERT); glStencilMask(~(0x40|polyid)); // heheh @@ -925,6 +1072,8 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + // draw on either background (0xFF) or shadowmask (0xFE) + // when passing, set it to (polyID | 0x40) glStencilFunc(GL_EQUAL, 0xFF, 0xFE); glStencilOp(GL_KEEP, GL_KEEP, GL_INVERT); glStencilMask(~(0x40|polyid)); // heheh @@ -958,13 +1107,14 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) // draw actual shadow mask - UseRenderShader(flags | RenderFlag_ShadowMask); + glUniform1i(RenderModeULoc, RenderMode_ShadowMask); glDisable(GL_BLEND); glColorMaski(0, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glDepthMask(GL_FALSE); + // set stencil bit7 where the shadowmask z-fails glDepthFunc(GL_LESS); 
glStencilFunc(GL_ALWAYS, 0x80, 0x80); glStencilOp(GL_KEEP, GL_REPLACE, GL_KEEP); @@ -985,12 +1135,13 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) if (needopaque) { - UseRenderShader(flags); + glUniform1i(RenderModeULoc, RenderMode_Opaque); glDisable(GL_BLEND); glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_TRUE, GL_TRUE, fogenable, GL_FALSE); + // set stencil to polyID glStencilFunc(GL_ALWAYS, polyid, 0xFF); glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); glStencilMask(0xFF); @@ -1000,7 +1151,7 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) RenderSinglePolygon(i); } - UseRenderShader(flags | RenderFlag_Trans); + glUniform1i(RenderModeULoc, RenderMode_Translucent); GLboolean transfog; if (!(polyattr & (1<<15))) transfog = fogenable; @@ -1012,6 +1163,9 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) glColorMaski(0, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glDepthMask(GL_FALSE); + + // render where polyID matches (ignoring other bits) + // clear bit7 where it passes glStencilFunc(GL_EQUAL, polyid, 0x3F); glStencilOp(GL_KEEP, GL_KEEP, GL_ZERO); glStencilMask(0x80); @@ -1022,6 +1176,8 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + // render where bit7 is set (ie. 
shadow mask) + // set bit6 and replace polyID glStencilFunc(GL_EQUAL, 0xC0|polyid, 0x80); glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); glStencilMask(0x7F); @@ -1037,6 +1193,8 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_FALSE, GL_FALSE, transfog, GL_FALSE); + // render where polyID and bit6 do not match + // set bit6 and set polyID glStencilFunc(GL_NOTEQUAL, 0x40|polyid, 0x7F); glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); glStencilMask(0x7F); @@ -1052,7 +1210,7 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) } } - if (gpu3d.RenderDispCnt & 0x00A0) // fog/edge enabled + if (GPU3D.RenderDispCnt & 0x00A0) // fog/edge enabled { glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMaski(1, GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); @@ -1074,7 +1232,7 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); glBindVertexArray(ClearVertexArrayID); - if (gpu3d.RenderDispCnt & (1<<5)) + if (GPU3D.RenderDispCnt & (1<<5)) { // edge marking // TODO: depth/polyid values at screen edges @@ -1086,25 +1244,25 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) glDrawArrays(GL_TRIANGLES, 0, 2*3); } - if (gpu3d.RenderDispCnt & (1<<7)) + if (GPU3D.RenderDispCnt & (1<<7)) { // fog glUseProgram(FinalPassFogShader); - if (gpu3d.RenderDispCnt & (1<<6)) + if (GPU3D.RenderDispCnt & (1<<6)) glBlendFuncSeparate(GL_ZERO, GL_ONE, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); else glBlendFuncSeparate(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA, GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_ALPHA); { - u32 c = gpu3d.RenderFogColor; + u32 c = GPU3D.RenderFogColor; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; u32 a = (c >> 16) & 0x1F; - glBlendColor((float)b/31.0, (float)g/31.0, (float)r/31.0, (float)a/31.0); + glBlendColor((float)r/31.0, (float)g/31.0, (float)b/31.0, (float)a/31.0); } 
glDrawArrays(GL_TRIANGLES, 0, 2*3); @@ -1113,8 +1271,61 @@ void GLRenderer::RenderSceneChunk(const GPU3D& gpu3d, int y, int h) } -void GLRenderer::RenderFrame(GPU& gpu) +void GLRenderer3D::RenderFrame() { + u8 clrBitmapDirty; + if (!Texcache.Update(clrBitmapDirty) && GPU3D.RenderFrameIdentical) + { + return; + } + + // figure out which chunks of texture memory contain display captures + int captureinfo[16]; + GPU.GetCaptureInfo_Texture(captureinfo); + + // if we're using a clear bitmap, set that up + ClearBitmapDirty |= clrBitmapDirty; + if (GPU3D.RenderDispCnt & (1<<14)) + { + if (ClearBitmapDirty & (1<<0)) + { + u16* vram = (u16*)&GPU.VRAMFlat_Texture[0x40000]; + for (int i = 0; i < 256*256; i++) + { + u16 color = vram[i]; + u32 r = (color << 1) & 0x3E; if (r) r++; + u32 g = (color >> 4) & 0x3E; if (g) g++; + u32 b = (color >> 9) & 0x3E; if (b) b++; + u32 a = (color & 0x8000) ? 31 : 0; + + ClearBitmap[0][i] = r | (g << 8) | (b << 16) | (a << 24); + } + + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[0]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 256, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ClearBitmap[0]); + } + + if (ClearBitmapDirty & (1<<1)) + { + u16* vram = (u16*)&GPU.VRAMFlat_Texture[0x60000]; + for (int i = 0; i < 256*256; i++) + { + u16 val = vram[i]; + u32 depth = ((val & 0x7FFF) * 0x200) + 0x1FF; + u32 fog = (val & 0x8000) << 9; + + ClearBitmap[1][i] = depth | fog; + } + + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[1]); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 256, GL_RED_INTEGER, GL_UNSIGNED_INT, ClearBitmap[1]); + } + + ClearBitmapDirty = 0; + } + + TexEnable = !!(GPU3D.RenderDispCnt & (1<<0)); + CurShaderID = -1; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); @@ -1122,11 +1333,11 @@ void GLRenderer::RenderFrame(GPU& gpu) ShaderConfig.uScreenSize[0] = ScreenW; ShaderConfig.uScreenSize[1] = ScreenH; - ShaderConfig.uDispCnt = gpu.GPU3D.RenderDispCnt; + ShaderConfig.uDispCnt = GPU3D.RenderDispCnt; for (int i = 0; i < 32; i++) { - u16 c = 
gpu.GPU3D.RenderToonTable[i]; + u16 c = GPU3D.RenderToonTable[i]; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1138,7 +1349,7 @@ void GLRenderer::RenderFrame(GPU& gpu) for (int i = 0; i < 8; i++) { - u16 c = gpu.GPU3D.RenderEdgeTable[i]; + u16 c = GPU3D.RenderEdgeTable[i]; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1149,7 +1360,7 @@ void GLRenderer::RenderFrame(GPU& gpu) } { - u32 c = gpu.GPU3D.RenderFogColor; + u32 c = GPU3D.RenderFogColor; u32 r = c & 0x1F; u32 g = (c >> 5) & 0x1F; u32 b = (c >> 10) & 0x1F; @@ -1163,50 +1374,18 @@ void GLRenderer::RenderFrame(GPU& gpu) for (int i = 0; i < 34; i++) { - u8 d = gpu.GPU3D.RenderFogDensityTable[i]; + u8 d = GPU3D.RenderFogDensityTable[i]; ShaderConfig.uFogDensity[i][0] = (float)d / 127.0; } - ShaderConfig.uFogOffset = gpu.GPU3D.RenderFogOffset; - ShaderConfig.uFogShift = gpu.GPU3D.RenderFogShift; + ShaderConfig.uFogOffset = GPU3D.RenderFogOffset; + ShaderConfig.uFogShift = GPU3D.RenderFogShift; glBindBuffer(GL_UNIFORM_BUFFER, ShaderConfigUBO); void* unibuf = glMapBuffer(GL_UNIFORM_BUFFER, GL_WRITE_ONLY); if (unibuf) memcpy(unibuf, &ShaderConfig, sizeof(ShaderConfig)); glUnmapBuffer(GL_UNIFORM_BUFFER); - // SUCKY!!!!!!!!!!!!!!!!!! - // TODO: detect when VRAM blocks are modified! 
- glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, TexMemID); - for (int i = 0; i < 4; i++) - { - u32 mask = gpu.VRAMMap_Texture[i]; - u8* vram; - if (!mask) continue; - else if (mask & (1<<0)) vram = gpu.VRAM_A; - else if (mask & (1<<1)) vram = gpu.VRAM_B; - else if (mask & (1<<2)) vram = gpu.VRAM_C; - else if (mask & (1<<3)) vram = gpu.VRAM_D; - - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*128, 1024, 128, GL_RED_INTEGER, GL_UNSIGNED_BYTE, vram); - } - - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_2D, TexPalMemID); - for (int i = 0; i < 6; i++) - { - // 6 x 16K chunks - u32 mask = gpu.VRAMMap_TexPal[i]; - u8* vram; - if (!mask) continue; - else if (mask & (1<<4)) vram = &gpu.VRAM_E[(i&3)*0x4000]; - else if (mask & (1<<5)) vram = gpu.VRAM_F; - else if (mask & (1<<6)) vram = gpu.VRAM_G; - - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, i*8, 1024, 8, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, vram); - } - glDisable(GL_SCISSOR_TEST); glEnable(GL_DEPTH_TEST); glEnable(GL_STENCIL_TEST); @@ -1219,24 +1398,46 @@ void GLRenderer::RenderFrame(GPU& gpu) glDepthMask(GL_TRUE); glStencilMask(0xFF); + glDepthFunc(GL_ALWAYS); + glStencilFunc(GL_ALWAYS, 0xFF, 0xFF); + glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + // clear buffers - // TODO: clear bitmap // TODO: check whether 'clear polygon ID' affects translucent polyID // (for example when alpha is 1..30) + if (GPU3D.RenderDispCnt & (1<<14)) { - glUseProgram(ClearShaderPlain); - glDepthFunc(GL_ALWAYS); + // clear bitmap + glUseProgram(ClearShaderBitmap); + + u32 polyid = (GPU3D.RenderClearAttr1 >> 24) & 0x3F; - u32 r = gpu.GPU3D.RenderClearAttr1 & 0x1F; - u32 g = (gpu.GPU3D.RenderClearAttr1 >> 5) & 0x1F; - u32 b = (gpu.GPU3D.RenderClearAttr1 >> 10) & 0x1F; - u32 fog = (gpu.GPU3D.RenderClearAttr1 >> 15) & 0x1; - u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F; - u32 polyid = (gpu.GPU3D.RenderClearAttr1 >> 24) & 0x3F; - u32 z = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + float bitmapoffset[2]; + u8 
xoff = (GPU3D.RenderClearAttr2 >> 16) & 0xFF; + u8 yoff = (GPU3D.RenderClearAttr2 >> 24) & 0xFF; + bitmapoffset[0] = (float)xoff / 256.0; + bitmapoffset[1] = (float)yoff / 256.0; - glStencilFunc(GL_ALWAYS, 0xFF, 0xFF); - glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + glUniform2f(ClearBitmapULoc[0], bitmapoffset[0], bitmapoffset[1]); + glUniform1ui(ClearBitmapULoc[1], polyid); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[0]); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, ClearBitmapTex[1]); + } + else + { + // plain clear plane + glUseProgram(ClearShaderPlain); + + u32 r = GPU3D.RenderClearAttr1 & 0x1F; + u32 g = (GPU3D.RenderClearAttr1 >> 5) & 0x1F; + u32 b = (GPU3D.RenderClearAttr1 >> 10) & 0x1F; + u32 fog = (GPU3D.RenderClearAttr1 >> 15) & 0x1; + u32 a = (GPU3D.RenderClearAttr1 >> 16) & 0x1F; + u32 polyid = (GPU3D.RenderClearAttr1 >> 24) & 0x3F; + u32 z = ((GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; /*if (r) r = r*2 + 1; if (g) g = g*2 + 1; @@ -1246,26 +1447,22 @@ void GLRenderer::RenderFrame(GPU& gpu) glUniform1ui(ClearUniformLoc[1], z); glUniform1ui(ClearUniformLoc[2], polyid); glUniform1ui(ClearUniformLoc[3], fog); - - glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); - glBindVertexArray(ClearVertexArrayID); - glDrawArrays(GL_TRIANGLES, 0, 2*3); } - if (gpu.GPU3D.RenderNumPolygons) - { - // render shit here - u32 flags = 0; - if (gpu.GPU3D.RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer; + glBindBuffer(GL_ARRAY_BUFFER, ClearVertexBufferID); + glBindVertexArray(ClearVertexArrayID); + glDrawArrays(GL_TRIANGLES, 0, 2*3); + if (GPU3D.RenderNumPolygons) + { int npolys = 0; int firsttrans = -1; - for (u32 i = 0; i < gpu.GPU3D.RenderNumPolygons; i++) + for (u32 i = 0; i < GPU3D.RenderNumPolygons; i++) { - if (gpu.GPU3D.RenderPolygonRAM[i]->Degenerate) continue; + if (GPU3D.RenderPolygonRAM[i]->Degenerate) continue; - SetupPolygon(&PolygonList[npolys], gpu.GPU3D.RenderPolygonRAM[i]); - if 
(firsttrans < 0 && gpu.GPU3D.RenderPolygonRAM[i]->Translucent) + SetupPolygon(&PolygonList[npolys], GPU3D.RenderPolygonRAM[i]); + if (firsttrans < 0 && GPU3D.RenderPolygonRAM[i]->Translucent) firsttrans = npolys; npolys++; @@ -1273,7 +1470,7 @@ void GLRenderer::RenderFrame(GPU& gpu) NumFinalPolys = npolys; NumOpaqueFinalPolys = firsttrans; - BuildPolygons(&PolygonList[0], npolys); + BuildPolygons(&PolygonList[0], npolys, captureinfo); glBindBuffer(GL_ARRAY_BUFFER, VertexBufferID); glBufferSubData(GL_ARRAY_BUFFER, 0, NumVertices*7*4, VertexBuffer); @@ -1282,65 +1479,13 @@ void GLRenderer::RenderFrame(GPU& gpu) glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer); glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset); - RenderSceneChunk(gpu.GPU3D, 0, 192); - } -} - -void GLRenderer::Stop(const GPU& gpu) -{ - CurGLCompositor.Stop(gpu); -} - -void GLRenderer::PrepareCaptureFrame() -{ - glBindFramebuffer(GL_READ_FRAMEBUFFER, MainFramebuffer); - glReadBuffer(GL_COLOR_ATTACHMENT0); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, DownscaleFramebuffer); - glDrawBuffer(GL_COLOR_ATTACHMENT0); - glBlitFramebuffer(0, 0, ScreenW, ScreenH, 0, 0, 256, 192, GL_COLOR_BUFFER_BIT, GL_NEAREST); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID); - glBindFramebuffer(GL_READ_FRAMEBUFFER, DownscaleFramebuffer); - glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL); -} - -void GLRenderer::Blit(const GPU& gpu) -{ - CurGLCompositor.RenderFrame(gpu, *this); -} - -void GLRenderer::BindOutputTexture(int buffer) -{ - CurGLCompositor.BindOutputTexture(buffer); -} - -u32* GLRenderer::GetLine(int line) -{ - int stride = 256; - - if (line == 0) - { - glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID); - u8* data = (u8*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY); - if (data) memcpy(&Framebuffer[stride*0], data, 4*stride*192); - glUnmapBuffer(GL_PIXEL_PACK_BUFFER); - } - - u64* ptr = (u64*)&Framebuffer[stride 
* line]; - for (int i = 0; i < stride; i+=2) - { - u64 rgb = *ptr & 0x00FCFCFC00FCFCFC; - u64 a = *ptr & 0xF8000000F8000000; - - *ptr++ = (rgb >> 2) | (a >> 3); + RenderSceneChunk(0, 192); } - - return &Framebuffer[stride * line]; } -void GLRenderer::SetupAccelFrame() +u32* GLRenderer3D::GetLine(int line) { - glBindTexture(GL_TEXTURE_2D, ColorBufferTex); + return nullptr; } } diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h index b7805223db..bf9d52e083 100644 --- a/src/GPU3D_OpenGL.h +++ b/src/GPU3D_OpenGL.h @@ -20,40 +20,33 @@ #ifdef OGLRENDERER_ENABLED #include "GPU3D.h" -#include "GPU_OpenGL.h" #include "OpenGLSupport.h" +#include "GPU3D_TexcacheOpenGL.h" +#include "NonStupidBitfield.h" namespace melonDS { -class GPU; +class GLRenderer; -class GLRenderer : public Renderer3D +class GLRenderer3D : public Renderer3D { public: - ~GLRenderer() override; - void Reset(GPU& gpu) override; + GLRenderer3D(melonDS::GPU3D& gpu3D, GLRenderer& parent) noexcept; + ~GLRenderer3D() override; + bool Init() override; + void Reset() override; - void SetRenderSettings(bool betterpolygons, int scale) noexcept; + void SetRenderSettings(int scale, bool betterpolygons) noexcept; void SetBetterPolygons(bool betterpolygons) noexcept; void SetScaleFactor(int scale) noexcept; [[nodiscard]] bool GetBetterPolygons() const noexcept { return BetterPolygons; } [[nodiscard]] int GetScaleFactor() const noexcept { return ScaleFactor; } - void VCount144(GPU& gpu) override {}; - void RenderFrame(GPU& gpu) override; - void Stop(const GPU& gpu) override; + void RenderFrame() override; u32* GetLine(int line) override; - void SetupAccelFrame() override; - void PrepareCaptureFrame() override; - void Blit(const GPU& gpu) override; - - void BindOutputTexture(int buffer) override; - - static std::unique_ptr New() noexcept; private: - // Used by New() - GLRenderer(GLCompositor&& compositor) noexcept; + GLRenderer& Parent; // GL version requirements // * texelFetch: 3.0 (GLSL 1.30) (3.2/1.50 for MS) @@ 
-71,33 +64,42 @@ class GLRenderer : public Renderer3D u32 EdgeIndicesOffset; u32 RenderKey; + + GLuint TexID; + u32 TexRepeat; }; - GLCompositor CurGLCompositor; + //GLCompositor CurGLCompositor; RendererPolygon PolygonList[2048] {}; - bool BuildRenderShader(u32 flags, const std::string& vs, const std::string& fs); - void UseRenderShader(u32 flags); + bool TexEnable; + TexcacheOpenGL Texcache; + + bool BuildRenderShader(bool wbuffer); + void UseRenderShader(bool wbuffer); void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; - u32* SetupVertex(const Polygon* poly, int vid, const Vertex* vtx, u32 vtxattr, u32* vptr) const; - void BuildPolygons(RendererPolygon* polygons, int npolys); + u32* SetupVertex(const Polygon* poly, int vid, const Vertex* vtx, u32 vtxattr, u32 texlayer, u32* vptr) const; + void BuildPolygons(RendererPolygon* polygons, int npolys, int captureinfo[16]); + void SetupPolygonTexture(const RendererPolygon* poly) const; int RenderSinglePolygon(int i) const; int RenderPolygonBatch(int i) const; int RenderPolygonEdgeBatch(int i) const; - void RenderSceneChunk(const GPU3D& gpu3d, int y, int h); + void RenderSceneChunk(int y, int h); + enum { - RenderFlag_WBuffer = 0x01, - RenderFlag_Trans = 0x02, - RenderFlag_ShadowMask = 0x04, - RenderFlag_Edge = 0x08, + RenderMode_Opaque = 0, + RenderMode_Translucent, + RenderMode_ShadowMask, }; GLuint ClearShaderPlain {}; + GLuint ClearShaderBitmap {}; - GLuint RenderShader[16] {}; + GLuint RenderShader[2] {}; + GLint RenderModeULoc = 0; GLuint CurShaderID = -1; GLuint FinalPassEdgeShader {}; @@ -124,6 +126,11 @@ class GLRenderer : public Renderer3D GLuint ClearVertexBufferID = 0, ClearVertexArrayID {}; GLint ClearUniformLoc[4] {}; + GLint ClearBitmapULoc[2] {}; + GLuint ClearBitmapTex[2]; + u32* ClearBitmap[2]; + u8 ClearBitmapDirty; + // vertex buffer // * XYZW: 4x16bit // * RGBA: 4x8bit @@ -147,19 +154,13 @@ class GLRenderer : public Renderer3D const u32 EdgeIndicesOffset = 2048 * 30; - GLuint TexMemID 
{}; - GLuint TexPalMemID {}; - int ScaleFactor {}; bool BetterPolygons {}; int ScreenW {}, ScreenH {}; GLuint ColorBufferTex {}, DepthBufferTex {}, AttrBufferTex {}; - GLuint DownScaleBufferTex {}; - GLuint PixelbufferID {}; - GLuint MainFramebuffer {}, DownscaleFramebuffer {}; - u32 Framebuffer[256*192] {}; + GLuint MainFramebuffer {}; }; } #endif \ No newline at end of file diff --git a/src/GPU3D_OpenGL_shaders.h b/src/GPU3D_OpenGL_shaders.h deleted file mode 100644 index ab9985ce4e..0000000000 --- a/src/GPU3D_OpenGL_shaders.h +++ /dev/null @@ -1,808 +0,0 @@ -/* - Copyright 2016-2025 melonDS team - - This file is part of melonDS. - - melonDS is free software: you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - melonDS is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with melonDS. If not, see http://www.gnu.org/licenses/. 
-*/ - -#ifndef GPU3D_OPENGL_SHADERS_H -#define GPU3D_OPENGL_SHADERS_H - -#define kShaderHeader "#version 140" - -namespace melonDS -{ -const char* kClearVS = kShaderHeader R"( - -in vec2 vPosition; - -uniform uint uDepth; - -void main() -{ - float fdepth = (float(uDepth) / 8388608.0) - 1.0; - gl_Position = vec4(vPosition, fdepth, 1.0); -} -)"; - -const char* kClearFS = kShaderHeader R"( - -uniform uvec4 uColor; -uniform uint uOpaquePolyID; -uniform uint uFogFlag; - -out vec4 oColor; -out vec4 oAttr; - -void main() -{ - oColor = vec4(uColor).bgra / 31.0; - oAttr.r = float(uOpaquePolyID) / 63.0; - oAttr.g = 0; - oAttr.b = float(uFogFlag); - oAttr.a = 1; -} -)"; - - - -const char* kFinalPassVS = kShaderHeader R"( - -in vec2 vPosition; - -void main() -{ - // heh - gl_Position = vec4(vPosition, 0.0, 1.0); -} -)"; - -const char* kFinalPassEdgeFS = kShaderHeader R"( - -uniform sampler2D DepthBuffer; -uniform sampler2D AttrBuffer; - -layout(std140) uniform uConfig -{ - vec2 uScreenSize; - int uDispCnt; - vec4 uToonColors[32]; - vec4 uEdgeColors[8]; - vec4 uFogColor; - float uFogDensity[34]; - int uFogOffset; - int uFogShift; -}; - -out vec4 oColor; - -// make up for crapo zbuffer precision -bool isless(float a, float b) -{ - return a < b; - - // a < b - float diff = a - b; - return diff < (256.0 / 16777216.0); -} - -bool isgood(vec4 attr, float depth, int refPolyID, float refDepth) -{ - int polyid = int(attr.r * 63.0); - - if (polyid != refPolyID && isless(refDepth, depth)) - return true; - - return false; -} - -void main() -{ - ivec2 coord = ivec2(gl_FragCoord.xy); - int scale = 1;//int(uScreenSize.x / 256); - - vec4 ret = vec4(0,0,0,0); - vec4 depth = texelFetch(DepthBuffer, coord, 0); - vec4 attr = texelFetch(AttrBuffer, coord, 0); - - int polyid = int(attr.r * 63.0); - - if (attr.g != 0) - { - vec4 depthU = texelFetch(DepthBuffer, coord + ivec2(0,-scale), 0); - vec4 attrU = texelFetch(AttrBuffer, coord + ivec2(0,-scale), 0); - vec4 depthD = texelFetch(DepthBuffer, 
coord + ivec2(0,scale), 0); - vec4 attrD = texelFetch(AttrBuffer, coord + ivec2(0,scale), 0); - vec4 depthL = texelFetch(DepthBuffer, coord + ivec2(-scale,0), 0); - vec4 attrL = texelFetch(AttrBuffer, coord + ivec2(-scale,0), 0); - vec4 depthR = texelFetch(DepthBuffer, coord + ivec2(scale,0), 0); - vec4 attrR = texelFetch(AttrBuffer, coord + ivec2(scale,0), 0); - - if (isgood(attrU, depthU.r, polyid, depth.r) || - isgood(attrD, depthD.r, polyid, depth.r) || - isgood(attrL, depthL.r, polyid, depth.r) || - isgood(attrR, depthR.r, polyid, depth.r)) - { - // mark this pixel! - - ret.rgb = uEdgeColors[polyid >> 3].bgr; - - // this isn't quite accurate, but it will have to do - if ((uDispCnt & (1<<4)) != 0) - ret.a = 0.5; - else - ret.a = 1; - } - } - - oColor = ret; -} -)"; - -const char* kFinalPassFogFS = kShaderHeader R"( - -uniform sampler2D DepthBuffer; -uniform sampler2D AttrBuffer; - -layout(std140) uniform uConfig -{ - vec2 uScreenSize; - int uDispCnt; - vec4 uToonColors[32]; - vec4 uEdgeColors[8]; - vec4 uFogColor; - float uFogDensity[34]; - int uFogOffset; - int uFogShift; -}; - -out vec4 oColor; - -vec4 CalculateFog(float depth) -{ - int idepth = int(depth * 16777216.0); - int densityid, densityfrac; - - if (idepth < uFogOffset) - { - densityid = 0; - densityfrac = 0; - } - else - { - uint udepth = uint(idepth); - udepth -= uint(uFogOffset); - udepth = (udepth >> 2) << uint(uFogShift); - - densityid = int(udepth >> 17); - if (densityid >= 32) - { - densityid = 32; - densityfrac = 0; - } - else - densityfrac = int(udepth & uint(0x1FFFF)); - } - - float density = mix(uFogDensity[densityid], uFogDensity[densityid+1], float(densityfrac)/131072.0); - - return vec4(density, density, density, density); -} - -void main() -{ - ivec2 coord = ivec2(gl_FragCoord.xy); - - vec4 ret = vec4(0,0,0,0); - vec4 depth = texelFetch(DepthBuffer, coord, 0); - vec4 attr = texelFetch(AttrBuffer, coord, 0); - - if (attr.b != 0) ret = CalculateFog(depth.r); - - oColor = ret; -} -)"; - - 
- -const char* kRenderVSCommon = R"( - -layout(std140) uniform uConfig -{ - vec2 uScreenSize; - int uDispCnt; - vec4 uToonColors[32]; - vec4 uEdgeColors[8]; - vec4 uFogColor; - float uFogDensity[34]; - int uFogOffset; - int uFogShift; -}; - -in uvec4 vPosition; -in uvec4 vColor; -in ivec2 vTexcoord; -in ivec3 vPolygonAttr; - -smooth out vec4 fColor; -smooth out vec2 fTexcoord; -flat out ivec3 fPolygonAttr; -)"; - -const char* kRenderFSCommon = R"( - -uniform usampler2D TexMem; -uniform sampler2D TexPalMem; - -layout(std140) uniform uConfig -{ - vec2 uScreenSize; - int uDispCnt; - vec4 uToonColors[32]; - vec4 uEdgeColors[8]; - vec4 uFogColor; - float uFogDensity[34]; - int uFogOffset; - int uFogShift; -}; - -smooth in vec4 fColor; -smooth in vec2 fTexcoord; -flat in ivec3 fPolygonAttr; - -out vec4 oColor; -out vec4 oAttr; - -int TexcoordWrap(int c, int maxc, int mode) -{ - if ((mode & (1<<0)) != 0) - { - if ((mode & (1<<2)) != 0 && (c & maxc) != 0) - return (maxc-1) - (c & (maxc-1)); - else - return (c & (maxc-1)); - } - else - return clamp(c, 0, maxc-1); -} - -vec4 TextureFetch_A3I5(ivec2 addr, ivec4 st, int wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x); - ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - - pixel.a = (pixel.r & 0xE0); - pixel.a = (pixel.a >> 3) + (pixel.a >> 6); - pixel.r &= 0x1F; - - addr.y = (addr.y << 3) + pixel.r; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, float(pixel.a)/31.0); -} - -vec4 TextureFetch_I2(ivec2 addr, ivec4 st, int wrapmode, float alpha0) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x) >> 2; - ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - pixel.r >>= (2 * (st.x & 3)); - pixel.r &= 0x03; - - addr.y = (addr.y << 2) + pixel.r; - 
vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, (pixel.r>0)?1:alpha0); -} - -vec4 TextureFetch_I4(ivec2 addr, ivec4 st, int wrapmode, float alpha0) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x) >> 1; - ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - if ((st.x & 1) != 0) pixel.r >>= 4; - else pixel.r &= 0x0F; - - addr.y = (addr.y << 3) + pixel.r; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, (pixel.r>0)?1:alpha0); -} - -vec4 TextureFetch_I8(ivec2 addr, ivec4 st, int wrapmode, float alpha0) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x); - ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - - addr.y = (addr.y << 3) + pixel.r; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, (pixel.r>0)?1:alpha0); -} - -vec4 TextureFetch_Compressed(ivec2 addr, ivec4 st, int wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y & 0x3FC) * (st.z>>2)) + (st.x & 0x3FC) + (st.y & 0x3); - ivec4 p = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - int val = (p.r >> (2 * (st.x & 0x3))) & 0x3; - - int slot1addr = 0x20000 + ((addr.x & 0x1FFFC) >> 1); - if (addr.x >= 0x40000) slot1addr += 0x10000; - - int palinfo; - p = ivec4(texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0)); - palinfo = p.r; - slot1addr++; - p = ivec4(texelFetch(TexMem, ivec2(slot1addr&0x3FF, slot1addr>>10), 0)); - palinfo |= (p.r << 8); - - addr.y = (addr.y << 3) + ((palinfo & 0x3FFF) << 1); - palinfo >>= 14; - - if (val == 0) - { - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } 
- else if (val == 1) - { - addr.y++; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } - else if (val == 2) - { - if (palinfo == 1) - { - vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - addr.y++; - vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4((color0.rgb + color1.rgb) / 2.0, 1.0); - } - else if (palinfo == 3) - { - vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - addr.y++; - vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4((color0.rgb*5.0 + color1.rgb*3.0) / 8.0, 1.0); - } - else - { - addr.y += 2; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } - } - else - { - if (palinfo == 2) - { - addr.y += 3; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4(color.rgb, 1.0); - } - else if (palinfo == 3) - { - vec4 color0 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - addr.y++; - vec4 color1 = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - return vec4((color0.rgb*3.0 + color1.rgb*5.0) / 8.0, 1.0); - } - else - { - return vec4(0.0); - } - } -} - -vec4 TextureFetch_A5I3(ivec2 addr, ivec4 st, int wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x); - ivec4 pixel = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - - pixel.a = (pixel.r & 0xF8) >> 3; - pixel.r &= 0x07; - - addr.y = (addr.y << 3) + pixel.r; - vec4 color = texelFetch(TexPalMem, ivec2(addr.y&0x3FF, addr.y>>10), 0); - - return vec4(color.rgb, float(pixel.a)/31.0); -} - -vec4 TextureFetch_Direct(ivec2 addr, ivec4 st, int wrapmode) -{ - st.x = TexcoordWrap(st.x, st.z, wrapmode>>0); - st.y = TexcoordWrap(st.y, st.w, wrapmode>>1); - - addr.x += ((st.y * st.z) + st.x) << 1; - ivec4 pixelL = 
ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - addr.x++; - ivec4 pixelH = ivec4(texelFetch(TexMem, ivec2(addr.x&0x3FF, addr.x>>10), 0)); - - vec4 color; - color.r = float(pixelL.r & 0x1F) / 31.0; - color.g = float((pixelL.r >> 5) | ((pixelH.r & 0x03) << 3)) / 31.0; - color.b = float((pixelH.r & 0x7C) >> 2) / 31.0; - color.a = float(pixelH.r >> 7); - - return color; -} - -vec4 TextureLookup_Nearest(vec2 st) -{ - int attr = int(fPolygonAttr.y); - int paladdr = int(fPolygonAttr.z); - - float alpha0; - if ((attr & (1<<29)) != 0) alpha0 = 0.0; - else alpha0 = 1.0; - - int tw = 8 << ((attr >> 20) & 0x7); - int th = 8 << ((attr >> 23) & 0x7); - ivec4 st_full = ivec4(ivec2(st), tw, th); - - ivec2 vramaddr = ivec2((attr & 0xFFFF) << 3, paladdr); - int wrapmode = (attr >> 16); - - int type = (attr >> 26) & 0x7; - if (type == 5) return TextureFetch_Compressed(vramaddr, st_full, wrapmode); - else if (type == 2) return TextureFetch_I2 (vramaddr, st_full, wrapmode, alpha0); - else if (type == 3) return TextureFetch_I4 (vramaddr, st_full, wrapmode, alpha0); - else if (type == 4) return TextureFetch_I8 (vramaddr, st_full, wrapmode, alpha0); - else if (type == 1) return TextureFetch_A3I5 (vramaddr, st_full, wrapmode); - else if (type == 6) return TextureFetch_A5I3 (vramaddr, st_full, wrapmode); - else return TextureFetch_Direct (vramaddr, st_full, wrapmode); -} - -vec4 TextureLookup_Linear(vec2 texcoord) -{ - ivec2 intpart = ivec2(texcoord); - vec2 fracpart = fract(texcoord); - - int attr = int(fPolygonAttr.y); - int paladdr = int(fPolygonAttr.z); - - float alpha0; - if ((attr & (1<<29)) != 0) alpha0 = 0.0; - else alpha0 = 1.0; - - int tw = 8 << ((attr >> 20) & 0x7); - int th = 8 << ((attr >> 23) & 0x7); - ivec4 st_full = ivec4(intpart, tw, th); - - ivec2 vramaddr = ivec2((attr & 0xFFFF) << 3, paladdr); - int wrapmode = (attr >> 16); - - vec4 A, B, C, D; - int type = (attr >> 26) & 0x7; - if (type == 5) - { - A = TextureFetch_Compressed(vramaddr, st_full , 
wrapmode); - B = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_Compressed(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); - D = TextureFetch_Compressed(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - else if (type == 2) - { - A = TextureFetch_I2(vramaddr, st_full , wrapmode, alpha0); - B = TextureFetch_I2(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); - C = TextureFetch_I2(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); - D = TextureFetch_I2(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); - } - else if (type == 3) - { - A = TextureFetch_I4(vramaddr, st_full , wrapmode, alpha0); - B = TextureFetch_I4(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); - C = TextureFetch_I4(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); - D = TextureFetch_I4(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); - } - else if (type == 4) - { - A = TextureFetch_I8(vramaddr, st_full , wrapmode, alpha0); - B = TextureFetch_I8(vramaddr, st_full + ivec4(1,0,0,0), wrapmode, alpha0); - C = TextureFetch_I8(vramaddr, st_full + ivec4(0,1,0,0), wrapmode, alpha0); - D = TextureFetch_I8(vramaddr, st_full + ivec4(1,1,0,0), wrapmode, alpha0); - } - else if (type == 1) - { - A = TextureFetch_A3I5(vramaddr, st_full , wrapmode); - B = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_A3I5(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); - D = TextureFetch_A3I5(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - else if (type == 6) - { - A = TextureFetch_A5I3(vramaddr, st_full , wrapmode); - B = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_A5I3(vramaddr, st_full + ivec4(0,1,0,0), wrapmode); - D = TextureFetch_A5I3(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - else - { - A = TextureFetch_Direct(vramaddr, st_full , wrapmode); - B = TextureFetch_Direct(vramaddr, st_full + ivec4(1,0,0,0), wrapmode); - C = TextureFetch_Direct(vramaddr, st_full + 
ivec4(0,1,0,0), wrapmode); - D = TextureFetch_Direct(vramaddr, st_full + ivec4(1,1,0,0), wrapmode); - } - - float fx = fracpart.x; - vec4 AB; - if (A.a < (0.5/31.0) && B.a < (0.5/31.0)) - AB = vec4(0); - else - { - //if (A.a < (0.5/31.0) || B.a < (0.5/31.0)) - // fx = step(0.5, fx); - - AB = mix(A, B, fx); - } - - fx = fracpart.x; - vec4 CD; - if (C.a < (0.5/31.0) && D.a < (0.5/31.0)) - CD = vec4(0); - else - { - //if (C.a < (0.5/31.0) || D.a < (0.5/31.0)) - // fx = step(0.5, fx); - - CD = mix(C, D, fx); - } - - fx = fracpart.y; - vec4 ret; - if (AB.a < (0.5/31.0) && CD.a < (0.5/31.0)) - ret = vec4(0); - else - { - //if (AB.a < (0.5/31.0) || CD.a < (0.5/31.0)) - // fx = step(0.5, fx); - - ret = mix(AB, CD, fx); - } - - return ret; -} - -vec4 FinalColor() -{ - vec4 col; - vec4 vcol = fColor; - int blendmode = (fPolygonAttr.x >> 4) & 0x3; - - if (blendmode == 2) - { - if ((uDispCnt & (1<<1)) == 0) - { - // toon - vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; - vcol.rgb = tooncolor; - } - else - { - // highlight - vcol.rgb = vcol.rrr; - } - } - - if ((((fPolygonAttr.y >> 26) & 0x7) == 0) || ((uDispCnt & (1<<0)) == 0)) - { - // no texture - col = vcol; - } - else - { - vec4 tcol = TextureLookup_Nearest(fTexcoord); - //vec4 tcol = TextureLookup_Linear(fTexcoord); - - if ((blendmode & 1) != 0) - { - // decal - col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a)); - col.a = vcol.a; - } - else - { - // modulate - col = vcol * tcol; - } - } - - if (blendmode == 2) - { - if ((uDispCnt & (1<<1)) != 0) - { - vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; - col.rgb = min(col.rgb + tooncolor, 1.0); - } - } - - return col.bgra; -} -)"; - - -const char* kRenderVS_Z = R"( - -void main() -{ - int attr = vPolygonAttr.x; - int zshift = (attr >> 16) & 0x1F; - - vec4 fpos; - fpos.xy = (((vec2(vPosition.xy) ) * 2.0) / uScreenSize) - 1.0; - fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0; - fpos.w = float(vPosition.w) / 65536.0f; - fpos.xyz *= fpos.w; - - fColor 
= vec4(vColor) / vec4(255.0,255.0,255.0,31.0); - fTexcoord = vec2(vTexcoord) / 16.0; - fPolygonAttr = vPolygonAttr; - - gl_Position = fpos; -} -)"; - -const char* kRenderVS_W = R"( - -smooth out float fZ; - -void main() -{ - int attr = vPolygonAttr.x; - int zshift = (attr >> 16) & 0x1F; - - vec4 fpos; - fpos.xy = (((vec2(vPosition.xy) ) * 2.0) / uScreenSize) - 1.0; - fpos.z = 0.0; - fZ = float(vPosition.z << zshift) / 16777216.0; - fpos.w = float(vPosition.w) / 65536.0f; - fpos.xy *= fpos.w; - - fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); - fTexcoord = vec2(vTexcoord) / 16.0; - fPolygonAttr = vPolygonAttr; - - gl_Position = fpos; -} -)"; - - -const char* kRenderFS_ZO = R"( - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 30.5/31) discard; - - oColor = col; - oAttr.r = float((fPolygonAttr.x >> 24) & 0x3F) / 63.0; - oAttr.g = 0; - oAttr.b = float((fPolygonAttr.x >> 15) & 0x1); - oAttr.a = 1; -} -)"; - -const char* kRenderFS_WO = R"( - -smooth in float fZ; - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 30.5/31) discard; - - oColor = col; - oAttr.r = float((fPolygonAttr.x >> 24) & 0x3F) / 63.0; - oAttr.g = 0; - oAttr.b = float((fPolygonAttr.x >> 15) & 0x1); - oAttr.a = 1; - gl_FragDepth = fZ; -} -)"; - -const char* kRenderFS_ZE = R"( - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 30.5/31) discard; - - oAttr.g = 1; - oAttr.a = 1; -} -)"; - -const char* kRenderFS_WE = R"( - -smooth in float fZ; - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 30.5/31) discard; - - oAttr.g = 1; - oAttr.a = 1; - gl_FragDepth = fZ; -} -)"; - -const char* kRenderFS_ZT = R"( - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 0.5/31) discard; - if (col.a >= 30.5/31) discard; - - oColor = col; - oAttr.b = 0; - oAttr.a = 1; -} -)"; - -const char* kRenderFS_WT = R"( - -smooth in float fZ; - -void main() -{ - vec4 col = FinalColor(); - if (col.a < 0.5/31) discard; - if (col.a >= 30.5/31) discard; - - oColor = col; - oAttr.b = 0; - 
oAttr.a = 1; - gl_FragDepth = fZ; -} -)"; - -const char* kRenderFS_ZSM = R"( - -void main() -{ - oColor = vec4(0,0,0,1); -} -)"; - -const char* kRenderFS_WSM = R"( - -smooth in float fZ; - -void main() -{ - oColor = vec4(0,0,0,1); - gl_FragDepth = fZ; -} -)"; -} -#endif // GPU3D_OPENGL_SHADERS_H diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp index 9f6e2d7350..7cb17be6f2 100644 --- a/src/GPU3D_Soft.cpp +++ b/src/GPU3D_Soft.cpp @@ -30,7 +30,7 @@ namespace melonDS void RenderThreadFunc(); -void SoftRenderer::StopRenderThread() +void SoftRenderer3D::StopRenderThread() { if (RenderThreadRunning.load(std::memory_order_relaxed)) { @@ -45,15 +45,15 @@ void SoftRenderer::StopRenderThread() } } -void SoftRenderer::SetupRenderThread(GPU& gpu) +void SoftRenderer3D::SetupRenderThread() { if (Threaded) { if (!RenderThreadRunning.load(std::memory_order_relaxed)) { // If the render thread isn't already running... RenderThreadRunning = true; // "Time for work, render thread!" - RenderThread = Platform::Thread_Create([this, &gpu]() { - RenderThreadFunc(gpu); + RenderThread = Platform::Thread_Create([this]() { + RenderThreadFunc(); }); } @@ -87,7 +87,7 @@ void SoftRenderer::SetupRenderThread(GPU& gpu) } } -void SoftRenderer::EnableRenderThread() +void SoftRenderer3D::EnableRenderThread() { if (Threaded && Sema_RenderStart) { @@ -95,8 +95,8 @@ void SoftRenderer::EnableRenderThread() } } -SoftRenderer::SoftRenderer() noexcept - : Renderer3D(false) +SoftRenderer3D::SoftRenderer3D(melonDS::GPU3D& gpu3D, SoftRenderer& parent) noexcept + : Renderer3D(gpu3D), Parent(parent) { Sema_RenderStart = Platform::Semaphore_Create(); Sema_RenderDone = Platform::Semaphore_Create(); @@ -107,7 +107,7 @@ SoftRenderer::SoftRenderer() noexcept RenderThread = nullptr; } -SoftRenderer::~SoftRenderer() +SoftRenderer3D::~SoftRenderer3D() { StopRenderThread(); @@ -116,7 +116,7 @@ SoftRenderer::~SoftRenderer() Platform::Semaphore_Free(Sema_ScanlineCount); } -void SoftRenderer::Reset(GPU& gpu) +void 
SoftRenderer3D::Reset() { memset(ColorBuffer, 0, BufferSize * 2 * 4); memset(DepthBuffer, 0, BufferSize * 2 * 4); @@ -124,22 +124,25 @@ void SoftRenderer::Reset(GPU& gpu) PrevIsShadowMask = false; - SetupRenderThread(gpu); + SetupRenderThread(); EnableRenderThread(); } -void SoftRenderer::SetThreaded(bool threaded, GPU& gpu) noexcept +void SoftRenderer3D::SetThreaded(bool threaded) noexcept { if (Threaded != threaded) { Threaded = threaded; - SetupRenderThread(gpu); + SetupRenderThread(); EnableRenderThread(); } } -void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const +void SoftRenderer3D::TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const { + // TODO: consider using texture cache + // however, I like the idea of having a "hardware accurate" path + u32 vramaddr = (texparam & 0xFFFF) << 3; s32 width = 8 << ((texparam >> 20) & 0x7); @@ -193,10 +196,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s case 1: // A3I5 { vramaddr += ((t * width) + s); - u8 pixel = gpu.ReadVRAMFlat_Texture(vramaddr); + u8 pixel = GPU.ReadVRAMFlat_Texture(vramaddr); texpal <<= 4; - *color = gpu.ReadVRAMFlat_TexPal(texpal + ((pixel&0x1F)<<1)); + *color = GPU.ReadVRAMFlat_TexPal(texpal + ((pixel&0x1F)<<1)); *alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6); } break; @@ -204,12 +207,12 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s case 2: // 4-color { vramaddr += (((t * width) + s) >> 2); - u8 pixel = gpu.ReadVRAMFlat_Texture(vramaddr); + u8 pixel = GPU.ReadVRAMFlat_Texture(vramaddr); pixel >>= ((s & 0x3) << 1); pixel &= 0x3; texpal <<= 3; - *color = gpu.ReadVRAMFlat_TexPal(texpal + (pixel<<1)); + *color = GPU.ReadVRAMFlat_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? 
alpha0 : 31; } break; @@ -217,12 +220,12 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s case 3: // 16-color { vramaddr += (((t * width) + s) >> 1); - u8 pixel = gpu.ReadVRAMFlat_Texture(vramaddr); + u8 pixel = GPU.ReadVRAMFlat_Texture(vramaddr); if (s & 0x1) pixel >>= 4; else pixel &= 0xF; texpal <<= 4; - *color = gpu.ReadVRAMFlat_TexPal(texpal + (pixel<<1)); + *color = GPU.ReadVRAMFlat_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -230,10 +233,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s case 4: // 256-color { vramaddr += ((t * width) + s); - u8 pixel = gpu.ReadVRAMFlat_Texture(vramaddr); + u8 pixel = GPU.ReadVRAMFlat_Texture(vramaddr); texpal <<= 4; - *color = gpu.ReadVRAMFlat_TexPal(texpal + (pixel<<1)); + *color = GPU.ReadVRAMFlat_TexPal(texpal + (pixel<<1)); *alpha = (pixel==0) ? alpha0 : 31; } break; @@ -253,31 +256,31 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s val = 0; else { - val = gpu.ReadVRAMFlat_Texture(vramaddr); + val = GPU.ReadVRAMFlat_Texture(vramaddr); val >>= (2 * (s & 0x3)); } - u16 palinfo = gpu.ReadVRAMFlat_Texture(slot1addr); + u16 palinfo = GPU.ReadVRAMFlat_Texture(slot1addr); u32 paloffset = (palinfo & 0x3FFF) << 2; texpal <<= 4; switch (val & 0x3) { case 0: - *color = gpu.ReadVRAMFlat_TexPal(texpal + paloffset); + *color = GPU.ReadVRAMFlat_TexPal(texpal + paloffset); *alpha = 31; break; case 1: - *color = gpu.ReadVRAMFlat_TexPal(texpal + paloffset + 2); + *color = GPU.ReadVRAMFlat_TexPal(texpal + paloffset + 2); *alpha = 31; break; case 2: if ((palinfo >> 14) == 1) { - u16 color0 = gpu.ReadVRAMFlat_TexPal(texpal + paloffset); - u16 color1 = gpu.ReadVRAMFlat_TexPal(texpal + paloffset + 2); + u16 color0 = GPU.ReadVRAMFlat_TexPal(texpal + paloffset); + u16 color1 = GPU.ReadVRAMFlat_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -294,8 +297,8 @@ void 
SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s } else if ((palinfo >> 14) == 3) { - u16 color0 = gpu.ReadVRAMFlat_TexPal(texpal + paloffset); - u16 color1 = gpu.ReadVRAMFlat_TexPal(texpal + paloffset + 2); + u16 color0 = GPU.ReadVRAMFlat_TexPal(texpal + paloffset); + u16 color1 = GPU.ReadVRAMFlat_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -311,20 +314,20 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s *color = r | g | b; } else - *color = gpu.ReadVRAMFlat_TexPal(texpal + paloffset + 4); + *color = GPU.ReadVRAMFlat_TexPal(texpal + paloffset + 4); *alpha = 31; break; case 3: if ((palinfo >> 14) == 2) { - *color = gpu.ReadVRAMFlat_TexPal(texpal + paloffset + 6); + *color = GPU.ReadVRAMFlat_TexPal(texpal + paloffset + 6); *alpha = 31; } else if ((palinfo >> 14) == 3) { - u16 color0 = gpu.ReadVRAMFlat_TexPal(texpal + paloffset); - u16 color1 = gpu.ReadVRAMFlat_TexPal(texpal + paloffset + 2); + u16 color0 = GPU.ReadVRAMFlat_TexPal(texpal + paloffset); + u16 color1 = GPU.ReadVRAMFlat_TexPal(texpal + paloffset + 2); u32 r0 = color0 & 0x001F; u32 g0 = color0 & 0x03E0; @@ -353,10 +356,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s case 6: // A5I3 { vramaddr += ((t * width) + s); - u8 pixel = gpu.ReadVRAMFlat_Texture(vramaddr); + u8 pixel = GPU.ReadVRAMFlat_Texture(vramaddr); texpal <<= 4; - *color = gpu.ReadVRAMFlat_TexPal(texpal + ((pixel&0x7)<<1)); + *color = GPU.ReadVRAMFlat_TexPal(texpal + ((pixel&0x7)<<1)); *alpha = (pixel >> 3); } break; @@ -364,7 +367,7 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s case 7: // direct color { vramaddr += (((t * width) + s) << 1); - *color = gpu.ReadVRAMFlat_Texture(vramaddr); + *color = GPU.ReadVRAMFlat_Texture(vramaddr); *alpha = (*color & 0x8000) ? 
31 : 0; } break; @@ -421,7 +424,7 @@ bool DepthTest_LessThan_FrontFacing(s32 dstz, s32 z, u32 dstattr) return false; } -u32 SoftRenderer::AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept +u32 SoftRenderer3D::AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) const noexcept { u32 dstalpha = dstcolor >> 24; @@ -432,7 +435,7 @@ u32 SoftRenderer::AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 u32 srcG = (srccolor >> 8) & 0x3F; u32 srcB = (srccolor >> 16) & 0x3F; - if (gpu3d.RenderDispCnt & (1<<3)) + if (GPU3D.RenderDispCnt & (1<<3)) { u32 dstR = dstcolor & 0x3F; u32 dstG = (dstcolor >> 8) & 0x3F; @@ -451,7 +454,7 @@ u32 SoftRenderer::AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24); } -u32 SoftRenderer::RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const +u32 SoftRenderer3D::RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const { u8 r, g, b, a; @@ -461,7 +464,7 @@ u32 SoftRenderer::RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 if (blendmode == 2) { - if (gpu.GPU3D.RenderDispCnt & (1<<1)) + if (GPU3D.RenderDispCnt & (1<<1)) { // highlight mode: color is calculated normally // except all vertex color components are set @@ -475,7 +478,7 @@ u32 SoftRenderer::RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 { // toon mode: vertex color is replaced by toon color - u16 tooncolor = gpu.GPU3D.RenderToonTable[vr >> 1]; + u16 tooncolor = GPU3D.RenderToonTable[vr >> 1]; vr = (tooncolor << 1) & 0x3E; if (vr) vr++; vg = (tooncolor >> 4) & 0x3E; if (vg) vg++; @@ -483,12 +486,12 @@ u32 SoftRenderer::RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 } } - if ((gpu.GPU3D.RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) + if ((GPU3D.RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0)) { u8 tr, tg, tb; u16 tcolor; u8 talpha; - 
TextureLookup(gpu, polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha); + TextureLookup(polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha); tr = (tcolor << 1) & 0x3E; if (tr) tr++; tg = (tcolor >> 4) & 0x3E; if (tg) tg++; @@ -536,9 +539,9 @@ u32 SoftRenderer::RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 a = polyalpha; } - if ((blendmode == 2) && (gpu.GPU3D.RenderDispCnt & (1<<1))) + if ((blendmode == 2) && (GPU3D.RenderDispCnt & (1<<1))) { - u16 tooncolor = gpu.GPU3D.RenderToonTable[vr >> 1]; + u16 tooncolor = GPU3D.RenderToonTable[vr >> 1]; vr = (tooncolor << 1) & 0x3E; if (vr) vr++; vg = (tooncolor >> 4) & 0x3E; if (vg) vg++; @@ -559,7 +562,7 @@ u32 SoftRenderer::RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 return r | (g << 8) | (b << 16) | (a << 24); } -void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) +void SoftRenderer3D::PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow) { u32 dstattr = AttrBuffer[pixeladdr]; u32 attr = (polyattr & 0xE0F0) | ((polyattr >> 8) & 0xFF0000) | (1<<22) | (dstattr & 0xFF001F0F); @@ -589,7 +592,7 @@ void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 c if (!(dstattr & (1<<15))) attr &= ~(1<<15); - color = AlphaBlend(gpu3d, color, ColorBuffer[pixeladdr], color>>24); + color = AlphaBlend(color, ColorBuffer[pixeladdr], color>>24); if (z != -1) DepthBuffer[pixeladdr] = z; @@ -598,7 +601,7 @@ void SoftRenderer::PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 c AttrBuffer[pixeladdr] = attr; } -void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y) const +void SoftRenderer3D::SetupPolygonLeftEdge(SoftRenderer3D::RendererPolygon* rp, s32 y) const { Polygon* polygon = rp->PolyData; @@ -625,7 +628,7 @@ void SoftRenderer::SetupPolygonLeftEdge(SoftRenderer::RendererPolygon* rp, s32 y polygon->FinalW[rp->CurVL], 
polygon->FinalW[rp->NextVL], y, polygon->WBuffer); } -void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 y) const +void SoftRenderer3D::SetupPolygonRightEdge(SoftRenderer3D::RendererPolygon* rp, s32 y) const { Polygon* polygon = rp->PolyData; @@ -652,7 +655,7 @@ void SoftRenderer::SetupPolygonRightEdge(SoftRenderer::RendererPolygon* rp, s32 polygon->FinalW[rp->CurVR], polygon->FinalW[rp->NextVR], y, polygon->WBuffer); } -void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* polygon) const +void SoftRenderer3D::SetupPolygon(SoftRenderer3D::RendererPolygon* rp, Polygon* polygon) const { u32 nverts = polygon->NumVertices; @@ -705,7 +708,7 @@ void SoftRenderer::SetupPolygon(SoftRenderer::RendererPolygon* rp, Polygon* poly } } -void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y) +void SoftRenderer3D::RenderShadowMaskScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -782,7 +785,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* std::swap(zl, zr); // CHECKME: edge fill rules for swapped opaque shadow mask polygons - if ((gpu3d.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu3d.RenderDispCnt & (1<<3))) || wireframe) + if ((GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU3D.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -810,7 +813,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp->SlopeR.EdgeParams(&r_edgelen, &r_edgecov); // CHECKME: edge fill rules for unswapped opaque shadow mask polygons - if ((gpu3d.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu3d.RenderDispCnt & (1<<3))) || wireframe) + if ((GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU3D.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -831,7 +834,7 @@ void 
SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* // similarly, we can perform alpha test early (checkme) if (wireframe) polyalpha = 31; - if (polyalpha <= gpu3d.RenderAlphaRef) return; + if (polyalpha <= GPU3D.RenderAlphaRef) return; // in wireframe mode, there are special rules for equal Z (TODO) @@ -933,7 +936,7 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp->XR = rp->SlopeR.Step(); } -void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y) +void SoftRenderer3D::RenderPolygonScanline(RendererPolygon* rp, s32 y) { Polygon* polygon = rp->PolyData; @@ -1017,7 +1020,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // edges are always filled if antialiasing/edgemarking are enabled, // if the pixels are translucent and alpha blending is enabled, or if the polygon is wireframe // checkme: do swapped line polygons exist? - if ((gpu.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu.GPU3D.RenderDispCnt & (1<<3))) || wireframe) + if ((GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU3D.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -1052,7 +1055,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 // * edges are filled if both sides are identical and fully overlapping // edges are always filled if antialiasing/edgemarking are enabled, // if the pixels are translucent and alpha blending is enabled, or if the polygon is wireframe - if ((gpu.GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (gpu.GPU3D.RenderDispCnt & (1<<3))) || wireframe) + if ((GPU3D.RenderDispCnt & ((1<<4)|(1<<5))) || ((polyalpha < 31) && (GPU3D.RenderDispCnt & (1<<3))) || wireframe) { l_filledge = true; r_filledge = true; @@ -1151,17 +1154,17 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s16 s = interpX.Interpolate(sl, sr); 
s16 t = interpX.Interpolate(tl, tr); - u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); + u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; // alpha test - if (alpha <= gpu.GPU3D.RenderAlphaRef) continue; + if (alpha <= GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if (gpu.GPU3D.RenderDispCnt & (1<<4)) + if (GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing: all edges are rendered @@ -1191,11 +1194,11 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 else { if (!(polygon->Attr & (1<<11))) z = -1; - PlotTranslucentPixel(gpu.GPU3D, pixeladdr, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow); // blend with bottom pixel too, if needed if ((dstattr & 0xF) && (pixeladdr < BufferSize)) - PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); } } @@ -1247,17 +1250,17 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s16 s = interpX.Interpolate(sl, sr); s16 t = interpX.Interpolate(tl, tr); - u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); + u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; // alpha test - if (alpha <= gpu.GPU3D.RenderAlphaRef) continue; + if (alpha <= GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if ((gpu.GPU3D.RenderDispCnt & (1<<4)) && (attr & 0xF)) + if ((GPU3D.RenderDispCnt & (1<<4)) && (attr & 0xF)) { // anti-aliasing: all edges are rendered @@ -1280,11 +1283,11 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 else { if (!(polygon->Attr & (1<<11))) z = -1; - PlotTranslucentPixel(gpu.GPU3D, pixeladdr, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(pixeladdr, color, z, polyattr, 
polygon->IsShadow); // blend with bottom pixel too, if needed if ((dstattr & 0xF) && (pixeladdr < BufferSize)) - PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); } } @@ -1339,17 +1342,17 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 s16 s = interpX.Interpolate(sl, sr); s16 t = interpX.Interpolate(tl, tr); - u32 color = RenderPixel(gpu, polygon, vr>>3, vg>>3, vb>>3, s, t); + u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t); u8 alpha = color >> 24; // alpha test - if (alpha <= gpu.GPU3D.RenderAlphaRef) continue; + if (alpha <= GPU3D.RenderAlphaRef) continue; if (alpha == 31) { u32 attr = polyattr | edge; - if (gpu.GPU3D.RenderDispCnt & (1<<4)) + if (GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing: all edges are rendered @@ -1379,11 +1382,11 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 else { if (!(polygon->Attr & (1<<11))) z = -1; - PlotTranslucentPixel(gpu.GPU3D, pixeladdr, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(pixeladdr, color, z, polyattr, polygon->IsShadow); // blend with bottom pixel too, if needed if ((dstattr & 0xF) && (pixeladdr < BufferSize)) - PlotTranslucentPixel(gpu.GPU3D, pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); + PlotTranslucentPixel(pixeladdr+BufferSize, color, z, polyattr, polygon->IsShadow); } } @@ -1391,7 +1394,7 @@ void SoftRenderer::RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s3 rp->XR = rp->SlopeR.Step(); } -void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) +void SoftRenderer3D::RenderScanline(s32 y, int npolys) { for (int i = 0; i < npolys; i++) { @@ -1401,19 +1404,19 @@ void SoftRenderer::RenderScanline(const GPU& gpu, s32 y, int npolys) if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop))) { if 
(polygon->IsShadowMask) - RenderShadowMaskScanline(gpu.GPU3D, rp, y); + RenderShadowMaskScanline(rp, y); else - RenderPolygonScanline(gpu, rp, y); + RenderPolygonScanline(rp, y); } } } -u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const +u32 SoftRenderer3D::CalculateFogDensity(u32 pixeladdr) const { u32 z = DepthBuffer[pixeladdr]; u32 densityid, densityfrac; - if (z < gpu3d.RenderFogOffset) + if (z < GPU3D.RenderFogOffset) { densityid = 0; densityfrac = 0; @@ -1425,8 +1428,8 @@ u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const // on hardware, the final value can overflow the 32-bit range with a shift big enough, // causing fog to 'wrap around' and accidentally apply to larger Z ranges - z -= gpu3d.RenderFogOffset; - z = (z >> 2) << gpu3d.RenderFogShift; + z -= GPU3D.RenderFogOffset; + z = (z >> 2) << GPU3D.RenderFogShift; densityid = z >> 17; if (densityid >= 32) @@ -1440,20 +1443,20 @@ u32 SoftRenderer::CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const // checkme (may be too precise?) u32 density = - ((gpu3d.RenderFogDensityTable[densityid] * (0x20000-densityfrac)) + - (gpu3d.RenderFogDensityTable[densityid+1] * densityfrac)) >> 17; + ((GPU3D.RenderFogDensityTable[densityid] * (0x20000-densityfrac)) + + (GPU3D.RenderFogDensityTable[densityid+1] * densityfrac)) >> 17; if (density >= 127) density = 128; return density; } -void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) +void SoftRenderer3D::ScanlineFinalPass(s32 y) { // to consider: // clearing all polygon fog flags if the master flag isn't set? // merging all final pass loops into one? 
- if (gpu3d.RenderDispCnt & (1<<5)) + if (GPU3D.RenderDispCnt & (1<<5)) { // edge marking // only applied to topmost pixels @@ -1473,7 +1476,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) ((polyid != (AttrBuffer[pixeladdr-ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr-ScanlineWidth])) || ((polyid != (AttrBuffer[pixeladdr+ScanlineWidth] >> 24)) && (z < DepthBuffer[pixeladdr+ScanlineWidth]))) { - u16 edgecolor = gpu3d.RenderEdgeTable[polyid >> 3]; + u16 edgecolor = GPU3D.RenderEdgeTable[polyid >> 3]; u32 edgeR = (edgecolor << 1) & 0x3E; if (edgeR) edgeR++; u32 edgeG = (edgecolor >> 4) & 0x3E; if (edgeG) edgeG++; u32 edgeB = (edgecolor >> 9) & 0x3E; if (edgeB) edgeB++; @@ -1486,7 +1489,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) } } - if (gpu3d.RenderDispCnt & (1<<7)) + if (GPU3D.RenderDispCnt & (1<<7)) { // fog @@ -1499,12 +1502,12 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) // TODO: check the 'fog alpha glitch with small Z' GBAtek talks about - bool fogcolor = !(gpu3d.RenderDispCnt & (1<<6)); + bool fogcolor = !(GPU3D.RenderDispCnt & (1<<6)); - u32 fogR = (gpu3d.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; - u32 fogG = (gpu3d.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; - u32 fogB = (gpu3d.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; - u32 fogA = (gpu3d.RenderFogColor >> 16) & 0x1F; + u32 fogR = (GPU3D.RenderFogColor << 1) & 0x3E; if (fogR) fogR++; + u32 fogG = (GPU3D.RenderFogColor >> 4) & 0x3E; if (fogG) fogG++; + u32 fogB = (GPU3D.RenderFogColor >> 9) & 0x3E; if (fogB) fogB++; + u32 fogA = (GPU3D.RenderFogColor >> 16) & 0x1F; for (int x = 0; x < 256; x++) { @@ -1514,7 +1517,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) u32 attr = AttrBuffer[pixeladdr]; if (attr & (1<<15)) { - density = CalculateFogDensity(gpu3d, pixeladdr); + density = CalculateFogDensity(pixeladdr); srccolor = ColorBuffer[pixeladdr]; srcR = srccolor & 0x3F; @@ -1543,7 +1546,7 @@ void 
SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) attr = AttrBuffer[pixeladdr]; if (!(attr & (1<<15))) continue; - density = CalculateFogDensity(gpu3d, pixeladdr); + density = CalculateFogDensity(pixeladdr); srccolor = ColorBuffer[pixeladdr]; srcR = srccolor & 0x3F; @@ -1564,7 +1567,7 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) } } - if (gpu3d.RenderDispCnt & (1<<4)) + if (GPU3D.RenderDispCnt & (1<<4)) { // anti-aliasing @@ -1617,10 +1620,10 @@ void SoftRenderer::ScanlineFinalPass(const GPU3D& gpu3d, s32 y) } } -void SoftRenderer::ClearBuffers(const GPU& gpu) +void SoftRenderer3D::ClearBuffers() { - u32 clearz = ((gpu.GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; - u32 polyid = gpu.GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID + u32 clearz = ((GPU3D.RenderClearAttr2 & 0x7FFF) * 0x200) + 0x1FF; + u32 polyid = GPU3D.RenderClearAttr1 & 0x3F000000; // this sets the opaque polygonID // fill screen borders for edge marking @@ -1650,17 +1653,17 @@ void SoftRenderer::ClearBuffers(const GPU& gpu) // clear the screen - if (gpu.GPU3D.RenderDispCnt & (1<<14)) + if (GPU3D.RenderDispCnt & (1<<14)) { - u8 xoff = (gpu.GPU3D.RenderClearAttr2 >> 16) & 0xFF; - u8 yoff = (gpu.GPU3D.RenderClearAttr2 >> 24) & 0xFF; + u8 xoff = (GPU3D.RenderClearAttr2 >> 16) & 0xFF; + u8 yoff = (GPU3D.RenderClearAttr2 >> 24) & 0xFF; for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { for (int x = 0; x < 256; x++) { - u16 val2 = gpu.ReadVRAMFlat_Texture(0x40000 + (yoff << 9) + (xoff << 1)); - u16 val3 = gpu.ReadVRAMFlat_Texture(0x60000 + (yoff << 9) + (xoff << 1)); + u16 val2 = GPU.ReadVRAMFlat_Texture(0x40000 + (yoff << 9) + (xoff << 1)); + u16 val3 = GPU.ReadVRAMFlat_Texture(0x60000 + (yoff << 9) + (xoff << 1)); // TODO: confirm color conversion u32 r = (val2 << 1) & 0x3E; if (r) r++; @@ -1685,13 +1688,13 @@ void SoftRenderer::ClearBuffers(const GPU& gpu) else { // TODO: confirm color conversion - u32 r = (gpu.GPU3D.RenderClearAttr1 << 
1) & 0x3E; if (r) r++; - u32 g = (gpu.GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; - u32 b = (gpu.GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; - u32 a = (gpu.GPU3D.RenderClearAttr1 >> 16) & 0x1F; + u32 r = (GPU3D.RenderClearAttr1 << 1) & 0x3E; if (r) r++; + u32 g = (GPU3D.RenderClearAttr1 >> 4) & 0x3E; if (g) g++; + u32 b = (GPU3D.RenderClearAttr1 >> 9) & 0x3E; if (b) b++; + u32 a = (GPU3D.RenderClearAttr1 >> 16) & 0x1F; u32 color = r | (g << 8) | (b << 16) | (a << 24); - polyid |= (gpu.GPU3D.RenderClearAttr1 & 0x8000); + polyid |= (GPU3D.RenderClearAttr1 & 0x8000); for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth) { @@ -1706,7 +1709,7 @@ void SoftRenderer::ClearBuffers(const GPU& gpu) } } -void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys) +void SoftRenderer3D::RenderPolygons(bool threaded, Polygon** polygons, int npolys) { int j = 0; for (int i = 0; i < npolys; i++) @@ -1715,40 +1718,40 @@ void SoftRenderer::RenderPolygons(const GPU& gpu, bool threaded, Polygon** polyg SetupPolygon(&PolygonList[j++], polygons[i]); } - RenderScanline(gpu, 0, j); + RenderScanline(0, j); for (s32 y = 1; y < 192; y++) { - RenderScanline(gpu, y, j); - ScanlineFinalPass(gpu.GPU3D, y-1); + RenderScanline(y, j); + ScanlineFinalPass(y-1); if (threaded) // Notify the main thread that we're done with a scanline. Platform::Semaphore_Post(Sema_ScanlineCount); } - ScanlineFinalPass(gpu.GPU3D, 191); + ScanlineFinalPass(191); if (threaded) // If this renderer is threaded, notify the main thread that we're done with the frame. 
Platform::Semaphore_Post(Sema_ScanlineCount); } -void SoftRenderer::VCount144(GPU& gpu) +void SoftRenderer3D::FinishRendering() { - if (RenderThreadRunning.load(std::memory_order_relaxed) && !gpu.GPU3D.AbortFrame) + if (RenderThreadRunning.load(std::memory_order_relaxed) && !GPU3D.AbortFrame) Platform::Semaphore_Wait(Sema_RenderDone); } -void SoftRenderer::RenderFrame(GPU& gpu) +void SoftRenderer3D::RenderFrame() { - auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu); - auto texPalDirty = gpu.VRAMDirty_TexPal.DeriveState(gpu.VRAMMap_TexPal, gpu); + auto textureDirty = GPU.VRAMDirty_Texture.DeriveState(GPU.VRAMMap_Texture, GPU); + auto texPalDirty = GPU.VRAMDirty_TexPal.DeriveState(GPU.VRAMMap_TexPal, GPU); - bool textureChanged = gpu.MakeVRAMFlat_TextureCoherent(textureDirty); - bool texPalChanged = gpu.MakeVRAMFlat_TexPalCoherent(texPalDirty); + bool textureChanged = GPU.MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = GPU.MakeVRAMFlat_TexPalCoherent(texPalDirty); - FrameIdentical = !(textureChanged || texPalChanged) && gpu.GPU3D.RenderFrameIdentical; + FrameIdentical = !(textureChanged || texPalChanged) && GPU3D.RenderFrameIdentical; if (RenderThreadRunning.load(std::memory_order_relaxed)) { @@ -1757,18 +1760,18 @@ void SoftRenderer::RenderFrame(GPU& gpu) } else if (!FrameIdentical) { - ClearBuffers(gpu); - RenderPolygons(gpu, false, &gpu.GPU3D.RenderPolygonRAM[0], gpu.GPU3D.RenderNumPolygons); + ClearBuffers(); + RenderPolygons(false, &GPU3D.RenderPolygonRAM[0], GPU3D.RenderNumPolygons); } } -void SoftRenderer::RestartFrame(GPU& gpu) +void SoftRenderer3D::RestartFrame() { - SetupRenderThread(gpu); + SetupRenderThread(); EnableRenderThread(); } -void SoftRenderer::RenderThreadFunc(GPU& gpu) +void SoftRenderer3D::RenderThreadFunc() { for (;;) { @@ -1789,8 +1792,8 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu) } else { - ClearBuffers(gpu); - RenderPolygons(gpu, true, &gpu.GPU3D.RenderPolygonRAM[0], 
gpu.GPU3D.RenderNumPolygons); + ClearBuffers(); + RenderPolygons(true, &GPU3D.RenderPolygonRAM[0], GPU3D.RenderNumPolygons); } // Tell the main thread that we're done rendering @@ -1801,8 +1804,15 @@ void SoftRenderer::RenderThreadFunc(GPU& gpu) } } -u32* SoftRenderer::GetLine(int line) +u32* SoftRenderer3D::GetLine(int line) { + if (GPU3D.AbortFrame) + { + // TODO this isn't accurate + memset(ScrolledLine, 0, sizeof(ScrolledLine)); + return ScrolledLine; + } + if (RenderThreadRunning.load(std::memory_order_relaxed)) { if (line < 192) @@ -1812,7 +1822,31 @@ u32* SoftRenderer::GetLine(int line) Platform::Semaphore_Wait(Sema_ScanlineCount); } - return &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset]; + u32* rawline = &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset]; + u16 xpos = GPU3D.RenderXPos; + if (xpos == 0) + return rawline; + + // apply X scroll + + if (xpos & 0x100) + { + int i = 0, j = xpos; + for (; j < 512; i++, j++) + ScrolledLine[i] = 0; + for (j = 0; i < 256; i++, j++) + ScrolledLine[i] = rawline[j]; + } + else + { + int i = 0, j = xpos; + for (; j < 256; i++, j++) + ScrolledLine[i] = rawline[j]; + for (; i < 256; i++) + ScrolledLine[i] = 0; + } + + return ScrolledLine; } } diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h index a201c6a6cb..547492c870 100644 --- a/src/GPU3D_Soft.h +++ b/src/GPU3D_Soft.h @@ -26,26 +26,33 @@ namespace melonDS { -class SoftRenderer : public Renderer3D +class SoftRenderer; + +class SoftRenderer3D : public Renderer3D { public: - SoftRenderer() noexcept; - ~SoftRenderer() override; - void Reset(GPU& gpu) override; + SoftRenderer3D(melonDS::GPU3D& gpu3D, SoftRenderer& parent) noexcept; + ~SoftRenderer3D() override; + void Reset() override; - void SetThreaded(bool threaded, GPU& gpu) noexcept; + void SetThreaded(bool threaded) noexcept; [[nodiscard]] bool IsThreaded() const noexcept { return Threaded; } - void VCount144(GPU& gpu) override; - void RenderFrame(GPU& gpu) override; - void RestartFrame(GPU& gpu) 
override; + void RenderFrame() override; + void FinishRendering() override; + void RestartFrame() override; + u32* GetLine(int line) override; - void SetupRenderThread(GPU& gpu); + void SetupRenderThread(); void EnableRenderThread(); void StopRenderThread(); + private: + SoftRenderer& Parent; + friend void GPU3D::DoSavestate(Savestate* file) noexcept; + // Notes on the interpolator: // // This is a theory on how the DS hardware interpolates values. It matches hardware output @@ -423,7 +430,7 @@ class SoftRenderer : public Renderer3D s32 ycoverage, ycov_incr; }; - u32 AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept; + u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha) const noexcept; struct RendererPolygon { @@ -438,21 +445,21 @@ class SoftRenderer : public Renderer3D }; RendererPolygon PolygonList[2048]; - void TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; - u32 RenderPixel(const GPU& gpu, const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; - void PlotTranslucentPixel(const GPU3D& gpu3d, u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); + void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha) const; + u32 RenderPixel(const Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t) const; + void PlotTranslucentPixel(u32 pixeladdr, u32 color, u32 z, u32 polyattr, u32 shadow); void SetupPolygonLeftEdge(RendererPolygon* rp, s32 y) const; void SetupPolygonRightEdge(RendererPolygon* rp, s32 y) const; void SetupPolygon(RendererPolygon* rp, Polygon* polygon) const; - void RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon* rp, s32 y); - void RenderPolygonScanline(const GPU& gpu, RendererPolygon* rp, s32 y); - void RenderScanline(const GPU& gpu, s32 y, int npolys); - u32 CalculateFogDensity(const GPU3D& gpu3d, u32 pixeladdr) const; - void ScanlineFinalPass(const GPU3D& gpu3d, s32 y); - void ClearBuffers(const GPU& gpu); - void 
RenderPolygons(const GPU& gpu, bool threaded, Polygon** polygons, int npolys); + void RenderShadowMaskScanline(RendererPolygon* rp, s32 y); + void RenderPolygonScanline(RendererPolygon* rp, s32 y); + void RenderScanline(s32 y, int npolys); + u32 CalculateFogDensity(u32 pixeladdr) const; + void ScanlineFinalPass(s32 y); + void ClearBuffers(); + void RenderPolygons(bool threaded, Polygon** polygons, int npolys); - void RenderThreadFunc(GPU& gpu); + void RenderThreadFunc(); // buffer dimensions are 258x194 to add a offscreen 1px border // which simplifies edge marking tests @@ -485,6 +492,8 @@ class SoftRenderer : public Renderer3D bool FrameIdentical; + u32 ScrolledLine[256]; + // threading bool Threaded = false; diff --git a/src/GPU3D_Texcache.h b/src/GPU3D_Texcache.h index f2cd6416f4..474245eb43 100644 --- a/src/GPU3D_Texcache.h +++ b/src/GPU3D_Texcache.h @@ -44,8 +44,8 @@ template class Texcache { public: - Texcache(const TexLoaderT& texloader) - : TexLoader(texloader) // probably better if this would be a move constructor??? + Texcache(melonDS::GPU& gpu, const TexLoaderT& texloader) + : GPU(gpu), TexLoader(texloader) // probably better if this would be a move constructor??? 
{} u64 MaskedHash(u8* vram, u32 vramSize, u32 addr, u32 size) @@ -92,16 +92,36 @@ class Texcache return false; } - bool Update(GPU& gpu) + bool Update(u8& clrBitmapDirty) { - auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu); - auto texPalDirty = gpu.VRAMDirty_TexPal.DeriveState(gpu.VRAMMap_TexPal, gpu); + auto textureDirty = GPU.VRAMDirty_Texture.DeriveState(GPU.VRAMMap_Texture, GPU); + auto texPalDirty = GPU.VRAMDirty_TexPal.DeriveState(GPU.VRAMMap_TexPal, GPU); - bool textureChanged = gpu.MakeVRAMFlat_TextureCoherent(textureDirty); - bool texPalChanged = gpu.MakeVRAMFlat_TexPalCoherent(texPalDirty); + bool textureChanged = GPU.MakeVRAMFlat_TextureCoherent(textureDirty); + bool texPalChanged = GPU.MakeVRAMFlat_TexPalCoherent(texPalDirty); + + clrBitmapDirty = 0; if (textureChanged || texPalChanged) { + // check if slots 2 and 3 are dirty (for the clear bitmap) + for (u32 j = (0x40000/(VRAMDirtyGranularity*64)); j < (0x60000/(VRAMDirtyGranularity*64)); j++) + { + if (textureDirty.Data[j]) + { + clrBitmapDirty |= (1<<0); + break; + } + } + for (u32 j = (0x60000/(VRAMDirtyGranularity*64)); j < (0x80000/(VRAMDirtyGranularity*64)); j++) + { + if (textureDirty.Data[j]) + { + clrBitmapDirty |= (1<<1); + break; + } + } + //printf("check invalidation %d\n", TexCache.size()); for (auto it = Cache.begin(); it != Cache.end();) { @@ -113,7 +133,7 @@ class Texcache if (CheckInvalid(entry.TextureRAMStart[i], entry.TextureRAMSize[i], entry.TextureHash[i], textureDirty.Data, - gpu.VRAMFlat_Texture, sizeof(gpu.VRAMFlat_Texture))) + GPU.VRAMFlat_Texture, sizeof(GPU.VRAMFlat_Texture))) goto invalidate; } } @@ -123,7 +143,7 @@ class Texcache if (CheckInvalid(entry.TexPalStart, entry.TexPalSize, entry.TexPalHash, texPalDirty.Data, - gpu.VRAMFlat_TexPal, sizeof(gpu.VRAMFlat_TexPal))) + GPU.VRAMFlat_TexPal, sizeof(GPU.VRAMFlat_TexPal))) goto invalidate; } @@ -143,7 +163,7 @@ class Texcache return false; } - void GetTexture(GPU& gpu, u32 texParam, u32 
palBase, TexHandleT& textureHandle, u32& layer, u32*& helper) + void GetTexture(u32 texParam, u32 palBase, TexHandleT& textureHandle, u32& layer, u32*& helper) { // remove sampling and texcoord gen params texParam &= ~0xC00F0000; @@ -188,7 +208,7 @@ class Texcache { entry.TextureRAMSize[0] = width*height*2; - ConvertBitmapTexture(width, height, DecodingBuffer, addr, gpu); + ConvertBitmapTexture(width, height, DecodingBuffer, addr, GPU); } else if (fmt == 5) { @@ -202,7 +222,7 @@ class Texcache entry.TexPalStart = palBase*16; entry.TexPalSize = 0x10000; - ConvertCompressedTexture(width, height, DecodingBuffer, addr, slot1addr, entry.TexPalStart, gpu); + ConvertCompressedTexture(width, height, DecodingBuffer, addr, slot1addr, entry.TexPalStart, GPU); } else { @@ -231,22 +251,22 @@ class Texcache switch (fmt) { - case 1: ConvertAXIYTexture(width, height, DecodingBuffer, addr, palAddr, gpu); break; - case 6: ConvertAXIYTexture(width, height, DecodingBuffer, addr, palAddr, gpu); break; - case 2: ConvertNColorsTexture(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break; - case 3: ConvertNColorsTexture(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break; - case 4: ConvertNColorsTexture(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break; + case 1: ConvertAXIYTexture(width, height, DecodingBuffer, addr, palAddr, GPU); break; + case 6: ConvertAXIYTexture(width, height, DecodingBuffer, addr, palAddr, GPU); break; + case 2: ConvertNColorsTexture(width, height, DecodingBuffer, addr, palAddr, color0Transparent, GPU); break; + case 3: ConvertNColorsTexture(width, height, DecodingBuffer, addr, palAddr, color0Transparent, GPU); break; + case 4: ConvertNColorsTexture(width, height, DecodingBuffer, addr, palAddr, color0Transparent, GPU); break; } } for (int i = 0; i < 2; i++) { if (entry.TextureRAMSize[i]) - entry.TextureHash[i] = MaskedHash(gpu.VRAMFlat_Texture, sizeof(gpu.VRAMFlat_Texture), + 
entry.TextureHash[i] = MaskedHash(GPU.VRAMFlat_Texture, sizeof(GPU.VRAMFlat_Texture), entry.TextureRAMStart[i], entry.TextureRAMSize[i]); } if (entry.TexPalSize) - entry.TexPalHash = MaskedHash(gpu.VRAMFlat_TexPal, sizeof(gpu.VRAMFlat_TexPal), + entry.TexPalHash = MaskedHash(GPU.VRAMFlat_TexPal, sizeof(GPU.VRAMFlat_TexPal), entry.TexPalStart, entry.TexPalSize); auto& texArrays = TexArrays[widthLog2][heightLog2]; @@ -296,7 +316,10 @@ class Texcache } Cache.clear(); } + private: + melonDS::GPU& GPU; + struct TexArrayEntry { TexHandleT TextureID; diff --git a/src/GPU3D_TexcacheOpenGL.cpp b/src/GPU3D_TexcacheOpenGL.cpp index 95ca8cdc8d..08c684b5fe 100644 --- a/src/GPU3D_TexcacheOpenGL.cpp +++ b/src/GPU3D_TexcacheOpenGL.cpp @@ -8,7 +8,14 @@ GLuint TexcacheOpenGLLoader::GenerateTexture(u32 width, u32 height, u32 layers) GLuint texarray; glGenTextures(1, &texarray); glBindTexture(GL_TEXTURE_2D_ARRAY, texarray); - glTexStorage3D(GL_TEXTURE_2D_ARRAY, 1, GL_RGBA8UI, width, height, layers); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + if (IsCompute) + glTexStorage3D(GL_TEXTURE_2D_ARRAY, 1, GL_RGBA8UI, width, height, layers); + else + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA8UI, width, height, layers, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, nullptr); + return texarray; } diff --git a/src/GPU3D_TexcacheOpenGL.h b/src/GPU3D_TexcacheOpenGL.h index a8cfa576d9..6044b932ff 100644 --- a/src/GPU3D_TexcacheOpenGL.h +++ b/src/GPU3D_TexcacheOpenGL.h @@ -13,9 +13,14 @@ class Texcache; class TexcacheOpenGLLoader { public: + TexcacheOpenGLLoader(bool compute) : IsCompute(compute) {} + GLuint GenerateTexture(u32 width, u32 height, u32 layers); void UploadTexture(GLuint handle, u32 width, u32 height, u32 layer, void* data); void DeleteTexture(GLuint handle); + +private: + bool IsCompute; }; using TexcacheOpenGL = Texcache; diff --git a/src/GPU_ColorOp.h b/src/GPU_ColorOp.h new file 
mode 100644 index 0000000000..2d6e947a98 --- /dev/null +++ b/src/GPU_ColorOp.h @@ -0,0 +1,82 @@ +/* + Copyright 2016-2025 melonDS team + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef GPU_COLOROP_H +#define GPU_COLOROP_H + +#include "types.h" + +namespace melonDS +{ + +static constexpr u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb) noexcept +{ + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb) + 0x000008) >> 4; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb) + 0x000800) >> 4) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb) + 0x080000) >> 4) & 0x7F0000; + + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + return r | g | b | 0xFF000000; +} + +static constexpr u32 ColorBlend5(u32 val1, u32 val2) noexcept +{ + u32 eva = ((val1 >> 24) & 0x1F) + 1; + u32 evb = 32 - eva; + + if (eva == 32) return val1; + + u32 r = (((val1 & 0x00003F) * eva) + ((val2 & 0x00003F) * evb) + 0x000010) >> 5; + u32 g = ((((val1 & 0x003F00) * eva) + ((val2 & 0x003F00) * evb) + 0x001000) >> 5) & 0x007F00; + u32 b = ((((val1 & 0x3F0000) * eva) + ((val2 & 0x3F0000) * evb) + 0x100000) >> 5) & 0x7F0000; + + if (r > 0x00003F) r = 0x00003F; + if (g > 0x003F00) g = 0x003F00; + if (b > 0x3F0000) b = 0x3F0000; + + return r | g | b | 0xFF000000; +} + +static constexpr u32 
ColorBrightnessUp(u32 val, u32 factor, u32 bias) noexcept +{ + u32 rb = val & 0x3F003F; + u32 g = val & 0x003F00; + + rb += (((((0x3F003F - rb) * factor) + (bias*0x010001)) >> 4) & 0x3F003F); + g += (((((0x003F00 - g ) * factor) + (bias*0x000100)) >> 4) & 0x003F00); + + return rb | g | 0xFF000000; +} + +static constexpr u32 ColorBrightnessDown(u32 val, u32 factor, u32 bias) noexcept +{ + u32 rb = val & 0x3F003F; + u32 g = val & 0x003F00; + + rb -= ((((rb * factor) + (bias*0x010001)) >> 4) & 0x3F003F); + g -= ((((g * factor) + (bias*0x000100)) >> 4) & 0x003F00); + + return rb | g | 0xFF000000; +} + +} + +#endif // GPU_COLOROP_H diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp index 888f508e02..66ad7400ce 100644 --- a/src/GPU_OpenGL.cpp +++ b/src/GPU_OpenGL.cpp @@ -16,267 +16,910 @@ with melonDS. If not, see http://www.gnu.org/licenses/. */ -#include "GPU_OpenGL.h" - -#include -#include -#include - +#include #include "NDS.h" -#include "GPU.h" -#include "GPU3D_OpenGL.h" -#include "OpenGLSupport.h" -#include "GPU_OpenGL_shaders.h" +#include "GPU_OpenGL.h" namespace melonDS { +using Platform::Log; +using Platform::LogLevel; + +#include "OpenGL_shaders/FinalPassVS.h" +#include "OpenGL_shaders/FinalPassFS.h" +#include "OpenGL_shaders/CaptureVS.h" +#include "OpenGL_shaders/CaptureFS.h" +#include "OpenGL_shaders/CaptureDownscaleVS.h" +#include "OpenGL_shaders/CaptureDownscaleFS.h" -using namespace OpenGL; -std::optional GLCompositor::New() noexcept +GLRenderer::GLRenderer(melonDS::NDS& nds, bool compute) + : Renderer(nds.GPU) { - assert(glBindAttribLocation != nullptr); - GLuint CompShader {}; + AuxInputBuffer[0] = new u16[256 * 256]; + AuxInputBuffer[1] = new u16[256 * 192]; + + Rend2D_A = std::make_unique(GPU.GPU2D_A, *this); + Rend2D_B = std::make_unique(GPU.GPU2D_B, *this); - if (!OpenGL::CompileVertexFragmentProgram(CompShader, - kCompositorVS, kCompositorFS_Nearest, - "CompositorShader", - {{"vPosition", 0}, {"vTexcoord", 1}}, - {{"oColor", 0}})) - return 
std::nullopt; + // TODO, eventually: figure out a nicer way to support different 3D renderers? + IsCompute = compute; + if (IsCompute) + Rend3D = std::make_unique(GPU.GPU3D, *this); + else + Rend3D = std::make_unique(GPU.GPU3D, *this); - return { GLCompositor(CompShader) }; + ScaleFactor = 0; } -GLCompositor::GLCompositor(GLuint compShader) noexcept : CompShader(compShader) +#define glTexParams(target, wrap) \ + glTexParameteri(target, GL_TEXTURE_WRAP_S, wrap); \ + glTexParameteri(target, GL_TEXTURE_WRAP_T, wrap); \ + glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_NEAREST); \ + glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + +bool GLRenderer::Init() { - CompScaleLoc = glGetUniformLocation(CompShader, "u3DScale"); - - glUseProgram(CompShader); - GLuint screenTextureUniform = glGetUniformLocation(CompShader, "ScreenTex"); - glUniform1i(screenTextureUniform, 0); - GLuint _3dTextureUniform = glGetUniformLocation(CompShader, "_3DTex"); - glUniform1i(_3dTextureUniform, 1); - - // all this mess is to prevent bleeding -#define SETVERTEX(i, x, y, offset) \ - CompVertices[i].Position[0] = x; \ - CompVertices[i].Position[1] = y + offset; \ - CompVertices[i].Texcoord[0] = (x + 1.f) * (256.f / 2.f); \ - CompVertices[i].Texcoord[1] = (y + 1.f) * (384.f / 2.f) - - const float padOffset = 1.f/(192*2.f+2.f)*2.f; - // top screen - SETVERTEX(0, -1, 1, 0); - SETVERTEX(1, 1, 0, padOffset); - SETVERTEX(2, 1, 1, 0); - SETVERTEX(3, -1, 1, 0); - SETVERTEX(4, -1, 0, padOffset); - SETVERTEX(5, 1, 0, padOffset); - - // bottom screen - SETVERTEX(6, -1, 0, -padOffset); - SETVERTEX(7, 1, -1, 0); - SETVERTEX(8, 1, 0, -padOffset); - SETVERTEX(9, -1, 0, -padOffset); - SETVERTEX(10, -1, -1, 0); - SETVERTEX(11, 1, -1, 0); + assert(glEnable != nullptr); + + GLint uniloc; + + // compile shaders + + if (!OpenGL::CompileVertexFragmentProgram(FPShader, + kFinalPassVS, kFinalPassFS, + "2DFinalPassShader", + {{"vPosition", 0}}, + {{"oTopColor", 0}, {"oBottomColor", 1}})) + return false; + + 
if (!OpenGL::CompileVertexFragmentProgram(CaptureShader, + kCaptureVS, kCaptureFS, + "2DCaptureShader", + {{"vPosition", 0}, {"vTexcoord", 1}}, + {{"oColor", 0}})) + return false; + + if (!OpenGL::CompileVertexFragmentProgram(CapDownShader, + kCaptureDownscaleVS, kCaptureDownscaleFS, + "2DCaptureDownscaleShader", + {{"vPosition", 0}}, + {{"oColor", 0}})) + return false; + + // vertex buffers + + const float rectvertices[2*2*3] = { + 0, 1, 1, 0, 1, 1, + 0, 1, 0, 0, 1, 0 + }; + + glGenBuffers(1, &RectVtxBuffer); + glBindBuffer(GL_ARRAY_BUFFER, RectVtxBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(rectvertices), rectvertices, GL_STATIC_DRAW); + + glGenVertexArrays(1, &RectVtxArray); + glBindVertexArray(RectVtxArray); + glEnableVertexAttribArray(0); // position + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0); + + float vertices[12][2]; +#define SETVERTEX(i, x, y) \ + vertices[i][0] = x; \ + vertices[i][1] = y; + + SETVERTEX(0, -1, 1); + SETVERTEX(1, 1, -1); + SETVERTEX(2, 1, 1); + SETVERTEX(3, -1, 1); + SETVERTEX(4, -1, -1); + SETVERTEX(5, 1, -1); #undef SETVERTEX - glGenBuffers(1, &CompVertexBufferID); - glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); - glBufferData(GL_ARRAY_BUFFER, sizeof(CompVertices), &CompVertices[0], GL_STATIC_DRAW); + // final pass vertex data: 2x position, 2x texcoord + glGenBuffers(1, &FPVertexBufferID); + glBindBuffer(GL_ARRAY_BUFFER, FPVertexBufferID); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), &vertices[0], GL_STATIC_DRAW); + + glGenVertexArrays(1, &FPVertexArrayID); + glBindVertexArray(FPVertexArrayID); + glEnableVertexAttribArray(0); // position + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0); + + glGenFramebuffers(2, &FPOutputFB[0]); + + // capture vertex data: 2x position, 2x texcoord + glGenBuffers(1, &CaptureVtxBuffer); + glBindBuffer(GL_ARRAY_BUFFER, CaptureVtxBuffer); + glBufferData(GL_ARRAY_BUFFER, 2 * 6 * 4 * sizeof(u16), nullptr, GL_STREAM_DRAW); - 
glGenVertexArrays(1, &CompVertexArrayID); - glBindVertexArray(CompVertexArrayID); + glGenVertexArrays(1, &CaptureVtxArray); + glBindVertexArray(CaptureVtxArray); glEnableVertexAttribArray(0); // position - glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(CompVertex), (void*)(offsetof(CompVertex, Position))); + glVertexAttribIPointer(0, 2, GL_SHORT, 4 * sizeof(u16), (void*)0); glEnableVertexAttribArray(1); // texcoord - glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, sizeof(CompVertex), (void*)(offsetof(CompVertex, Texcoord))); + glVertexAttribIPointer(1, 2, GL_SHORT, 4 * sizeof(u16), (void*)(2 * sizeof(u16))); - glGenFramebuffers(CompScreenOutputFB.size(), &CompScreenOutputFB[0]); + // textures / framebuffers - glGenTextures(1, &CompScreenInputTex); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI, 256*3 + 1, 192*2, 0, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, NULL); - - glGenTextures(CompScreenOutputTex.size(), &CompScreenOutputTex[0]); - for (GLuint i : CompScreenOutputTex) + glGenTextures(1, &AuxInputTex); + glBindTexture(GL_TEXTURE_2D_ARRAY, AuxInputTex); + glTexParams(GL_TEXTURE_2D_ARRAY, GL_REPEAT); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGB5_A1, 256, 256, 2, 0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, nullptr); + + glGenTextures(1, &CaptureVRAMTex); + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureVRAMTex); + glTexParams(GL_TEXTURE_2D_ARRAY, GL_REPEAT); + glGenFramebuffers(1, &CaptureVRAMFB); + + glGenTextures(2, FPOutputTex); + for (int i = 0; i < 2; i++) { - glBindTexture(GL_TEXTURE_2D, i); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glBindTexture(GL_TEXTURE_2D_ARRAY, FPOutputTex[i]); + glTexParams(GL_TEXTURE_2D_ARRAY, GL_CLAMP_TO_EDGE); } + glGenTextures(1, &CaptureOutput256Tex); + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureOutput256Tex); + glTexParams(GL_TEXTURE_2D_ARRAY, GL_REPEAT); + glGenFramebuffers(4, CaptureOutput256FB); + + glGenTextures(1, &CaptureOutput128Tex); + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureOutput128Tex); + glTexParams(GL_TEXTURE_2D_ARRAY, GL_REPEAT); + glGenFramebuffers(16, CaptureOutput128FB); + + glGenTextures(1, &CaptureSyncTex); + glBindTexture(GL_TEXTURE_2D, CaptureSyncTex); + glTexParams(GL_TEXTURE_2D, GL_CLAMP_TO_EDGE); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB5_A1, 256, 256, 0, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, nullptr); + + glGenFramebuffers(1, &CaptureSyncFB); + glBindFramebuffer(GL_FRAMEBUFFER, CaptureSyncFB); + glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CaptureSyncTex, 0); + glDrawBuffer(GL_COLOR_ATTACHMENT0); + glReadBuffer(GL_COLOR_ATTACHMENT0); + + // UBOs + + glGenBuffers(1, &FPConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, FPConfigUBO); + static_assert((sizeof(sFinalPassConfig) & 15) == 0); + glBufferData(GL_UNIFORM_BUFFER, sizeof(sFinalPassConfig), nullptr, GL_STREAM_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 30, FPConfigUBO); + + glGenBuffers(1, &CaptureConfigUBO); + glBindBuffer(GL_UNIFORM_BUFFER, CaptureConfigUBO); + static_assert((sizeof(sCaptureConfig) & 15) == 0); + glBufferData(GL_UNIFORM_BUFFER, sizeof(sCaptureConfig), nullptr, GL_STREAM_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 31, CaptureConfigUBO); + + // shader config + + glUseProgram(FPShader); + + uniloc = glGetUniformLocation(FPShader, "MainInputTexA"); + glUniform1i(uniloc, 0); + uniloc = glGetUniformLocation(FPShader, "MainInputTexB"); + glUniform1i(uniloc, 1); + uniloc = 
glGetUniformLocation(FPShader, "AuxInputTex"); + glUniform1i(uniloc, 2); + + uniloc = glGetUniformBlockIndex(FPShader, "ubFinalPassConfig"); + glUniformBlockBinding(FPShader, uniloc, 30); + + + glUseProgram(CaptureShader); + + uniloc = glGetUniformLocation(CaptureShader, "InputTexA"); + glUniform1i(uniloc, 0); + uniloc = glGetUniformLocation(CaptureShader, "InputTexB"); + glUniform1i(uniloc, 1); + + uniloc = glGetUniformBlockIndex(CaptureShader, "ubCaptureConfig"); + glUniformBlockBinding(CaptureShader, uniloc, 31); + + + glUseProgram(CapDownShader); + + uniloc = glGetUniformLocation(CapDownShader, "InputTex"); + glUniform1i(uniloc, 0); + + CapDownInputLayerULoc = glGetUniformLocation(CapDownShader, "uInputLayer"); + + + if (!Rend2D_A->Init()) return false; + if (!Rend2D_B->Init()) return false; + if (!Rend3D->Init()) return false; + glBindFramebuffer(GL_FRAMEBUFFER, 0); + return true; } -GLCompositor::~GLCompositor() +GLRenderer::~GLRenderer() { - assert(glDeleteFramebuffers != nullptr); + glDeleteProgram(FPShader); + glDeleteProgram(CaptureShader); + glDeleteProgram(CapDownShader); - glDeleteFramebuffers(CompScreenOutputFB.size(), &CompScreenOutputFB[0]); - glDeleteTextures(1, &CompScreenInputTex); - glDeleteTextures(CompScreenOutputTex.size(), &CompScreenOutputTex[0]); + glDeleteBuffers(1, &RectVtxBuffer); + glDeleteVertexArrays(1, &RectVtxArray); - glDeleteVertexArrays(1, &CompVertexArrayID); - glDeleteBuffers(1, &CompVertexBufferID); + glDeleteBuffers(1, &FPVertexBufferID); + glDeleteVertexArrays(1, &FPVertexArrayID); - glDeleteProgram(CompShader); -} + glDeleteBuffers(1, &CaptureVtxBuffer); + glDeleteVertexArrays(1, &CaptureVtxArray); + + glDeleteFramebuffers(2, FPOutputFB); + glDeleteTextures(1, &AuxInputTex); + glDeleteTextures(1, &CaptureVRAMTex); + glDeleteTextures(2, FPOutputTex); + + delete[] AuxInputBuffer[0]; + delete[] AuxInputBuffer[1]; + glDeleteTextures(1, &CaptureOutput256Tex); + glDeleteTextures(1, &CaptureOutput128Tex); + glDeleteTextures(1, 
&CaptureSyncTex); + glDeleteFramebuffers(1, &CaptureSyncFB); -GLCompositor::GLCompositor(GLCompositor&& other) noexcept : - Scale(other.Scale), - ScreenH(other.ScreenH), - ScreenW(other.ScreenW), - CompScaleLoc(other.CompScaleLoc), - CompVertices(other.CompVertices), - CompShader(other.CompShader), - CompVertexBufferID(other.CompVertexBufferID), - CompVertexArrayID(other.CompVertexArrayID), - CompScreenInputTex(other.CompScreenInputTex), - CompScreenOutputTex(other.CompScreenOutputTex), - CompScreenOutputFB(other.CompScreenOutputFB) + glDeleteBuffers(1, &FPConfigUBO); + glDeleteBuffers(1, &CaptureConfigUBO); +} + +void GLRenderer::Reset() { - other.CompScreenOutputFB = {}; - other.CompScreenInputTex = {}; - other.CompScreenOutputTex = {}; - other.CompVertexArrayID = {}; - other.CompVertexBufferID = {}; - other.CompShader = {}; + memset(&FinalPassConfig, 0, sizeof(FinalPassConfig)); + memset(&CaptureConfig, 0, sizeof(CaptureConfig)); + + AuxUsageMask = 0; + + DispCntA = 0; + DispCntB = 0; + MasterBrightnessA = 0; + MasterBrightnessB = 0; + CaptureCnt = 0; + + NeedPartialRender = false; + LastLine = 0; + LastCapLine = 0; + Aux0VRAMCap = -1; + + Rend2D_A->Reset(); + Rend2D_B->Reset(); + Rend3D->Reset(); } -GLCompositor& GLCompositor::operator=(GLCompositor&& other) noexcept +void GLRenderer::Stop() { - if (this != &other) - { - Scale = other.Scale; - ScreenH = other.ScreenH; - ScreenW = other.ScreenW; - CompScaleLoc = other.CompScaleLoc; - CompVertices = other.CompVertices; + // TODO clear buffers + // TODO: do we even need this anymore? 
+} - // Clean up these resources before overwriting them - glDeleteProgram(CompShader); - CompShader = other.CompShader; +void GLRenderer::PostSavestate() +{ + Reset(); - glDeleteBuffers(1, &CompVertexBufferID); - CompVertexBufferID = other.CompVertexBufferID; + auto rend2D = dynamic_cast(Rend2D_A.get()); + rend2D->PostSavestate(); + rend2D = dynamic_cast(Rend2D_B.get()); + rend2D->PostSavestate(); +} - glDeleteVertexArrays(1, &CompVertexArrayID); - CompVertexArrayID = other.CompVertexArrayID; - glDeleteTextures(1, &CompScreenInputTex); - CompScreenInputTex = other.CompScreenInputTex; +void GLRenderer::SetRenderSettings(RendererSettings& settings) +{ + SetScaleFactor(settings.ScaleFactor); - glDeleteTextures(CompScreenOutputTex.size(), &CompScreenOutputTex[0]); - CompScreenOutputTex = other.CompScreenOutputTex; + auto rend2d = dynamic_cast(Rend2D_A.get()); + rend2d->SetScaleFactor(settings.ScaleFactor); - glDeleteFramebuffers(CompScreenOutputFB.size(), &CompScreenOutputFB[0]); - CompScreenOutputFB = other.CompScreenOutputFB; + rend2d = dynamic_cast(Rend2D_B.get()); + rend2d->SetScaleFactor(settings.ScaleFactor); - other.CompScreenOutputFB = {}; - other.CompScreenInputTex = {}; - other.CompScreenOutputTex = {}; - other.CompVertexArrayID = {}; - other.CompVertexBufferID = {}; - other.CompShader = {}; + if (IsCompute) + { + auto rend3d = dynamic_cast(Rend3D.get()); + rend3d->SetRenderSettings(settings.ScaleFactor, settings.HiresCoordinates); + } + else + { + auto rend3d = dynamic_cast(Rend3D.get()); + rend3d->SetRenderSettings(settings.ScaleFactor, settings.BetterPolygons); } - - return *this; } -void GLCompositor::SetScaleFactor(int scale) noexcept +void GLRenderer::SetScaleFactor(int scale) { - if (scale == Scale) + if (scale == ScaleFactor) return; - Scale = scale; + ScaleFactor = scale; ScreenW = 256 * scale; - ScreenH = (384+2) * scale; + ScreenH = 192 * scale; + + const GLenum fbassign2[] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1}; + + 
glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureOutput256Tex); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA, 256*ScaleFactor, 256*ScaleFactor, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + for (int i = 0; i < 4; i++) + { + glBindFramebuffer(GL_FRAMEBUFFER, CaptureOutput256FB[i]); + glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CaptureOutput256Tex, 0, i); + glDrawBuffer(GL_COLOR_ATTACHMENT0); + } + + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureOutput128Tex); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA, 128*ScaleFactor, 128*ScaleFactor, 16, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + for (int i = 0; i < 16; i++) + { + glBindFramebuffer(GL_FRAMEBUFFER, CaptureOutput128FB[i]); + glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CaptureOutput128Tex, 0, i); + glDrawBuffer(GL_COLOR_ATTACHMENT0); + } + + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureVRAMTex); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA, 256*ScaleFactor, 256*ScaleFactor, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr); + + glBindFramebuffer(GL_FRAMEBUFFER, CaptureVRAMFB); + glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CaptureVRAMTex, 0, 0); + glReadBuffer(GL_COLOR_ATTACHMENT0); + glDrawBuffer(GL_COLOR_ATTACHMENT0); for (int i = 0; i < 2; i++) { - glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[i]); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - // fill the padding - u8* zeroPixels = (u8*) calloc(1, ScreenW*2*scale*4); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192*scale, ScreenW, 2*scale, GL_RGBA, GL_UNSIGNED_BYTE, zeroPixels); - - GLenum fbassign[] = {GL_COLOR_ATTACHMENT0}; - glBindFramebuffer(GL_FRAMEBUFFER, CompScreenOutputFB[i]); - glFramebufferTexture(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, CompScreenOutputTex[i], 0); - glDrawBuffers(1, fbassign); - free(zeroPixels); + glBindTexture(GL_TEXTURE_2D_ARRAY, FPOutputTex[i]); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA, ScreenW, ScreenH, 2, 0, GL_RGBA, 
GL_UNSIGNED_BYTE, nullptr); + + glBindFramebuffer(GL_FRAMEBUFFER, FPOutputFB[i]); + glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, FPOutputTex[i], 0, 0); + glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, FPOutputTex[i], 0, 1); + glDrawBuffers(2, fbassign2); } glBindFramebuffer(GL_FRAMEBUFFER, 0); } -void GLCompositor::Stop(const GPU& gpu) noexcept + +void GLRenderer::DrawScanline(u32 line) { - for (int i = 0; i < 2; i++) + u32 dispcnt_a_diff = DispCntA ^ GPU.GPU2D_A.DispCnt; + u32 dispcnt_b_diff = DispCntB ^ GPU.GPU2D_B.DispCnt; + u32 capturecnt_diff = CaptureCnt ^ GPU.CaptureCnt; + + bool need_render = false; + bool need_capture = false; + + if (dispcnt_a_diff & 0xF0000) + need_render = true; + else if (dispcnt_b_diff & 0x10000) + need_render = true; + else if (MasterBrightnessA != GPU.MasterBrightnessA || + MasterBrightnessB != GPU.MasterBrightnessB) + need_render = true; + + if (GPU.CaptureEnable && (capturecnt_diff & 0x7FFFFFFF)) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[gpu.FrontBuffer]); + need_render = true; + need_capture = true; + } - glClear(GL_COLOR_BUFFER_BIT); + NeedPartialRender = need_render; + Rend2D_A->DrawScanline(line); + Rend2D_B->DrawScanline(line); + + if (need_render && (line > 0)) + { + RenderScreen(LastLine, line); + LastLine = line; } - glBindFramebuffer(GL_FRAMEBUFFER, 0); + if (need_capture && (line > 0)) + { + DoCapture(LastCapLine, line); + LastCapLine = line; + } + + DispCntA = GPU.GPU2D_A.DispCnt; + DispCntB = GPU.GPU2D_B.DispCnt; + MasterBrightnessA = GPU.MasterBrightnessA; + MasterBrightnessB = GPU.MasterBrightnessB; + CaptureCnt = GPU.CaptureCnt; + + FinalPassConfig.uScreenSwap[line] = GPU.ScreenSwap; + + u32 dispcnt = GPU.GPU2D_A.DispCnt; + u32 dispmode = (dispcnt >> 16) & 0x3; + u32 capcnt = GPU.CaptureCnt; + u32 capsel = (capcnt >> 29) & 0x3; + u32 capA = (capcnt >> 24) & 0x1; + u32 capB = (capcnt >> 25) & 0x1; + bool checkcap = 
GPU.CaptureEnable && (capsel != 0); + + if (GPU.CaptureEnable && (capsel != 1)) + { + if (capA == 0) + CaptureConfig.uSrcAOffset[line] = 0; + else + { + int xpos = GPU.GPU3D.GetRenderXPos() & 0x1FF; + xpos -= ((xpos & 0x100) << 1); + CaptureConfig.uSrcAOffset[line] = (float)xpos / 256.f; + } + } + + if ((dispmode == 2) || (checkcap && (capB == 0))) + { + AuxUsageMask |= (1<<0); + + u32 vrambank = (dispcnt >> 18) & 0x3; + u32 vramoffset = line * 256; + u32 outoffset = line * 256; + if (dispmode != 2) + { + u32 yoff = ((capcnt >> 26) & 0x3) << 14; + vramoffset += yoff; + outoffset += yoff; + } + + vramoffset &= 0xFFFF; + outoffset &= 0xFFFF; + + u16* adst = &AuxInputBuffer[0][outoffset]; + + if (GPU.VRAMMap_LCDC & (1<DrawSprites(line); + Rend2D_B->DrawSprites(line); +} + + +void GLRenderer::RenderScreen(int ystart, int yend) +{ + int backbuf = BackBuffer; glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CompScreenOutputFB[backbuf]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FPOutputFB[backbuf]); glDisable(GL_DEPTH_TEST); glDisable(GL_STENCIL_TEST); glDisable(GL_BLEND); glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glColorMaski(1, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthMask(GL_FALSE); glViewport(0, 0, ScreenW, ScreenH); - glClear(GL_COLOR_BUFFER_BIT); + // TODO: adjust incoming vertices instead of doing this? 
+ glEnable(GL_SCISSOR_TEST); + glScissor(0, ystart * ScaleFactor, ScreenW, (yend-ystart) * ScaleFactor); - // TODO: select more shaders (filtering, etc) - glUseProgram(CompShader); - glUniform1ui(CompScaleLoc, Scale); + int vramcap = -1; + if (AuxUsageMask & (1<<0)) + { + u32 vrambank = (DispCntA >> 18) & 0x3; + if (GPU.VRAMMap_LCDC & (1<> 16) & 0x3; + FinalPassConfig.uDispModeB = (DispCntB >> 16) & 0x1; + FinalPassConfig.uBrightModeA = (MasterBrightnessA >> 14) & 0x3; + FinalPassConfig.uBrightModeB = (MasterBrightnessB >> 14) & 0x3; + FinalPassConfig.uBrightFactorA = std::min(MasterBrightnessA & 0x1F, 16); + FinalPassConfig.uBrightFactorB = std::min(MasterBrightnessB & 0x1F, 16); + + if (AuxUsageMask) + { + glBindTexture(GL_TEXTURE_2D_ARRAY, AuxInputTex); + if ((AuxUsageMask & (1<<0)) && (vramcap == -1)) + { + glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, 256, 256, 1, GL_RGBA, + GL_UNSIGNED_SHORT_1_5_5_5_REV, AuxInputBuffer[0]); + } + if (AuxUsageMask & (1<<1)) + { + glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, 256, 192, 1, GL_RGBA, + GL_UNSIGNED_SHORT_1_5_5_5_REV, AuxInputBuffer[1]); + } + } + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, OutputTex2D[0]); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, OutputTex2D[1]); + + glActiveTexture(GL_TEXTURE2); + u32 modeA = (DispCntA >> 16) & 0x3; + if ((modeA == 2) && (vramcap != -1)) + { + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureOutput256Tex); + FinalPassConfig.uAuxLayer = vramcap >> 2; + FinalPassConfig.uAuxColorFactor = 63.75f; + } + else if (modeA >= 2) + { + glBindTexture(GL_TEXTURE_2D_ARRAY, AuxInputTex); + FinalPassConfig.uAuxLayer = (modeA - 2); + FinalPassConfig.uAuxColorFactor = 62.f; + } + + glBindBuffer(GL_UNIFORM_BUFFER, FPConfigUBO); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(FinalPassConfig), &FinalPassConfig); + + glBindBuffer(GL_ARRAY_BUFFER, FPVertexBufferID); + glBindVertexArray(FPVertexArrayID); + glDrawArrays(GL_TRIANGLES, 0, 2*3); + } + + 
glDisable(GL_SCISSOR_TEST); +} + +void GLRenderer::VBlank() +{ + Rend2D_A->VBlank(); + Rend2D_B->VBlank(); + + RenderScreen(LastLine, 192); + + if (GPU.CaptureEnable) + DoCapture(LastCapLine, 192); + + LastLine = 0; + LastCapLine = 0; +} + +void GLRenderer::VBlankEnd() +{ + AuxUsageMask = 0; +} - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, CompScreenInputTex); - if (gpu.Framebuffer[backbuf][0] && gpu.Framebuffer[backbuf][1]) +void GLRenderer::DoCapture(int ystart, int yend) +{ + u32 dispcnt = DispCntA; + u32 capcnt = CaptureCnt; + u32 dispmode = (dispcnt >> 16) & 0x3; + u32 srcA = (capcnt >> 24) & 0x1; + u32 srcB = (capcnt >> 25) & 0x1; + u32 srcBblock = (dispcnt >> 18) & 0x3; + u32 srcBoffset = (dispmode == 2) ? 0 : ((capcnt >> 26) & 0x3); + u32 dstblock = (capcnt >> 16) & 0x3; + u32 dstoffset = (capcnt >> 18) & 0x3; + u32 capsize = (capcnt >> 20) & 0x3; + u32 dstmode = (capcnt >> 29) & 0x3; + u32 eva = std::min(capcnt & 0x1F, 16u); + u32 evb = std::min((capcnt >> 8) & 0x1F, 16u); + + // determine the region we're going to capture to + + int dstwidth, dstheight; + + if (capsize == 0) + { + dstwidth = 128; + dstheight = 128; + } + else { - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256*3 + 1, 192, GL_RGBA_INTEGER, - GL_UNSIGNED_BYTE, gpu.Framebuffer[backbuf][0].get()); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256*3 + 1, 192, GL_RGBA_INTEGER, - GL_UNSIGNED_BYTE, gpu.Framebuffer[backbuf][1].get()); + dstwidth = 256; + dstheight = 64 * capsize; } + if (ystart >= dstheight) + return; + if (yend > dstheight) + yend = dstheight; + + glUseProgram(CaptureShader); + + GLuint inputA; + if (srcA) + inputA = OutputTex3D; + else + inputA = OutputTex2D[0]; + + bool useSrcB = (dstmode == 1) || (dstmode == 2 && evb > 0); + + GLuint inputB = AuxInputTex; + u32 layerB = srcB; + CaptureConfig.uSrcBColorFactor = 248.f; + + if (useSrcB && (Aux0VRAMCap != -1)) + { + // hi-res VRAM + if (dstblock == srcBblock) + { + // we are reading from the same block we are capturing 
to + // on hardware, it would read the old VRAM contents, then write new stuff + // but we can't do that with OpenGL + // so we need to blit it to a temporary framebuffer + + int blitY0 = (srcBoffset * 64) + ystart; + int blitY1 = (srcBoffset * 64) + yend; + + if (dstoffset != srcBoffset) + Log(LogLevel::Error, "GPU_OpenGL: MISMATCHED VRAM OFFSETS ON SAME BANK!!! bank=%d src=%d dst=%d\n", + dstblock, srcBoffset, dstoffset); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, CaptureOutput256FB[srcBblock]); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CaptureVRAMFB); + + if (blitY1 > 256) + { + // wraparound + glBlitFramebuffer(0, blitY0*ScaleFactor, 256*ScaleFactor, 256*ScaleFactor, + 0, blitY0*ScaleFactor, 256*ScaleFactor, 256*ScaleFactor, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + glBlitFramebuffer(0, 0, 256*ScaleFactor, (blitY1-256)*ScaleFactor, + 0, 0, 256*ScaleFactor, (blitY1-256)*ScaleFactor, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + } + else + { + // straightforward + glBlitFramebuffer(0, blitY0*ScaleFactor, 256*ScaleFactor, blitY1*ScaleFactor, + 0, blitY0*ScaleFactor, 256*ScaleFactor, blitY1*ScaleFactor, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + } + + inputB = CaptureVRAMTex; + layerB = 0; + } + else + { + // if it's a different bank, we can just use it as-is + inputB = CaptureOutput256Tex; + layerB = srcBblock; + } + + CaptureConfig.uSrcBColorFactor = 255.f; + } + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + if (capsize == 0) + { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CaptureOutput128FB[(dstblock << 2) | dstoffset]); + glViewport(0, 0, 128*ScaleFactor, 128*ScaleFactor); + } + else + { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CaptureOutput256FB[dstblock]); + glViewport(0, 0, 256*ScaleFactor, 256*ScaleFactor); + } + + CaptureConfig.uInvCaptureSize[0] = 1.f / (float)dstwidth; + CaptureConfig.uInvCaptureSize[1] = 1.f / (float)dstheight; + + CaptureConfig.uSrcALayer = srcA; + + if (srcB == 0) + CaptureConfig.uSrcBOffset = 64 * srcBoffset; + else + CaptureConfig.uSrcBOffset = 0; + + 
CaptureConfig.uSrcBLayer = layerB; + + CaptureConfig.uDstMode = dstmode; + CaptureConfig.uBlendFactors[0] = eva; + CaptureConfig.uBlendFactors[1] = evb; + + glBindBuffer(GL_UNIFORM_BUFFER, CaptureConfigUBO); + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(CaptureConfig), &CaptureConfig); + + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, inputA); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); + glActiveTexture(GL_TEXTURE1); - renderer.SetupAccelFrame(); + glBindTexture(GL_TEXTURE_2D_ARRAY, inputB); + + u16 vtxbuf[12 * 4]; + u16* vptr = vtxbuf; + int numvtx; + + // y0/y1 = coordinates in destination buffer + // t0/t1 = coordinates in source buffers + if (capsize == 0) dstoffset = 0; + int y0 = (dstoffset * 64) + ystart; + int y1 = (dstoffset * 64) + yend; + int t0 = ystart; + int t1 = yend; + + int bufferheight = (capsize == 0) ? 
128 : 256; + if (y1 > bufferheight) + { + // wraparound + int y2 = bufferheight; + int t2 = t0 + (y2 - y0); + *vptr++ = 0; *vptr++ = y2; *vptr++ = 0; *vptr++ = t2; + *vptr++ = dstwidth; *vptr++ = y0; *vptr++ = dstwidth; *vptr++ = t0; + *vptr++ = dstwidth; *vptr++ = y2; *vptr++ = dstwidth; *vptr++ = t2; + *vptr++ = 0; *vptr++ = y2; *vptr++ = 0; *vptr++ = t2; + *vptr++ = 0; *vptr++ = y0; *vptr++ = 0; *vptr++ = t0; + *vptr++ = dstwidth; *vptr++ = y0; *vptr++ = dstwidth; *vptr++ = t0; + + y2 = y1 - bufferheight; + *vptr++ = 0; *vptr++ = y2; *vptr++ = 0; *vptr++ = t1; + *vptr++ = dstwidth; *vptr++ = 0; *vptr++ = dstwidth; *vptr++ = t2; + *vptr++ = dstwidth; *vptr++ = y2; *vptr++ = dstwidth; *vptr++ = t1; + *vptr++ = 0; *vptr++ = y2; *vptr++ = 0; *vptr++ = t1; + *vptr++ = 0; *vptr++ = 0; *vptr++ = 0; *vptr++ = t2; + *vptr++ = dstwidth; *vptr++ = 0; *vptr++ = dstwidth; *vptr++ = t2; + + numvtx = 12; + } + else + { + // straightforward + *vptr++ = 0; *vptr++ = y1; *vptr++ = 0; *vptr++ = t1; + *vptr++ = dstwidth; *vptr++ = y0; *vptr++ = dstwidth; *vptr++ = t0; + *vptr++ = dstwidth; *vptr++ = y1; *vptr++ = dstwidth; *vptr++ = t1; + *vptr++ = 0; *vptr++ = y1; *vptr++ = 0; *vptr++ = t1; + *vptr++ = 0; *vptr++ = y0; *vptr++ = 0; *vptr++ = t0; + *vptr++ = dstwidth; *vptr++ = y0; *vptr++ = dstwidth; *vptr++ = t0; + + numvtx = 6; + } + + glBindBuffer(GL_ARRAY_BUFFER, CaptureVtxBuffer); + glBufferSubData(GL_ARRAY_BUFFER, 0, numvtx * 4 * sizeof(u16), vtxbuf); + + glBindVertexArray(CaptureVtxArray); + glDrawArrays(GL_TRIANGLES, 0, numvtx); +} + + +void GLRenderer::AllocCapture(u32 bank, u32 start, u32 len) +{ + auto rend2D = dynamic_cast(Rend2D_A.get()); + rend2D->LayerConfigDirty = true; + rend2D->SpriteConfigDirty = true; + rend2D = dynamic_cast(Rend2D_B.get()); + rend2D->LayerConfigDirty = true; + rend2D->SpriteConfigDirty = true; +} + +void GLRenderer::DownscaleCapture(int width, int height, int layer) +{ + // downscale a hi-res capture buffer to 1x IR, and convert to RGBA5551 + 
// we need to do this with a shader so we can accurately downscale color components + + glUseProgram(CapDownShader); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, CaptureSyncFB); + + glViewport(0, 0, width, height); + + glActiveTexture(GL_TEXTURE0); + if (width == 128) + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureOutput128Tex); + else + glBindTexture(GL_TEXTURE_2D_ARRAY, CaptureOutput256Tex); + glUniform1i(CapDownInputLayerULoc, layer); + + glBindBuffer(GL_ARRAY_BUFFER, RectVtxBuffer); + glBindVertexArray(RectVtxArray); + glDrawArrays(GL_TRIANGLES, 0, 2*3); +} + +void GLRenderer::SyncVRAMCapture(u32 bank, u32 start, u32 len, bool complete) +{ + if (!complete) + Log(LogLevel::Error, "GPU_OpenGL: !!! READING VRAM AS IT IS BEING CAPTURED TO\n"); + + u8* vram = GPU.VRAM[bank]; + + glDisable(GL_DITHER); + + if (len == 0) // 128x128 + { + DownscaleCapture(128, 128, (bank<<2) | start); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, CaptureSyncFB); + + glReadPixels(0, 0, 128, 128, + GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, &vram[start * 64 * 512]); + + for (u32 j = start * 64; j < (start+1) * 64; j++) + GPU.VRAMDirty[bank][j] = true; + } + else + { + DownscaleCapture(256, 256, bank); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, CaptureSyncFB); + + u32 pos = start; + for (u32 i = 0; i < len;) + { + u32 end = pos + len; + if (end > 4) + end = 4; - glBindBuffer(GL_ARRAY_BUFFER, CompVertexBufferID); - glBindVertexArray(CompVertexArrayID); - glDrawArrays(GL_TRIANGLES, 0, 4*3); + glReadPixels(0, pos * 64, 256, (end - pos) * 64, + GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, &vram[pos * 64 * 512]); + + for (u32 j = pos * 64; j < end * 64; j++) + GPU.VRAMDirty[bank][j] = true; + + i += (end - pos); + pos += (end - pos); + pos &= 3; + } + } +} + + +bool GLRenderer::GetFramebuffers(void** top, void** bottom) +{ + // since we use an array texture, we only need one of the pointer fields + int frontbuf = BackBuffer ^ 1; + *top = &FPOutputTex[frontbuf]; + 
*bottom = nullptr; + return false; +} + + +bool GLRenderer::NeedsShaderCompile() +{ + return Rend3D->NeedsShaderCompile(); } -void GLCompositor::BindOutputTexture(int buf) +void GLRenderer::ShaderCompileStep(int& current, int& count) { - glBindTexture(GL_TEXTURE_2D, CompScreenOutputTex[buf]); + return Rend3D->ShaderCompileStep(current, count); } } diff --git a/src/GPU_OpenGL.h b/src/GPU_OpenGL.h index 3461ca8150..503b52e9d9 100644 --- a/src/GPU_OpenGL.h +++ b/src/GPU_OpenGL.h @@ -16,56 +16,140 @@ with melonDS. If not, see http://www.gnu.org/licenses/. */ -#pragma once +#ifndef GPU_OPENGL_H +#define GPU_OPENGL_H #include "OpenGLSupport.h" - -#include -#include +#include "GPU.h" +#include "GPU2D_OpenGL.h" +#include "GPU3D_OpenGL.h" +#include "GPU3D_Compute.h" namespace melonDS { -class GPU; -struct RenderSettings; -class GLRenderer; -class Renderer3D; -class GLCompositor + +class GLRenderer : public Renderer { public: - static std::optional New() noexcept; - GLCompositor(const GLCompositor&) = delete; - GLCompositor& operator=(const GLCompositor&) = delete; - GLCompositor(GLCompositor&&) noexcept; - GLCompositor& operator=(GLCompositor&&) noexcept; - ~GLCompositor(); - - void SetScaleFactor(int scale) noexcept; - [[nodiscard]] int GetScaleFactor() const noexcept { return Scale; } - - void Stop(const GPU& gpu) noexcept; - void RenderFrame(const GPU& gpu, Renderer3D& renderer) noexcept; - void BindOutputTexture(int buf); + GLRenderer(melonDS::NDS& nds, bool compute); + ~GLRenderer() override; + bool Init() override; + void Reset() override; + void Stop() override; + + void PostSavestate() override; + + void SetRenderSettings(RendererSettings& settings) override; + + void DrawScanline(u32 line) override; + void DrawSprites(u32 line) override; + + void VBlank() override; + void VBlankEnd() override; + + void AllocCapture(u32 bank, u32 start, u32 len) override; + void SyncVRAMCapture(u32 bank, u32 start, u32 len, bool complete) override; + + bool GetFramebuffers(void** 
top, void** bottom) override; + + bool NeedsShaderCompile() override; + void ShaderCompileStep(int& current, int& count) override; + private: - GLCompositor(GLuint CompShader) noexcept; - int Scale = 0; - int ScreenH = 0, ScreenW = 0; + friend class GLRenderer2D; + friend class GLRenderer3D; + friend class ComputeRenderer3D; + + bool IsCompute; + + int ScaleFactor; + int ScreenW, ScreenH; + + GLuint RectVtxBuffer; + GLuint RectVtxArray; + + GLuint OutputTex3D; + GLuint OutputTex2D[2]; + + struct sFinalPassConfig + { + u32 uScreenSwap[192]; + u32 uScaleFactor; + u32 uAuxLayer; + u32 uDispModeA; + u32 uDispModeB; + u32 uBrightModeA; + u32 uBrightModeB; + u32 uBrightFactorA; + u32 uBrightFactorB; + float uAuxColorFactor; + u32 __pad0[3]; + } FinalPassConfig; + + GLuint FPShader; + GLuint FPConfigUBO; + + GLuint FPVertexBufferID; + GLuint FPVertexArrayID; + + GLuint AuxInputTex; // aux input (VRAM and mainmem FIFO) - GLuint CompShader {}; - GLuint CompScaleLoc = 0; + // texture/fb for display capture VRAM input + GLuint CaptureVRAMTex; + GLuint CaptureVRAMFB; - GLuint CompVertexBufferID = 0; - GLuint CompVertexArrayID = 0; + GLuint FPOutputTex[2]; // final output + GLuint FPOutputFB[2]; - struct CompVertex + struct sCaptureConfig { - std::array Position {}; - std::array Texcoord {}; - }; - std::array CompVertices {}; - - GLuint CompScreenInputTex = 0; - std::array CompScreenOutputTex {}; - std::array CompScreenOutputFB {}; + float uInvCaptureSize[2]; + u32 uSrcALayer; + u32 uSrcBLayer; + u32 uSrcBOffset; + u32 uDstMode; + u32 uBlendFactors[2]; + float uSrcAOffset[192]; + float uSrcBColorFactor; + u32 __pad0[3]; + } CaptureConfig; + + GLuint CaptureShader; + GLuint CaptureConfigUBO; + + GLuint CaptureVtxBuffer; + GLuint CaptureVtxArray; + + GLuint CaptureOutput256FB[4]; + GLuint CaptureOutput256Tex; + GLuint CaptureOutput128FB[16]; + GLuint CaptureOutput128Tex; + + GLuint CapDownShader; + GLint CapDownInputLayerULoc; + + GLuint CaptureSyncFB; + GLuint CaptureSyncTex; + 
+ u16* AuxInputBuffer[2]; + u8 AuxUsageMask; + + u32 DispCntA, DispCntB; + u16 MasterBrightnessA, MasterBrightnessB; + u32 CaptureCnt; + + bool NeedPartialRender; + int LastLine; + int LastCapLine; + int Aux0VRAMCap; + + void SetScaleFactor(int scale); + + void RenderScreen(int ystart, int yend); + void DoCapture(int ystart, int yend); + void DownscaleCapture(int width, int height, int layer); }; } + +#endif // GPU_OPENGL_H diff --git a/src/GPU_OpenGL_shaders.h b/src/GPU_OpenGL_shaders.h deleted file mode 100644 index 04af221dd4..0000000000 --- a/src/GPU_OpenGL_shaders.h +++ /dev/null @@ -1,874 +0,0 @@ -/* - Copyright 2016-2025 melonDS team - - This file is part of melonDS. - - melonDS is free software: you can redistribute it and/or modify it under - the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - melonDS is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with melonDS. If not, see http://www.gnu.org/licenses/. 
-*/ - -#ifndef GPU_OPENGL_SHADERS_H -#define GPU_OPENGL_SHADERS_H - -namespace melonDS -{ -const char* kCompositorVS = R"(#version 140 - -in vec2 vPosition; -in vec2 vTexcoord; - -smooth out vec2 fTexcoord; - -void main() -{ - vec4 fpos; - fpos.xy = vPosition; - fpos.z = 0.0; - fpos.w = 1.0; - - gl_Position = fpos; - fTexcoord = vTexcoord; -} -)"; - -const char* kCompositorFS_Nearest = R"(#version 140 - -uniform uint u3DScale; - -uniform usampler2D ScreenTex; -uniform sampler2D _3DTex; - -smooth in vec2 fTexcoord; - -out vec4 oColor; - -void main() -{ - ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); - - ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); - int dispmode = mbright.b & 0x3; - - // mbright.a == HOFS bit0..7 - // mbright.b bit7 == HOFS bit8 (sign) - float _3dxpos = float(mbright.a - ((mbright.b & 0x80) * 2)); - - if (dispmode == 1) - { - ivec4 val1 = pixel; - ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0)); - ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0)); - - int compmode = val3.a & 0xF; - int eva, evb, evy; - - if (compmode == 4) - { - // 3D on top, blending - - float xpos = fTexcoord.x + _3dxpos; - float ypos = mod(fTexcoord.y, 192); - ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra - * vec4(63,63,63,31)); - - if (_3dpix.a > 0) - { - eva = (_3dpix.a & 0x1F) + 1; - evb = 32 - eva; - - val1 = ((_3dpix * eva) + (val1 * evb) + 0x10) >> 5; - val1 = min(val1, 0x3F); - } - else - val1 = val2; - } - else if (compmode == 1) - { - // 3D on bottom, blending - - float xpos = fTexcoord.x + _3dxpos; - float ypos = mod(fTexcoord.y, 192); - ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra - * vec4(63,63,63,31)); - - if (_3dpix.a > 0) - { - eva = val3.g; - evb = val3.b; - - val1 = ((val1 * eva) + (_3dpix * evb) + 0x8) >> 4; - val1 = min(val1, 0x3F); - } - else - val1 = val2; - } - else if 
(compmode <= 3) - { - // 3D on top, normal/fade - - float xpos = fTexcoord.x + _3dxpos; - float ypos = mod(fTexcoord.y, 192); - ivec4 _3dpix = ivec4(texelFetch(_3DTex, ivec2(vec2(xpos, ypos)*u3DScale), 0).bgra - * vec4(63,63,63,31)); - - if (_3dpix.a > 0) - { - evy = val3.g; - - val1 = _3dpix; - if (compmode == 2) val1 += (((0x3F - val1) * evy) + 0x8) >> 4; - else if (compmode == 3) val1 -= ((val1 * evy) + 0x7) >> 4; - } - else - val1 = val2; - } - - pixel = val1; - } - - if (dispmode != 0) - { - int brightmode = mbright.g >> 6; - if (brightmode == 1) - { - // up - int evy = mbright.r & 0x1F; - if (evy > 16) evy = 16; - - pixel += ((0x3F - pixel) * evy) >> 4; - } - else if (brightmode == 2) - { - // down - int evy = mbright.r & 0x1F; - if (evy > 16) evy = 16; - - pixel -= ((pixel * evy) + 0xF) >> 4; - } - } - - pixel.rgb <<= 2; - pixel.rgb |= (pixel.rgb >> 6); - - // TODO: filters - - oColor = vec4(vec3(pixel.bgr) / 255.0, 1.0); -} -)"; - - - -const char* kCompositorFS_Linear = R"(#version 140 - -uniform uint u3DScale; - -uniform usampler2D ScreenTex; -uniform sampler2D _3DTex; - -smooth in vec2 fTexcoord; - -out vec4 oColor; - -ivec4 Get3DPixel(vec2 pos) -{ - return ivec4(texelFetch(_3DTex, ivec2(pos*u3DScale), 0).bgra - * vec4(63,63,63,31)); -} - -ivec4 GetFullPixel(ivec4 val1, ivec4 val2, ivec4 val3, ivec4 _3dpix) -{ - int compmode = val3.a & 0xF; - int eva, evb, evy; - - if (compmode == 4) - { - // 3D on top, blending - - if (_3dpix.a > 0) - { - eva = (_3dpix.a & 0x1F) + 1; - evb = 32 - eva; - - val1 = ((_3dpix * eva) + (val1 * evb)) >> 5; - if (eva <= 16) val1 += ivec4(1,1,1,0); - val1 = min(val1, 0x3F); - } - else - val1 = val2; - } - else if (compmode == 1) - { - // 3D on bottom, blending - - if (_3dpix.a > 0) - { - eva = val3.g; - evb = val3.b; - - val1 = ((val1 * eva) + (_3dpix * evb)) >> 4; - val1 = min(val1, 0x3F); - } - else - val1 = val2; - } - else if (compmode <= 3) - { - // 3D on top, normal/fade - - if (_3dpix.a > 0) - { - evy = val3.g; - - val1 = 
_3dpix; - if (compmode == 2) val1 += ((ivec4(0x3F,0x3F,0x3F,0) - val1) * evy) >> 4; - else if (compmode == 3) val1 -= (val1 * evy) >> 4; - } - else - val1 = val2; - } - - return val1; -} - -ivec4 imix(ivec4 a, ivec4 b, float x) -{ - return ivec4(vec4(a)*(1-x) + vec4(b)*x); -} - -void main() -{ - ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); - - ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); - int dispmode = mbright.b & 0x3; - - if (dispmode == 1) - { - ivec4 val1 = pixel; - ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0)); - ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0)); - - float xfract = fract(fTexcoord.x); - float yfract = fract(fTexcoord.y); - - float xpos = val3.r + xfract; - float ypos = mod(fTexcoord.y, 192); - ivec4 _3dpix = Get3DPixel(vec2(xpos,ypos)); - - ivec4 p00 = GetFullPixel(val1, val2, val3, _3dpix); - - int xdisp = 1 - int(step(255, fTexcoord.x)); - int ydisp = 1 - int(step(191, ypos)); - - ivec4 p01 = GetFullPixel(ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+0 ,0), 0)), - ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+256,0), 0)), - ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+512,0), 0)), - _3dpix); - - ivec4 p10 = GetFullPixel(ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(0+0 ,ydisp), 0)), - ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(0+256,ydisp), 0)), - ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(0+512,ydisp), 0)), - _3dpix); - - ivec4 p11 = GetFullPixel(ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+0 ,ydisp), 0)), - ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+256,ydisp), 0)), - ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(xdisp+512,ydisp), 0)), - _3dpix); - - ivec4 pa = imix(p00, p01, xfract); - ivec4 pb = imix(p10, p11, xfract); - - pixel = imix(pa, pb, yfract); - } - - if (dispmode != 0) - { - int brightmode = mbright.g >> 6; - 
if (brightmode == 1) - { - // up - int evy = mbright.r & 0x1F; - if (evy > 16) evy = 16; - - pixel += ((ivec4(0x3F,0x3F,0x3F,0) - pixel) * evy) >> 4; - } - else if (brightmode == 2) - { - // down - int evy = mbright.r & 0x1F; - if (evy > 16) evy = 16; - - pixel -= (pixel * evy) >> 4; - } - } - - pixel.rgb <<= 2; - pixel.rgb |= (pixel.rgb >> 6); - - // TODO: filters - - oColor = vec4(vec3(pixel.bgr) / 255.0, 1.0); -} -)"; - - - - - - -// HUGE TEST ZONE ARRLGD - -const char* kCompositorVS_xBRZ = R"(#version 140 - -#define BLEND_NONE 0 -#define BLEND_NORMAL 1 -#define BLEND_DOMINANT 2 -#define LUMINANCE_WEIGHT 1.0 -#define EQUAL_COLOR_TOLERANCE 30.0/255.0 -#define STEEP_DIRECTION_THRESHOLD 2.2 -#define DOMINANT_DIRECTION_THRESHOLD 3.6 - -#if __VERSION__ >= 130 -#define COMPAT_VARYING out -#define COMPAT_ATTRIBUTE in -#define COMPAT_TEXTURE texture -#else -#define COMPAT_VARYING varying -#define COMPAT_ATTRIBUTE attribute -#define COMPAT_TEXTURE texture2D -#endif - -#ifdef GL_ES -#define COMPAT_PRECISION mediump -#else -#define COMPAT_PRECISION -#endif - -COMPAT_ATTRIBUTE vec2 vPosition; -COMPAT_VARYING vec4 TEX0; -COMPAT_VARYING vec4 t1; -COMPAT_VARYING vec4 t2; -COMPAT_VARYING vec4 t3; -COMPAT_VARYING vec4 t4; -COMPAT_VARYING vec4 t5; -COMPAT_VARYING vec4 t6; -COMPAT_VARYING vec4 t7; - -uniform COMPAT_PRECISION int FrameDirection; -uniform COMPAT_PRECISION int FrameCount; -uniform COMPAT_PRECISION vec2 OutputSize; -uniform COMPAT_PRECISION vec2 TextureSize; -uniform COMPAT_PRECISION vec2 InputSize; - -// vertex compatibility #defines -#define vTexCoord TEX0.xy -#define SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize -#define outsize vec4(OutputSize, 1.0 / OutputSize) - -void main() -{ - vec4 fpos; - fpos.xy = vPosition; - fpos.z = 0.0; - fpos.w = 1.0; - - gl_Position = fpos; - vec2 TexCoord = (vPosition + vec2(1.0, 1.0)) * (vec2(256.0, 384.0) / 2.0); - - - //gl_Position = MVPMatrix * VertexCoord; - //COL0 = COLOR; - TEX0.xy = 
TexCoord.xy; - vec2 ps = vec2(1,1);//vec2(SourceSize.z, SourceSize.w); - float dx = ps.x; - float dy = ps.y; - - // A1 B1 C1 - // A0 A B C C4 - // D0 D E F F4 - // G0 G H I I4 - // G5 H5 I5 - - t1 = vTexCoord.xxxy + vec4( -dx, 0.0, dx,-2.0*dy); // A1 B1 C1 - t2 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, -dy); // A B C - t3 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, 0.0); // D E F - t4 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, dy); // G H I - t5 = vTexCoord.xxxy + vec4( -dx, 0.0, dx, 2.0*dy); // G5 H5 I5 - t6 = vTexCoord.xyyy + vec4(-2.0*dx,-dy, 0.0, dy); // A0 D0 G0 - t7 = vTexCoord.xyyy + vec4( 2.0*dx,-dy, 0.0, dy); // C4 F4 I4 -} -)"; - -const char* kCompositorFS_xBRZ = R"(#version 140 - -#define BLEND_NONE 0 -#define BLEND_NORMAL 1 -#define BLEND_DOMINANT 2 -#define LUMINANCE_WEIGHT 1.0 -#define EQUAL_COLOR_TOLERANCE 30.0/255.0 -#define STEEP_DIRECTION_THRESHOLD 2.2 -#define DOMINANT_DIRECTION_THRESHOLD 3.6 - -#if __VERSION__ >= 130 -#define COMPAT_VARYING in -//#define COMPAT_TEXTURE texture -#define FragColor oColor -#else -#define COMPAT_VARYING varying -#define FragColor gl_FragColor -//#define COMPAT_TEXTURE texture2D -#endif - -#ifdef GL_ES -#ifdef GL_FRAGMENT_PRECISION_HIGH -precision highp float; -#else -precision mediump float; -#endif -#define COMPAT_PRECISION mediump -#else -#define COMPAT_PRECISION -#endif - -uniform uint u3DScale; - -uniform usampler2D ScreenTex; -uniform sampler2D _3DTex; - -smooth in vec2 fTexcoord; - -out vec4 oColor; - -//uniform COMPAT_PRECISION vec2 OutputSize; -//uniform COMPAT_PRECISION vec2 TextureSize; -#define TextureSize vec2(256,384) -//uniform COMPAT_PRECISION vec2 InputSize; -//uniform sampler2D Texture; -#define Texture 1312 -COMPAT_VARYING vec4 TEX0; -COMPAT_VARYING vec4 t1; -COMPAT_VARYING vec4 t2; -COMPAT_VARYING vec4 t3; -COMPAT_VARYING vec4 t4; -COMPAT_VARYING vec4 t5; -COMPAT_VARYING vec4 t6; -COMPAT_VARYING vec4 t7; - -// fragment compatibility #defines -#define Source Texture -#define vTexCoord TEX0.xy - -#define 
SourceSize vec4(TextureSize, 1.0 / TextureSize) //either TextureSize or InputSize -#define outsize vec4(OutputSize, 1.0 / OutputSize) - - const float one_sixth = 1.0 / 6.0; - const float two_sixth = 2.0 / 6.0; - const float four_sixth = 4.0 / 6.0; - const float five_sixth = 5.0 / 6.0; - -vec4 Get2DPixel(vec2 texcoord, int level) -{ - ivec4 pixel = ivec4(texelFetch(ScreenTex, ivec2(texcoord) + ivec2(level*256,0), 0)); - - return vec4(pixel) / vec4(63.0, 63.0, 63.0, 31.0); -} - -ivec4 Get3DPixel(vec2 pos) -{ - return ivec4(texelFetch(_3DTex, ivec2(pos*u3DScale), 0).bgra - * vec4(63,63,63,31)); -} - -float reduce(const vec3 color) -{ - return dot(color, vec3(65536.0, 256.0, 1.0)); -} - -float DistYCbCr(const vec3 pixA, const vec3 pixB) -{ - const vec3 w = vec3(0.2627, 0.6780, 0.0593); - const float scaleB = 0.5 / (1.0 - w.b); - const float scaleR = 0.5 / (1.0 - w.r); - vec3 diff = pixA - pixB; - float Y = dot(diff, w); - float Cb = scaleB * (diff.b - Y); - float Cr = scaleR * (diff.r - Y); - - return sqrt( ((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr) ); -} - -bool IsPixEqual(const vec3 pixA, const vec3 pixB) -{ - return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE); -} - -bool IsBlendingNeeded(const ivec4 blend) -{ - return any(notEqual(blend, ivec4(BLEND_NONE))); -} - -//--------------------------------------- -// Input Pixel Mapping: --|21|22|23|-- -// 19|06|07|08|09 -// 18|05|00|01|10 -// 17|04|03|02|11 -// --|15|14|13|-- -// -// Output Pixel Mapping: 20|21|22|23|24|25 -// 19|06|07|08|09|26 -// 18|05|00|01|10|27 -// 17|04|03|02|11|28 -// 16|15|14|13|12|29 -// 35|34|33|32|31|30 - -ivec4 GetFiltered2DPixel(int level) -{ - vec2 f = fract(vTexCoord.xy);// * SourceSize.xy); - - //--------------------------------------- - // Input Pixel Mapping: 20|21|22|23|24 - // 19|06|07|08|09 - // 18|05|00|01|10 - // 17|04|03|02|11 - // 16|15|14|13|12 - - vec3 src[25]; - - src[21] = Get2DPixel(t1.xw, level).rgb; - src[22] = Get2DPixel(t1.yw, level).rgb; 
- src[23] = Get2DPixel(t1.zw, level).rgb; - src[ 6] = Get2DPixel(t2.xw, level).rgb; - src[ 7] = Get2DPixel(t2.yw, level).rgb; - src[ 8] = Get2DPixel(t2.zw, level).rgb; - src[ 5] = Get2DPixel(t3.xw, level).rgb; - src[ 0] = Get2DPixel(t3.yw, level).rgb; - src[ 1] = Get2DPixel(t3.zw, level).rgb; - src[ 4] = Get2DPixel(t4.xw, level).rgb; - src[ 3] = Get2DPixel(t4.yw, level).rgb; - src[ 2] = Get2DPixel(t4.zw, level).rgb; - src[15] = Get2DPixel(t5.xw, level).rgb; - src[14] = Get2DPixel(t5.yw, level).rgb; - src[13] = Get2DPixel(t5.zw, level).rgb; - src[19] = Get2DPixel(t6.xy, level).rgb; - src[18] = Get2DPixel(t6.xz, level).rgb; - src[17] = Get2DPixel(t6.xw, level).rgb; - src[ 9] = Get2DPixel(t7.xy, level).rgb; - src[10] = Get2DPixel(t7.xz, level).rgb; - src[11] = Get2DPixel(t7.xw, level).rgb; - - float v[9]; - v[0] = reduce(src[0]); - v[1] = reduce(src[1]); - v[2] = reduce(src[2]); - v[3] = reduce(src[3]); - v[4] = reduce(src[4]); - v[5] = reduce(src[5]); - v[6] = reduce(src[6]); - v[7] = reduce(src[7]); - v[8] = reduce(src[8]); - - ivec4 blendResult = ivec4(BLEND_NONE); - - // Preprocess corners - // Pixel Tap Mapping: --|--|--|--|-- - // --|--|07|08|-- - // --|05|00|01|10 - // --|04|03|02|11 - // --|--|14|13|-- - // Corner (1, 1) - if ( ((v[0] == v[1] && v[3] == v[2]) || (v[0] == v[3] && v[1] == v[2])) == false) - { - float dist_03_01 = DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + DistYCbCr(src[14], src[ 2]) + DistYCbCr(src[ 2], src[10]) + (4.0 * DistYCbCr(src[ 3], src[ 1])); - float dist_00_02 = DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[ 3], src[13]) + DistYCbCr(src[ 7], src[ 1]) + DistYCbCr(src[ 1], src[11]) + (4.0 * DistYCbCr(src[ 0], src[ 2])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_03_01) < dist_00_02; - blendResult[2] = ((dist_03_01 < dist_00_02) && (v[0] != v[1]) && (v[0] != v[3])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - // Pixel Tap Mapping: --|--|--|--|-- - // --|06|07|--|-- - // 18|05|00|01|-- - // 17|04|03|02|-- - // --|15|14|--|-- - // Corner (0, 1) - if ( ((v[5] == v[0] && v[4] == v[3]) || (v[5] == v[4] && v[0] == v[3])) == false) - { - float dist_04_00 = DistYCbCr(src[17], src[ 5]) + DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[15], src[ 3]) + DistYCbCr(src[ 3], src[ 1]) + (4.0 * DistYCbCr(src[ 4], src[ 0])); - float dist_05_03 = DistYCbCr(src[18], src[ 4]) + DistYCbCr(src[ 4], src[14]) + DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + (4.0 * DistYCbCr(src[ 5], src[ 3])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_03) < dist_04_00; - blendResult[3] = ((dist_04_00 > dist_05_03) && (v[0] != v[5]) && (v[0] != v[3])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - // Pixel Tap Mapping: --|--|22|23|-- - // --|06|07|08|09 - // --|05|00|01|10 - // --|--|03|02|-- - // --|--|--|--|-- - // Corner (1, 0) - if ( ((v[7] == v[8] && v[0] == v[1]) || (v[7] == v[0] && v[8] == v[1])) == false) - { - float dist_00_08 = DistYCbCr(src[ 5], src[ 7]) + DistYCbCr(src[ 7], src[23]) + DistYCbCr(src[ 3], src[ 1]) + DistYCbCr(src[ 1], src[ 9]) + (4.0 * DistYCbCr(src[ 0], src[ 8])); - float dist_07_01 = DistYCbCr(src[ 6], src[ 0]) + DistYCbCr(src[ 0], src[ 2]) + DistYCbCr(src[22], src[ 8]) + DistYCbCr(src[ 8], src[10]) + (4.0 * DistYCbCr(src[ 7], src[ 1])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_07_01) < dist_00_08; - blendResult[1] = ((dist_00_08 > dist_07_01) && (v[0] != v[7]) && (v[0] != v[1])) ? ((dominantGradient) ? 
BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - // Pixel Tap Mapping: --|21|22|--|-- - // 19|06|07|08|-- - // 18|05|00|01|-- - // --|04|03|--|-- - // --|--|--|--|-- - // Corner (0, 0) - if ( ((v[6] == v[7] && v[5] == v[0]) || (v[6] == v[5] && v[7] == v[0])) == false) - { - float dist_05_07 = DistYCbCr(src[18], src[ 6]) + DistYCbCr(src[ 6], src[22]) + DistYCbCr(src[ 4], src[ 0]) + DistYCbCr(src[ 0], src[ 8]) + (4.0 * DistYCbCr(src[ 5], src[ 7])); - float dist_06_00 = DistYCbCr(src[19], src[ 5]) + DistYCbCr(src[ 5], src[ 3]) + DistYCbCr(src[21], src[ 7]) + DistYCbCr(src[ 7], src[ 1]) + (4.0 * DistYCbCr(src[ 6], src[ 0])); - bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_05_07) < dist_06_00; - blendResult[0] = ((dist_05_07 < dist_06_00) && (v[0] != v[5]) && (v[0] != v[7])) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE; - } - - vec3 dst[16]; - dst[ 0] = src[0]; - dst[ 1] = src[0]; - dst[ 2] = src[0]; - dst[ 3] = src[0]; - dst[ 4] = src[0]; - dst[ 5] = src[0]; - dst[ 6] = src[0]; - dst[ 7] = src[0]; - dst[ 8] = src[0]; - dst[ 9] = src[0]; - dst[10] = src[0]; - dst[11] = src[0]; - dst[12] = src[0]; - dst[13] = src[0]; - dst[14] = src[0]; - dst[15] = src[0]; - - // Scale pixel - if (IsBlendingNeeded(blendResult) == true) - { - float dist_01_04 = DistYCbCr(src[1], src[4]); - float dist_03_08 = DistYCbCr(src[3], src[8]); - bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[4]) && (v[5] != v[4]); - bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[8]) && (v[7] != v[8]); - bool needBlend = (blendResult[2] != BLEND_NONE); - bool doLineBlend = ( blendResult[2] >= BLEND_DOMINANT || - ((blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || - (blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || - (IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[0], 
src[2]) == false) ) == false ); - - vec3 blendPix = ( DistYCbCr(src[0], src[1]) <= DistYCbCr(src[0], src[3]) ) ? src[1] : src[3]; - dst[ 2] = mix(dst[ 2], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[10] = mix(dst[10], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[11] = mix(dst[11], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[12] = mix(dst[12], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[13] = mix(dst[13], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[14] = mix(dst[14], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - dist_01_04 = DistYCbCr(src[7], src[2]); - dist_03_08 = DistYCbCr(src[1], src[6]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[2]) && (v[3] != v[2]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[6]) && (v[5] != v[6]); - needBlend = (blendResult[1] != BLEND_NONE); - doLineBlend = ( blendResult[1] >= BLEND_DOMINANT || - !((blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || - (blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || - (IsPixEqual(src[2], src[1]) && IsPixEqual(src[1], src[8]) && IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && !IsPixEqual(src[0], src[8])) ) ); - - blendPix = ( DistYCbCr(src[0], src[7]) <= DistYCbCr(src[0], src[1]) ) ? src[7] : src[1]; - dst[ 1] = mix(dst[ 1], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 
1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[ 7] = mix(dst[ 7], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[ 8] = mix(dst[ 8], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 9] = mix(dst[ 9], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[10] = mix(dst[10], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[11] = mix(dst[11], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - dist_01_04 = DistYCbCr(src[5], src[8]); - dist_03_08 = DistYCbCr(src[7], src[4]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[8]) && (v[1] != v[8]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[4]) && (v[3] != v[4]); - needBlend = (blendResult[0] != BLEND_NONE); - doLineBlend = ( blendResult[0] >= BLEND_DOMINANT || - !((blendResult[3] != BLEND_NONE && !IsPixEqual(src[0], src[8])) || - (blendResult[1] != BLEND_NONE && !IsPixEqual(src[0], src[4])) || - (IsPixEqual(src[8], src[7]) && IsPixEqual(src[7], src[6]) && IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && !IsPixEqual(src[0], src[6])) ) ); - - blendPix = ( DistYCbCr(src[0], src[5]) <= DistYCbCr(src[0], src[7]) ) ? src[5] : src[7]; - dst[ 0] = mix(dst[ 0], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[15] = mix(dst[15], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[ 4] = mix(dst[ 4], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 
0.75 : 0.00); - dst[ 5] = mix(dst[ 5], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 6] = mix(dst[ 6], blendPix, (needBlend) ? ((doLineBlend) ? 1.00 : 0.6848532563) : 0.00); - dst[ 7] = mix(dst[ 7], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 8] = mix(dst[ 8], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[ 9] = mix(dst[ 9], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - - - dist_01_04 = DistYCbCr(src[3], src[6]); - dist_03_08 = DistYCbCr(src[5], src[2]); - haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_01_04 <= dist_03_08) && (v[0] != v[6]) && (v[7] != v[6]); - haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_03_08 <= dist_01_04) && (v[0] != v[2]) && (v[1] != v[2]); - needBlend = (blendResult[3] != BLEND_NONE); - doLineBlend = ( blendResult[3] >= BLEND_DOMINANT || - !((blendResult[2] != BLEND_NONE && !IsPixEqual(src[0], src[6])) || - (blendResult[0] != BLEND_NONE && !IsPixEqual(src[0], src[2])) || - (IsPixEqual(src[6], src[5]) && IsPixEqual(src[5], src[4]) && IsPixEqual(src[4], src[3]) && IsPixEqual(src[3], src[2]) && !IsPixEqual(src[0], src[4])) ) ); - - blendPix = ( DistYCbCr(src[0], src[3]) <= DistYCbCr(src[0], src[5]) ) ? src[3] : src[5]; - dst[ 3] = mix(dst[ 3], blendPix, (needBlend && doLineBlend) ? ((haveShallowLine) ? ((haveSteepLine) ? 1.0/3.0 : 0.25) : ((haveSteepLine) ? 0.25 : 0.00)) : 0.00); - dst[12] = mix(dst[12], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.25 : 0.00); - dst[13] = mix(dst[13], blendPix, (needBlend && doLineBlend && haveSteepLine) ? 0.75 : 0.00); - dst[14] = mix(dst[14], blendPix, (needBlend) ? ((doLineBlend) ? ((haveSteepLine) ? 1.00 : ((haveShallowLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[15] = mix(dst[15], blendPix, (needBlend) ? ((doLineBlend) ? 
1.00 : 0.6848532563) : 0.00); - dst[ 4] = mix(dst[ 4], blendPix, (needBlend) ? ((doLineBlend) ? ((haveShallowLine) ? 1.00 : ((haveSteepLine) ? 0.75 : 0.50)) : 0.08677704501) : 0.00); - dst[ 5] = mix(dst[ 5], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.75 : 0.00); - dst[ 6] = mix(dst[ 6], blendPix, (needBlend && doLineBlend && haveShallowLine) ? 0.25 : 0.00); - } - - vec3 res = mix( mix( mix( mix(dst[ 6], dst[ 7], step(0.25, f.x)), mix(dst[ 8], dst[ 9], step(0.75, f.x)), step(0.50, f.x)), - mix( mix(dst[ 5], dst[ 0], step(0.25, f.x)), mix(dst[ 1], dst[10], step(0.75, f.x)), step(0.50, f.x)), step(0.25, f.y)), - mix( mix( mix(dst[ 4], dst[ 3], step(0.25, f.x)), mix(dst[ 2], dst[11], step(0.75, f.x)), step(0.50, f.x)), - mix( mix(dst[15], dst[14], step(0.25, f.x)), mix(dst[13], dst[12], step(0.75, f.x)), step(0.50, f.x)), step(0.75, f.y)), - step(0.50, f.y)); - - return ivec4(res * vec3(63,63,63), 0); -} - - -void main() -{ - vec2 fTexcoord = vTexCoord.xy; - - ivec4 pixel;// = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord), 0)); - - ivec4 mbright = ivec4(texelFetch(ScreenTex, ivec2(256*3, int(fTexcoord.y)), 0)); - int dispmode = mbright.b & 0x3; - - if (dispmode == 1) - { - ivec4 val1;// = pixel; - //ivec4 val2 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(256,0), 0)); - ivec4 val3 = ivec4(texelFetch(ScreenTex, ivec2(fTexcoord) + ivec2(512,0), 0)); - - int compmode = val3.a & 0xF; - int eva, evb, evy; - - float xpos = val3.r + fract(fTexcoord.x); - float ypos = mod(fTexcoord.y, 192); - ivec4 _3dpix = Get3DPixel(vec2(xpos, ypos)); - - if (compmode == 4) - { - // 3D on top, blending - - if (_3dpix.a > 0) - { - eva = (_3dpix.a & 0x1F) + 1; - if (eva == 32) - { - val1 = _3dpix; - } - else - { - evb = 32 - eva; - - val1 = GetFiltered2DPixel(0); - - val1 = ((_3dpix * eva) + (val1 * evb)) >> 5; - if (eva <= 16) val1 += ivec4(1,1,1,0); - val1 = min(val1, 0x3F); - } - } - else - val1 = GetFiltered2DPixel(1); - } - else if (compmode == 1) - { - // 3D on 
bottom, blending - - if (_3dpix.a > 0) - { - eva = val3.g; - evb = val3.b; - - val1 = GetFiltered2DPixel(0); - - val1 = ((val1 * eva) + (_3dpix * evb)) >> 4; - val1 = min(val1, 0x3F); - } - else - val1 = GetFiltered2DPixel(1); - } - else if (compmode <= 3) - { - // 3D on top, normal/fade - - if (_3dpix.a > 0) - { - evy = val3.g; - - val1 = _3dpix; - if (compmode == 2) val1 += ((ivec4(0x3F,0x3F,0x3F,0) - val1) * evy) >> 4; - else if (compmode == 3) val1 -= (val1 * evy) >> 4; - } - else - val1 = GetFiltered2DPixel(1); - } - else - val1 = GetFiltered2DPixel(0); - - pixel = val1; - } - else - { - pixel = GetFiltered2DPixel(0); - } - - if (dispmode != 0) - { - int brightmode = mbright.g >> 6; - if (brightmode == 1) - { - // up - int evy = mbright.r & 0x1F; - if (evy > 16) evy = 16; - - pixel += ((ivec4(0x3F,0x3F,0x3F,0) - pixel) * evy) >> 4; - } - else if (brightmode == 2) - { - // down - int evy = mbright.r & 0x1F; - if (evy > 16) evy = 16; - - pixel -= (pixel * evy) >> 4; - } - } - - pixel.rgb <<= 2; - pixel.rgb |= (pixel.rgb >> 6); - - FragColor = vec4(vec3(pixel.bgr) / 255.0, 1.0); -} -)"; - - - - - -} - -#endif // GPU_OPENGL_SHADERS_H diff --git a/src/GPU_Soft.cpp b/src/GPU_Soft.cpp new file mode 100644 index 0000000000..e0be1e6084 --- /dev/null +++ b/src/GPU_Soft.cpp @@ -0,0 +1,455 @@ +/* + Copyright 2016-2025 melonDS team + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. 
+*/ + +#include "NDS.h" +#include "GPU_Soft.h" +#include "GPU_ColorOp.h" + +namespace melonDS +{ + +SoftRenderer::SoftRenderer(melonDS::NDS& nds) + : Renderer(nds.GPU) +{ + const size_t len = 256 * 192; + Framebuffer[0][0] = new u32[len]; + Framebuffer[0][1] = new u32[len]; + Framebuffer[1][0] = new u32[len]; + Framebuffer[1][1] = new u32[len]; + BackBuffer = 0; + + Rend2D_A = std::make_unique(GPU.GPU2D_A, *this); + Rend2D_B = std::make_unique(GPU.GPU2D_B, *this); + Rend3D = std::make_unique(GPU.GPU3D, *this); +} + +SoftRenderer::~SoftRenderer() +{ + delete[] Framebuffer[0][0]; + delete[] Framebuffer[0][1]; + delete[] Framebuffer[1][0]; + delete[] Framebuffer[1][1]; +} + +void SoftRenderer::Reset() +{ + const size_t len = 256 * 192 * sizeof(u32); + memset(Framebuffer[0][0], 0, len); + memset(Framebuffer[0][1], 0, len); + memset(Framebuffer[1][0], 0, len); + memset(Framebuffer[1][1], 0, len); + + Rend2D_A->Reset(); + Rend2D_B->Reset(); + Rend3D->Reset(); +} + +void SoftRenderer::Stop() +{ + // clear framebuffers to black + const size_t len = 256 * 192 * sizeof(u32); + memset(Framebuffer[0][0], 0, len); + memset(Framebuffer[0][1], 0, len); + memset(Framebuffer[1][0], 0, len); + memset(Framebuffer[1][1], 0, len); +} + + +void SoftRenderer::PreSavestate() +{ + auto rend3d = dynamic_cast(Rend3D.get()); + if (rend3d->IsThreaded()) + rend3d->SetupRenderThread(); +} + +void SoftRenderer::PostSavestate() +{ + auto rend3d = dynamic_cast(Rend3D.get()); + if (rend3d->IsThreaded()) + rend3d->EnableRenderThread(); +} + + +void SoftRenderer::SetRenderSettings(RendererSettings& settings) +{ + auto rend3d = dynamic_cast(Rend3D.get()); + rend3d->SetThreaded(settings.Threaded); +} + + +void SoftRenderer::DrawScanline(u32 line) +{ + u32 *dstA, *dstB; + u32 dstoffset = 256 * line; + if (GPU.ScreenSwap) + { + dstA = &Framebuffer[BackBuffer][0][dstoffset]; + dstB = &Framebuffer[BackBuffer][1][dstoffset]; + } + else + { + dstA = &Framebuffer[BackBuffer][1][dstoffset]; + dstB = 
&Framebuffer[BackBuffer][0][dstoffset]; + } + + // the position used for drawing operations is based on VCOUNT + line = GPU.VCount; + if (line < 192) + { + // retrieve 3D output + Output3D = Rend3D->GetLine(line); + + // draw BG/OBJ layers + Rend2D_A->DrawScanline(line); + Rend2D_B->DrawScanline(line); + + // draw the final screen output + DrawScanlineA(line, dstA); + DrawScanlineB(line, dstB); + + // perform display capture if enabled + if (GPU.CaptureEnable) + DoCapture(line); + } + else + { + // if scanlines outside VCOUNT range 0..191 were to be visible, fill them white + // this may happen if VCOUNT is written to during active display + // the actual hardware behavior depends on the screen model, and suggests that + // no video signal is output for such scanlines + + for (int i = 0; i < 256; i++) + { + dstA[i] = 0x3F3F3F; + dstB[i] = 0x3F3F3F; + } + } + + if (GPU.ScreensEnabled) + { + // expand the color from 6-bit to 8-bit + ExpandColor(dstA); + ExpandColor(dstB); + } + else + { + // if the screens are disabled: fill the framebuffer black + for (int i = 0; i < 256; i++) + { + dstA[i] = 0xFF000000; + dstB[i] = 0xFF000000; + } + } +} + +void SoftRenderer::DrawSprites(u32 line) +{ + Rend2D_A->DrawSprites(line); + Rend2D_B->DrawSprites(line); +} + +void SoftRenderer::DrawScanlineA(u32 line, u32* dst) +{ + u32 dispcnt = GPU.GPU2D_A.DispCnt; + switch ((dispcnt >> 16) & 0x3) + { + case 0: // screen off + { + for (int i = 0; i < 256; i++) + dst[i] = 0x3F3F3F; + } + return; + + case 1: // regular display + { + for (int i = 0; i < 256; i+=2) + *(u64*)&dst[i] = *(u64*)&Output2D[0][i]; + } + break; + + case 2: // VRAM display + { + u32 vrambank = (dispcnt >> 18) & 0x3; + if (GPU.VRAMMap_LCDC & (1<> 4; + u8 b = (color & 0x7C00) >> 9; + + dst[i] = r | (g << 8) | (b << 16); + } + } + else + { + for (int i = 0; i < 256; i++) + dst[i] = 0; + } + } + break; + + case 3: // FIFO display + { + for (int i = 0; i < 256; i++) + { + u16 color = GPU.DispFIFOBuffer[i]; + u8 r = (color 
& 0x001F) << 1; + u8 g = (color & 0x03E0) >> 4; + u8 b = (color & 0x7C00) >> 9; + + dst[i] = r | (g << 8) | (b << 16); + } + } + break; + } + + ApplyMasterBrightness(GPU.MasterBrightnessA, dst); +} + +void SoftRenderer::DrawScanlineB(u32 line, u32* dst) +{ + u32 dispcnt = GPU.GPU2D_B.DispCnt; + switch ((dispcnt >> 16) & 0x1) + { + case 0: // screen off + { + for (int i = 0; i < 256; i++) + dst[i] = 0xFF3F3F3F; + } + return; + + case 1: // regular display + { + for (int i = 0; i < 256; i+=2) + *(u64*)&dst[i] = *(u64*)&Output2D[1][i]; + } + break; + } + + ApplyMasterBrightness(GPU.MasterBrightnessB, dst); +} + +void SoftRenderer::DoCapture(u32 line) +{ + u32 captureCnt = GPU.CaptureCnt; + + u32 width, height; + u32 sz = (captureCnt >> 20) & 0x3; + if (sz == 0) + { + width = 128; + height = 128; + } + else + { + width = 256; + height = 64 * sz; + } + + if (line >= height) + return; + + u32 dstvram = (captureCnt >> 16) & 0x3; + if (!(GPU.VRAMMap_LCDC & (1<> 18) & 0x3) << 14) + (line * width); + dst += (dstaddr & 0xFFFF); + + u32* srcA; + if (captureCnt & (1<<24)) + srcA = Output3D; + else + srcA = Output2D[0]; + + u16* srcB = nullptr; + if (captureCnt & (1<<25)) + srcB = GPU.DispFIFOBuffer; + else + { + u32 dispcnt = GPU.GPU2D_A.DispCnt; + u32 srcvram = (dispcnt >> 18) & 0x3; + if (GPU.VRAMMap_LCDC & (1<> 16) & 0x3) != 2) + offset += (((captureCnt >> 26) & 0x3) << 14); + + srcB += (offset & 0xFFFF); + } + } + + static_assert(VRAMDirtyGranularity == 512); + GPU.VRAMDirty[dstvram][(dstaddr * 2) / VRAMDirtyGranularity] = true; + + switch ((captureCnt >> 29) & 0x3) + { + case 0: // source A + { + for (u32 i = 0; i < width; i++) + { + u32 val = srcA[i]; + + u32 r = (val >> 1) & 0x1F; + u32 g = (val >> 9) & 0x1F; + u32 b = (val >> 17) & 0x1F; + u32 a = ((val >> 24) != 0) ? 
0x8000 : 0; + + dst[i] = r | (g << 5) | (b << 10) | a; + } + } + break; + + case 1: // source B + { + if (srcB) + { + for (u32 i = 0; i < width; i++) + dst[i] = srcB[i]; + } + else + { + for (u32 i = 0; i < width; i++) + dst[i] = 0; + } + } + break; + + case 2: // sources A+B + case 3: + { + u32 eva = captureCnt & 0x1F; + u32 evb = (captureCnt >> 8) & 0x1F; + + // checkme + if (eva > 16) eva = 16; + if (evb > 16) evb = 16; + + if (srcB) + { + for (u32 i = 0; i < width; i++) + { + u32 val = srcA[i]; + + u32 rA = (val >> 1) & 0x1F; + u32 gA = (val >> 9) & 0x1F; + u32 bA = (val >> 17) & 0x1F; + u32 aA = ((val >> 24) != 0) ? 1 : 0; + + val = srcB[i]; + + u32 rB = val & 0x1F; + u32 gB = (val >> 5) & 0x1F; + u32 bB = (val >> 10) & 0x1F; + u32 aB = val >> 15; + + u32 rD = ((rA * aA * eva) + (rB * aB * evb) + 8) >> 4; + u32 gD = ((gA * aA * eva) + (gB * aB * evb) + 8) >> 4; + u32 bD = ((bA * aA * eva) + (bB * aB * evb) + 8) >> 4; + u32 aD = (eva>0 ? aA : 0) | (evb>0 ? aB : 0); + + if (rD > 0x1F) rD = 0x1F; + if (gD > 0x1F) gD = 0x1F; + if (bD > 0x1F) bD = 0x1F; + + dst[i] = rD | (gD << 5) | (bD << 10) | (aD << 15); + } + } + else + { + for (u32 i = 0; i < width; i++) + { + u32 val = srcA[i]; + + u32 rA = (val >> 1) & 0x1F; + u32 gA = (val >> 9) & 0x1F; + u32 bA = (val >> 17) & 0x1F; + u32 aA = ((val >> 24) != 0) ? 1 : 0; + + u32 rD = ((rA * aA * eva) + 8) >> 4; + u32 gD = ((gA * aA * eva) + 8) >> 4; + u32 bD = ((bA * aA * eva) + 8) >> 4; + u32 aD = (eva>0 ? 
aA : 0); + + dst[i] = rD | (gD << 5) | (bD << 10) | (aD << 15); + } + } + } + break; + } +} + +void SoftRenderer::ApplyMasterBrightness(u16 regval, u32* dst) +{ + u16 mode = regval >> 14; + if (mode == 1) + { + // up + u32 factor = regval & 0x1F; + if (factor > 16) factor = 16; + + for (int i = 0; i < 256; i++) + dst[i] = ColorBrightnessUp(dst[i], factor, 0x0); + } + else if (mode == 2) + { + // down + u32 factor = regval & 0x1F; + if (factor > 16) factor = 16; + + for (int i = 0; i < 256; i++) + dst[i] = ColorBrightnessDown(dst[i], factor, 0xF); + } +} + +void SoftRenderer::ExpandColor(u32* dst) +{ + // convert to 32-bit BGRA + // note: 32-bit RGBA would be more straightforward, but + // BGRA seems to be more compatible (Direct2D soft, cairo...) + for (int i = 0; i < 256; i+=2) + { + u64 c = *(u64*)&dst[i]; + + u64 r = (c << 18) & 0xFC000000FC0000; + u64 g = (c << 2) & 0xFC000000FC00; + u64 b = (c >> 14) & 0xFC000000FC; + c = r | g | b; + + *(u64*)&dst[i] = c | ((c & 0x00C0C0C000C0C0C0) >> 6) | 0xFF000000FF000000; + } +} + + +bool SoftRenderer::GetFramebuffers(void** top, void** bottom) +{ + int frontbuf = BackBuffer ^ 1; + *top = Framebuffer[frontbuf][0]; + *bottom = Framebuffer[frontbuf][1]; + return true; +} + +} diff --git a/src/GPU_Soft.h b/src/GPU_Soft.h new file mode 100644 index 0000000000..3f31e3449d --- /dev/null +++ b/src/GPU_Soft.h @@ -0,0 +1,74 @@ +/* + Copyright 2016-2025 melonDS team + + This file is part of melonDS. + + melonDS is free software: you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + melonDS is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License along + with melonDS. If not, see http://www.gnu.org/licenses/. +*/ + +#ifndef GPU_SOFT_H +#define GPU_SOFT_H + +#include "GPU.h" +#include "GPU2D_Soft.h" +#include "GPU3D_Soft.h" + +namespace melonDS +{ + +class SoftRenderer : public Renderer +{ +public: + explicit SoftRenderer(melonDS::NDS& nds); + ~SoftRenderer() override; + bool Init() override { return true; } + void Reset() override; + void Stop() override; + + void PreSavestate() override; + void PostSavestate() override; + + void SetRenderSettings(RendererSettings& settings) override; + + void DrawScanline(u32 line) override; + void DrawSprites(u32 line) override; + + void VBlank() override {}; + void VBlankEnd() override {}; + + void AllocCapture(u32 bank, u32 start, u32 len) override {}; + void SyncVRAMCapture(u32 bank, u32 start, u32 len, bool complete) override {}; + + bool GetFramebuffers(void** top, void** bottom) override; + +private: + friend class SoftRenderer2D; + friend class SoftRenderer3D; + + u32* Framebuffer[2][2]; + + u32* Output3D; + alignas(8) u32 Output2D[2][256]; + + void DrawScanlineA(u32 line, u32* dst); + void DrawScanlineB(u32 line, u32* dst); + + void DoCapture(u32 line); + + void ApplyMasterBrightness(u16 regval, u32* dst); + void ExpandColor(u32* dst); +}; + +} + +#endif // GPU_SOFT_H diff --git a/src/NDS.cpp b/src/NDS.cpp index 4e41dbb41b..afe06d103e 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -97,7 +97,7 @@ NDS::NDS(NDSArgs&& args, int type, void* userdata) noexcept : JIT(*this, args.JIT), SPU(*this, args.BitDepth, args.Interpolation, args.OutputSampleRate), Mic(*this), - GPU(*this, std::move(args.Renderer3D)), + GPU(*this, std::move(args.Renderer)), SPI(*this, std::move(args.Firmware)), RTC(*this), Wifi(*this), @@ -492,7 +492,7 @@ void NDS::Reset() PostFlag9 = 0x00; PostFlag7 = 0x00; - PowerControl9 = 0x0001; + PowerControl9 = 0x0000; PowerControl7 = 0x0000; WifiWaitCnt = 0xFFFF; // temp @@ -1382,7 +1382,7 
@@ void NDS::SetIRQ(u32 cpu, u32 irq) { CPUStop &= ~CPUStop_Sleep; CPUStop |= CPUStop_Wakeup; - GPU.GPU3D.RestartFrame(GPU); + GPU.Restart3DFrame(); } } } @@ -1964,11 +1964,11 @@ u8 NDS::ARM9Read8(u32 addr) case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: return GPU.ReadVRAM_ABG(addr); - case 0x00200000: return GPU.ReadVRAM_BBG(addr); - case 0x00400000: return GPU.ReadVRAM_AOBJ(addr); - case 0x00600000: return GPU.ReadVRAM_BOBJ(addr); - default: return GPU.ReadVRAM_LCDC(addr); + case 0x00000000: GPU.SyncVRAM_ABG(addr, false); return GPU.ReadVRAM_ABG(addr); + case 0x00200000: GPU.SyncVRAM_BBG(addr, false); return GPU.ReadVRAM_BBG(addr); + case 0x00400000: GPU.SyncVRAM_AOBJ(addr, false); return GPU.ReadVRAM_AOBJ(addr); + case 0x00600000: GPU.SyncVRAM_BOBJ(addr, false); return GPU.ReadVRAM_BOBJ(addr); + default: GPU.SyncVRAM_LCDC(addr, false); return GPU.ReadVRAM_LCDC(addr); } case 0x07000000: @@ -2024,11 +2024,11 @@ u16 NDS::ARM9Read16(u32 addr) case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: return GPU.ReadVRAM_ABG(addr); - case 0x00200000: return GPU.ReadVRAM_BBG(addr); - case 0x00400000: return GPU.ReadVRAM_AOBJ(addr); - case 0x00600000: return GPU.ReadVRAM_BOBJ(addr); - default: return GPU.ReadVRAM_LCDC(addr); + case 0x00000000: GPU.SyncVRAM_ABG(addr, false); return GPU.ReadVRAM_ABG(addr); + case 0x00200000: GPU.SyncVRAM_BBG(addr, false); return GPU.ReadVRAM_BBG(addr); + case 0x00400000: GPU.SyncVRAM_AOBJ(addr, false); return GPU.ReadVRAM_AOBJ(addr); + case 0x00600000: GPU.SyncVRAM_BOBJ(addr, false); return GPU.ReadVRAM_BOBJ(addr); + default: GPU.SyncVRAM_LCDC(addr, false); return GPU.ReadVRAM_LCDC(addr); } case 0x07000000: @@ -2084,11 +2084,11 @@ u32 NDS::ARM9Read32(u32 addr) case 0x06000000: switch (addr & 0x00E00000) { - case 0x00000000: return GPU.ReadVRAM_ABG(addr); - case 0x00200000: return GPU.ReadVRAM_BBG(addr); - case 0x00400000: return GPU.ReadVRAM_AOBJ(addr); - case 0x00600000: return GPU.ReadVRAM_BOBJ(addr); - default: 
return GPU.ReadVRAM_LCDC(addr); + case 0x00000000: GPU.SyncVRAM_ABG(addr, false); return GPU.ReadVRAM_ABG(addr); + case 0x00200000: GPU.SyncVRAM_BBG(addr, false); return GPU.ReadVRAM_BBG(addr); + case 0x00400000: GPU.SyncVRAM_AOBJ(addr, false); return GPU.ReadVRAM_AOBJ(addr); + case 0x00600000: GPU.SyncVRAM_BOBJ(addr, false); return GPU.ReadVRAM_BOBJ(addr); + default: GPU.SyncVRAM_LCDC(addr, false); return GPU.ReadVRAM_LCDC(addr); } case 0x07000000: @@ -2184,11 +2184,11 @@ void NDS::ARM9Write16(u32 addr, u16 val) JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); switch (addr & 0x00E00000) { - case 0x00000000: GPU.WriteVRAM_ABG(addr, val); return; - case 0x00200000: GPU.WriteVRAM_BBG(addr, val); return; - case 0x00400000: GPU.WriteVRAM_AOBJ(addr, val); return; - case 0x00600000: GPU.WriteVRAM_BOBJ(addr, val); return; - default: GPU.WriteVRAM_LCDC(addr, val); return; + case 0x00000000: GPU.SyncVRAM_ABG(addr, true); GPU.WriteVRAM_ABG(addr, val); return; + case 0x00200000: GPU.SyncVRAM_BBG(addr, true); GPU.WriteVRAM_BBG(addr, val); return; + case 0x00400000: GPU.SyncVRAM_AOBJ(addr, true); GPU.WriteVRAM_AOBJ(addr, val); return; + case 0x00600000: GPU.SyncVRAM_BOBJ(addr, true); GPU.WriteVRAM_BOBJ(addr, val); return; + default: GPU.SyncVRAM_LCDC(addr, true); GPU.WriteVRAM_LCDC(addr, val); return; } case 0x07000000: @@ -2244,11 +2244,11 @@ void NDS::ARM9Write32(u32 addr, u32 val) JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr); switch (addr & 0x00E00000) { - case 0x00000000: GPU.WriteVRAM_ABG(addr, val); return; - case 0x00200000: GPU.WriteVRAM_BBG(addr, val); return; - case 0x00400000: GPU.WriteVRAM_AOBJ(addr, val); return; - case 0x00600000: GPU.WriteVRAM_BOBJ(addr, val); return; - default: GPU.WriteVRAM_LCDC(addr, val); return; + case 0x00000000: GPU.SyncVRAM_ABG(addr, true); GPU.WriteVRAM_ABG(addr, val); return; + case 0x00200000: GPU.SyncVRAM_BBG(addr, true); GPU.WriteVRAM_BBG(addr, val); return; + case 0x00400000: GPU.SyncVRAM_AOBJ(addr, 
true); GPU.WriteVRAM_AOBJ(addr, val); return; + case 0x00600000: GPU.SyncVRAM_BOBJ(addr, true); GPU.WriteVRAM_BOBJ(addr, val); return; + default: GPU.SyncVRAM_LCDC(addr, true); GPU.WriteVRAM_LCDC(addr, val); return; } case 0x07000000: @@ -2771,6 +2771,20 @@ u8 NDS::ARM9IORead8(u32 addr) { switch (addr) { + case 0x04000004: return GPU.DispStat[0] & 0xFF; + case 0x04000005: return GPU.DispStat[0] >> 8; + case 0x04000006: return GPU.VCount & 0xFF; + case 0x04000007: return GPU.VCount >> 8; + + case 0x04000064: + case 0x04000065: + case 0x04000066: + case 0x04000067: + case 0x0400006C: + case 0x0400006D: + case 0x0400106C: + case 0x0400106D: return GPU.Read8(addr); + case 0x04000130: LagFrameFlag = false; return KeyInput & 0xFF; case 0x04000131: LagFrameFlag = false; return (KeyInput >> 8) & 0xFF; case 0x04000132: return KeyCnt[0] & 0xFF; @@ -2909,7 +2923,9 @@ u16 NDS::ARM9IORead16(u32 addr) case 0x04000060: return GPU.GPU3D.Read16(addr); case 0x04000064: - case 0x04000066: return GPU.GPU2D_A.Read16(addr); + case 0x04000066: + case 0x0400006C: + case 0x0400106C: return GPU.Read16(addr); case 0x040000B8: return DMAs[0].Cnt & 0xFFFF; case 0x040000BA: return DMAs[0].Cnt >> 16; @@ -3065,7 +3081,9 @@ u32 NDS::ARM9IORead32(u32 addr) case 0x04000004: return GPU.DispStat[0] | (GPU.VCount << 16); case 0x04000060: return GPU.GPU3D.Read32(addr); - case 0x04000064: return GPU.GPU2D_A.Read32(addr); + case 0x04000064: + case 0x0400006C: + case 0x0400106C: return GPU.Read32(addr); case 0x040000B0: return DMAs[0].SrcAddr; case 0x040000B4: return DMAs[0].DstAddr; @@ -3206,10 +3224,25 @@ void NDS::ARM9IOWrite8(u32 addr, u8 val) { switch (addr) { + case 0x04000004: GPU.SetDispStat(0, val, 0x00FF); return; + case 0x04000005: GPU.SetDispStat(0, val << 8, 0xFF00); return; + case 0x04000006: GPU.SetVCount(val, 0x00FF); return; + case 0x04000007: GPU.SetVCount(val << 8, 0xFF00); return; + + case 0x04000060: + case 0x04000061: GPU.GPU3D.Write8(addr, val); return; + case 0x04000064: + case 
0x04000065: + case 0x04000066: + case 0x04000067: + case 0x04000068: + case 0x04000069: + case 0x0400006A: + case 0x0400006B: case 0x0400006C: - case 0x0400006D: GPU.GPU2D_A.Write8(addr, val); return; + case 0x0400006D: case 0x0400106C: - case 0x0400106D: GPU.GPU2D_B.Write8(addr, val); return; + case 0x0400106D: GPU.Write8(addr, val); return; case 0x04000132: KeyCnt[0] = (KeyCnt[0] & 0xFF00) | val; @@ -3317,19 +3350,16 @@ void NDS::ARM9IOWrite16(u32 addr, u16 val) { switch (addr) { - case 0x04000004: GPU.SetDispStat(0, val); return; - case 0x04000006: GPU.SetVCount(val); return; + case 0x04000004: GPU.SetDispStat(0, val, 0xFFFF); return; + case 0x04000006: GPU.SetVCount(val, 0xFFFF); return; case 0x04000060: GPU.GPU3D.Write16(addr, val); return; - case 0x04000064: - case 0x04000066: GPU.GPU2D_A.Write16(addr, val); return; - + case 0x04000066: case 0x04000068: - case 0x0400006A: GPU.GPU2D_A.Write16(addr, val); return; - - case 0x0400006C: GPU.GPU2D_A.Write16(addr, val); return; - case 0x0400106C: GPU.GPU2D_B.Write16(addr, val); return; + case 0x0400006A: + case 0x0400006C: + case 0x0400106C: GPU.Write16(addr, val); return; case 0x040000B8: DMAs[0].WriteCnt((DMAs[0].Cnt & 0xFFFF0000) | val); return; case 0x040000BA: DMAs[0].WriteCnt((DMAs[0].Cnt & 0x0000FFFF) | (val << 16)); return; @@ -3516,16 +3546,15 @@ void NDS::ARM9IOWrite32(u32 addr, u32 val) switch (addr) { case 0x04000004: - GPU.SetDispStat(0, val & 0xFFFF); - GPU.SetVCount(val >> 16); + GPU.SetDispStat(0, val & 0xFFFF, 0xFFFF); + GPU.SetVCount(val >> 16, 0xFFFF); return; case 0x04000060: GPU.GPU3D.Write32(addr, val); return; case 0x04000064: - case 0x04000068: GPU.GPU2D_A.Write32(addr, val); return; - - case 0x0400006C: GPU.GPU2D_A.Write16(addr, val&0xFFFF); return; - case 0x0400106C: GPU.GPU2D_B.Write16(addr, val&0xFFFF); return; + case 0x04000068: + case 0x0400006C: + case 0x0400106C: GPU.Write32(addr, val); return; case 0x040000B0: DMAs[0].SrcAddr = val; return; case 0x040000B4: DMAs[0].DstAddr = val; 
return; @@ -3712,6 +3741,11 @@ u8 NDS::ARM7IORead8(u32 addr) { switch (addr) { + case 0x04000004: return GPU.DispStat[1] & 0xFF; + case 0x04000005: return GPU.DispStat[1] >> 8; + case 0x04000006: return GPU.VCount & 0xFF; + case 0x04000007: return GPU.VCount >> 8; + case 0x04000130: return KeyInput & 0xFF; case 0x04000131: return (KeyInput >> 8) & 0xFF; case 0x04000132: return KeyCnt[1] & 0xFF; @@ -4019,6 +4053,11 @@ void NDS::ARM7IOWrite8(u32 addr, u8 val) { switch (addr) { + case 0x04000004: GPU.SetDispStat(1, val, 0x00FF); return; + case 0x04000005: GPU.SetDispStat(1, val << 8, 0xFF00); return; + case 0x04000006: GPU.SetVCount(val, 0x00FF); return; + case 0x04000007: GPU.SetVCount(val << 8, 0xFF00); return; + case 0x04000132: KeyCnt[1] = (KeyCnt[1] & 0xFF00) | val; return; @@ -4124,8 +4163,8 @@ void NDS::ARM7IOWrite16(u32 addr, u16 val) { switch (addr) { - case 0x04000004: GPU.SetDispStat(1, val); return; - case 0x04000006: GPU.SetVCount(val); return; + case 0x04000004: GPU.SetDispStat(1, val, 0xFFFF); return; + case 0x04000006: GPU.SetVCount(val, 0xFFFF); return; case 0x040000B8: DMAs[4].WriteCnt((DMAs[4].Cnt & 0xFFFF0000) | val); return; case 0x040000BA: DMAs[4].WriteCnt((DMAs[4].Cnt & 0x0000FFFF) | (val << 16)); return; @@ -4289,8 +4328,8 @@ void NDS::ARM7IOWrite32(u32 addr, u32 val) switch (addr) { case 0x04000004: - GPU.SetDispStat(1, val & 0xFFFF); - GPU.SetVCount(val >> 16); + GPU.SetDispStat(1, val & 0xFFFF, 0xFFFF); + GPU.SetVCount(val >> 16, 0xFFFF); return; case 0x040000B0: DMAs[4].SrcAddr = val; return; diff --git a/src/NDS.h b/src/NDS.h index 17e656e9e7..a2ea8af22c 100644 --- a/src/NDS.h +++ b/src/NDS.h @@ -368,12 +368,12 @@ class NDS Firmware& GetFirmware() { return SPI.GetFirmwareMem()->GetFirmware(); } void SetFirmware(Firmware&& firmware) { SPI.GetFirmwareMem()->SetFirmware(std::move(firmware)); } - const Renderer3D& GetRenderer3D() const noexcept { return GPU.GetRenderer3D(); } - Renderer3D& GetRenderer3D() noexcept { return 
GPU.GetRenderer3D(); } - void SetRenderer3D(std::unique_ptr<Renderer3D>&& renderer) noexcept + const Renderer& GetRenderer() const noexcept { return GPU.GetRenderer(); } + Renderer& GetRenderer() noexcept { return GPU.GetRenderer(); } + void SetRenderer(std::unique_ptr<Renderer>&& renderer) noexcept { if (renderer != nullptr) - GPU.SetRenderer3D(std::move(renderer)); + GPU.SetRenderer(std::move(renderer)); } virtual bool NeedsDirectBoot() const; diff --git a/src/NonStupidBitfield.h b/src/NonStupidBitfield.h index bfb3f50653..072bfb77ff 100644 --- a/src/NonStupidBitfield.h +++ b/src/NonStupidBitfield.h @@ -215,10 +215,42 @@ struct NonStupidBitField } else { - Data[startEntry] |= ((1ULL << bitsCount) - 1) << (startBit & 0x3F); + Data[startEntry] |= (0xFFFFFFFFFFFFFFFF >> (64-bitsCount) << (startBit & 0x3F)); } } + // Returns true if at least one bit in [startBit, startBit + bitsCount) is set. + bool CheckRange(u32 startBit, u32 bitsCount) const + { + // An empty range has no set bits. Returning early also keeps (64-bitsCount) below + // within 0..63: shifting a 64-bit value by 64 is undefined behaviour. + if (bitsCount == 0) + return false; + + u32 startEntry = startBit >> 6; + u64 entriesCount = (((startBit + bitsCount + 0x3F) & ~0x3F) >> 6) - startEntry; + u64 res = 0; + + if (entriesCount > 1) + { + // range spans several 64-bit entries: first entry masked from startBit up, last entry masked below the end bit (taken whole when the end is 64-aligned), entries in between taken whole + res = (Data[startEntry] & (0xFFFFFFFFFFFFFFFF << (startBit & 0x3F))); + if ((startBit + bitsCount) & 0x3F) + res |= (Data[startEntry + entriesCount - 1] & (~(0xFFFFFFFFFFFFFFFF << ((startBit + bitsCount) & 0x3F)))); + else + res |= Data[startEntry + entriesCount - 1]; + for (u64 i = startEntry + 1; i < startEntry + entriesCount - 1; i++) + res |= Data[i]; + } + else + { + // range lies within a single entry + res = (Data[startEntry] & (0xFFFFFFFFFFFFFFFF >> (64-bitsCount) << (startBit & 0x3F))); + } + + return !!res; + } + int Min() const { for (int i = 0; i < DataLength; i++) diff --git a/src/OpenGL_shaders/2DCompositorFS.glsl b/src/OpenGL_shaders/2DCompositorFS.glsl new file mode 100644 index 0000000000..1de6ef0497 --- /dev/null +++ b/src/OpenGL_shaders/2DCompositorFS.glsl @@ -0,0 +1,397 @@ +#version 140 + +uniform sampler2D BGLayerTex[4]; +uniform sampler2DArray OBJLayerTex; +uniform sampler2DArray Capture128Tex; +uniform sampler2DArray Capture256Tex; +uniform isampler2D MosaicTex; + +struct sBGConfig +{
+ ivec2 Size; + int Type; + int PalOffset; + int TileOffset; + int MapOffset; + bool Clamp; +}; + +layout(std140) uniform ubBGConfig +{ + int uVRAMMask; + sBGConfig uBGConfig[4]; +}; + +struct sScanline +{ + ivec2 BGOffset[4]; + ivec4 BGRotscale[2]; + int BackColor; + uint WinRegs; + int WinMask; + ivec4 WinPos; + bvec4 BGMosaicEnable; + ivec4 MosaicSize; +}; + +layout(std140) uniform ubScanlineConfig +{ + sScanline uScanline[192]; +}; + +layout(std140) uniform ubCompositorConfig +{ + ivec4 uBGPrio; + bool uEnableOBJ; + bool uEnable3D; + int uBlendCnt; + int uBlendEffect; + ivec3 uBlendCoef; +}; + +uniform int uScaleFactor; + +smooth in vec4 fTexcoord; + +out vec4 oColor; + +int MosaicX = 0; + +ivec3 ConvertColor(int col) +{ + ivec3 ret; + ret.r = (col & 0x1F) << 1; + ret.g = ((col & 0x3E0) >> 4) | (col >> 15); + ret.b = (col & 0x7C00) >> 9; + return ret; +} + +vec4 BG0Fetch(vec2 coord) +{ + return texture(BGLayerTex[0], coord); +} + +vec4 BG1Fetch(vec2 coord) +{ + return texture(BGLayerTex[1], coord); +} + +vec4 BG2Fetch(vec2 coord) +{ + return texture(BGLayerTex[2], coord); +} + +vec4 BG3Fetch(vec2 coord) +{ + return texture(BGLayerTex[3], coord); +} + +vec4 BG0CalcAndFetch(vec2 coord, int line) +{ + ivec2 bgoffset = uScanline[line].BGOffset[0]; + vec2 bgpos = vec2(bgoffset.xy) + coord; + + if (uScanline[line].BGMosaicEnable[0]) + { + bgpos = floor(bgpos) - vec2(MosaicX, 0); + } + + return BG0Fetch(bgpos / vec2(uBGConfig[0].Size)); +} + +vec4 BG1CalcAndFetch(vec2 coord, int line) +{ + ivec2 bgoffset = uScanline[line].BGOffset[1]; + vec2 bgpos = vec2(bgoffset.xy) + coord; + + if (uScanline[line].BGMosaicEnable[1]) + { + bgpos = floor(bgpos) - vec2(MosaicX, 0); + } + + return BG1Fetch(bgpos / vec2(uBGConfig[1].Size)); +} + +vec4 BG2CalcAndFetch(vec2 coord, int line) +{ + ivec2 bgoffset = uScanline[line].BGOffset[2]; + vec2 bgpos; + if (uBGConfig[2].Type >= 2) + { + // rotscale BG + bgpos = vec2(bgoffset.xy) / 256; + vec4 rotscale = 
vec4(uScanline[line].BGRotscale[0]) / 256; + mat2 rsmatrix = mat2(rotscale.xy, rotscale.zw); + bgpos = bgpos + (coord * rsmatrix); + } + else + { + // text-mode BG + bgpos = vec2(bgoffset.xy) + coord; + } + + if (uScanline[line].BGMosaicEnable[2]) + { + bgpos = floor(bgpos) - vec2(MosaicX, 0); + } + + if (uBGConfig[2].Type >= 7) + { + // hi-res capture + bgpos.y += uBGConfig[2].MapOffset; + vec3 capcoord = vec3(bgpos / vec2(uBGConfig[2].Size), uBGConfig[2].TileOffset); + + // due to the possible weirdness of display capture buffers, + // we need to do custom wraparound handling + if (uBGConfig[2].Clamp) + { + if (any(lessThan(capcoord.xy, vec2(0))) || any(greaterThanEqual(capcoord.xy, vec2(1)))) + return vec4(0); + } + + if (uBGConfig[2].Type == 7) + return texture(Capture128Tex, capcoord); + else + return texture(Capture256Tex, capcoord); + } + + return BG2Fetch(bgpos / vec2(uBGConfig[2].Size)); +} + +vec4 BG3CalcAndFetch(vec2 coord, int line) +{ + ivec2 bgoffset = uScanline[line].BGOffset[3]; + vec2 bgpos; + if (uBGConfig[3].Type >= 2) + { + // rotscale BG + bgpos = vec2(bgoffset.xy) / 256; + vec4 rotscale = vec4(uScanline[line].BGRotscale[1]) / 256; + mat2 rsmatrix = mat2(rotscale.xy, rotscale.zw); + bgpos = bgpos + (coord * rsmatrix); + } + else + { + // text-mode BG + bgpos = vec2(bgoffset.xy) + coord; + } + + if (uScanline[line].BGMosaicEnable[3]) + { + bgpos = floor(bgpos) - vec2(MosaicX, 0); + } + + if (uBGConfig[3].Type >= 7) + { + // hi-res capture + bgpos.y += uBGConfig[3].MapOffset; + vec3 capcoord = vec3(bgpos / vec2(uBGConfig[3].Size), uBGConfig[3].TileOffset); + + // due to the possible weirdness of display capture buffers, + // we need to do custom wraparound handling + if (uBGConfig[3].Clamp) + { + if (any(lessThan(capcoord.xy, vec2(0))) || any(greaterThanEqual(capcoord.xy, vec2(1)))) + return vec4(0); + } + + if (uBGConfig[3].Type == 7) + return texture(Capture128Tex, capcoord); + else + return texture(Capture256Tex, capcoord); + } + + return 
BG3Fetch(bgpos / vec2(uBGConfig[3].Size)); +} +// Resolves the sprite pixel under OBJ mosaic: scans the 16 columns ending at coord.x and latches flags/color per the rules below. +void CalcSpriteMosaic(in ivec2 coord, out ivec4 objflags, out vec4 objcolor) +{ + for (int i = 0; i < 16; i++) + { + ivec2 curpos = ivec2(coord.x - 15 + i, coord.y); + + if (curpos.x < 0) + { + objflags = ivec4(0); + objcolor = vec4(0); + } + else + { + int mosx = texelFetch(MosaicTex, ivec2(curpos.x, uScanline[curpos.y].MosaicSize.z), 0).r; + vec4 color = texelFetch(OBJLayerTex, ivec3(curpos * uScaleFactor, 0), 0); + ivec4 flags = ivec4(texelFetch(OBJLayerTex, ivec3(curpos * uScaleFactor, 1), 0) * 255.0); + + // objflags/objcolor are 'out' parameters and hold undefined values on entry, + // so the first sample (i == 0) is latched unconditionally; objflags is then + // never read before it has been written. + bool latch = (i == 0) || (mosx == 0) || (flags.g == 0); + if (!latch) + { + if ((objflags.g == 0) || (flags.a < objflags.a)) + latch = true; + } + + if (latch) + { + objflags = flags; + objcolor = color; + } + } + } +} + +vec4 CompositeLayers() +{ + ivec2 coord = ivec2(fTexcoord.zw); + vec2 bgcoord = vec2(fTexcoord.x, fract(fTexcoord.y)); + int xpos = int(fTexcoord.x); + int line = int(fTexcoord.y); + + if (uScanline[line].MosaicSize.x > 0) + MosaicX = texelFetch(MosaicTex, ivec2(bgcoord.x, uScanline[line].MosaicSize.x), 0).r; + + ivec4 col1 = ivec4(ConvertColor(uScanline[line].BackColor), 0x20); + int mask1 = 0x20; + ivec4 col2 = ivec4(0); + int mask2 = 0; + bool specialcase = false; + + vec4 layercol[6]; + layercol[0] = BG0CalcAndFetch(bgcoord, line); + layercol[1] = BG1CalcAndFetch(bgcoord, line); + layercol[2] = BG2CalcAndFetch(bgcoord, line); + layercol[3] = BG3CalcAndFetch(bgcoord, line); + + ivec4 objflags; + if (uScanline[line].MosaicSize.z > 0) + { + CalcSpriteMosaic(ivec2(fTexcoord.xy), objflags, layercol[4]); + } + else + { + layercol[4] = texelFetch(OBJLayerTex, ivec3(coord, 0), 0); + layercol[5] = texelFetch(OBJLayerTex, ivec3(coord, 1), 0); + objflags = ivec4(layercol[5] * 255.0); + } + + int winmask = uScanline[line].WinMask; + bool inside_win0, inside_win1; + + if (xpos < uScanline[line].WinPos[0]) + inside_win0 = ((winmask & (1<<0)) != 0); + else if
(xpos < uScanline[line].WinPos[1]) + inside_win0 = ((winmask & (1<<1)) != 0); + else + inside_win0 = ((winmask & (1<<2)) != 0); + + if (xpos < uScanline[line].WinPos[2]) + inside_win1 = ((winmask & (1<<3)) != 0); + else if (xpos < uScanline[line].WinPos[3]) + inside_win1 = ((winmask & (1<<4)) != 0); + else + inside_win1 = ((winmask & (1<<5)) != 0); + + uint winregs = uScanline[line].WinRegs; + uint winsel = winregs; + if (objflags.b > 0) + winsel = winregs >> 8; + if (inside_win1) + winsel = winregs >> 16; + if (inside_win0) + winsel = winregs >> 24; + + for (int prio = 3; prio >= 0; prio--) + { + for (int bg = 3; bg >= 0; bg--) + { + if ((uBGPrio[bg] == prio) && (layercol[bg].a > 0) && ((winsel & (1u << bg)) != 0u)) + { + col2 = col1; + mask2 = mask1 << 8; + col1 = ivec4(layercol[bg] * 255.0) >> ivec4(2,2,2,3); + mask1 = (1 << bg); + specialcase = (bg == 0) && uEnable3D; + } + } + + if (uEnableOBJ && (objflags.a == prio) && (layercol[4].a > 0) && ((winsel & (1u << 4)) != 0u)) + { + col2 = col1; + mask2 = mask1 << 8; + col1 = ivec4(layercol[4] * 255.0) >> ivec4(2,2,2,3); + mask1 = (1 << 4); + specialcase = (objflags.r != 0); + } + } + + int effect = 0; + int eva, evb, evy = uBlendCoef[2]; + + if (specialcase && (uBlendCnt & mask2) != 0) + { + if (mask1 == (1<<0)) + { + // 3D layer blending + effect = 4; + eva = (col1.a & 0x1F) + 1; + evb = 32 - eva; + } + else if (objflags.r == 1) + { + // semi-transparent sprite + effect = 1; + eva = uBlendCoef[0]; + evb = uBlendCoef[1]; + } + else //if (objflags.r == 2) + { + // bitmap sprite + effect = 1; + eva = col1.a; + evb = 16 - eva; + } + } + else if (((uBlendCnt & mask1) != 0) && ((winsel & (1u << 5)) != 0u)) + { + effect = uBlendEffect; + if (effect == 1) + { + if ((uBlendCnt & mask2) != 0) + { + eva = uBlendCoef[0]; + evb = uBlendCoef[1]; + } + else + effect = 0; + } + } + + if (effect == 1) + { + // blending + col1 = ((col1 * eva) + (col2 * evb) + 0x8) >> 4; + col1 = min(col1, 0x3F); + } + else if (effect == 2) + { + 
// brightness up + col1 = col1 + ((((0x3F - col1) * evy) + 0x8) >> 4); + } + else if (effect == 3) + { + // brightness down + col1 = col1 - (((col1 * evy) + 0x7) >> 4); + } + else if (effect == 4) + { + // 3D layer blending + col1 = ((col1 * eva) + (col2 * evb) + 0x10) >> 5; + } + + return vec4(vec3(col1.rgb << 2) / 255.0, 1); +} + +void main() +{ + oColor = CompositeLayers(); +} diff --git a/src/OpenGL_shaders/2DCompositorVS.glsl b/src/OpenGL_shaders/2DCompositorVS.glsl new file mode 100644 index 0000000000..48e10ea681 --- /dev/null +++ b/src/OpenGL_shaders/2DCompositorVS.glsl @@ -0,0 +1,14 @@ +#version 140 + +uniform int uScaleFactor; + +in vec2 vPosition; + +smooth out vec4 fTexcoord; + +void main() +{ + gl_Position = vec4((vPosition * 2) - 1, 0, 1); + fTexcoord.xy = vPosition * vec2(256, 192); + fTexcoord.zw = fTexcoord.xy * uScaleFactor; +} diff --git a/src/OpenGL_shaders/2DLayerPreFS.glsl b/src/OpenGL_shaders/2DLayerPreFS.glsl new file mode 100644 index 0000000000..016b4160eb --- /dev/null +++ b/src/OpenGL_shaders/2DLayerPreFS.glsl @@ -0,0 +1,235 @@ +#version 140 + +uniform usampler2D VRAMTex; +uniform sampler2D PalTex; + +struct sBGConfig +{ + ivec2 Size; + int Type; + int PalOffset; + int TileOffset; + int MapOffset; + bool Clamp; +}; + +layout(std140) uniform ubBGConfig +{ + int uVRAMMask; + sBGConfig uBGConfig[4]; +}; + +uniform int uCurBG; + +smooth in vec2 fTexcoord; + +out vec4 oColor; + +vec4 GetBGPalEntry(int layer, int pal, int id) +{ + ivec2 coord = ivec2(id, uBGConfig[layer].PalOffset + pal); + vec4 col = texelFetch(PalTex, coord, 0); + col.rgb *= (62.0/63.0); + col.g += (col.a * 1.0/63.0); + return col; +} + +int VRAMRead8(int addr) +{ + ivec2 coord = ivec2(addr & 0x3FF, (addr >> 10) & uVRAMMask); + int val = int(texelFetch(VRAMTex, coord, 0).r); + return val; +} + +int VRAMRead16(int addr) +{ + ivec2 coord = ivec2(addr & 0x3FF, (addr >> 10) & uVRAMMask); + int lo = int(texelFetch(VRAMTex, coord, 0).r); + int hi = int(texelFetch(VRAMTex, 
coord+ivec2(1,0), 0).r); + return lo | (hi << 8); +} + +vec4 GetBGLayerPixel(int layer, ivec2 coord) +{ + vec4 ret; + + if (uBGConfig[layer].Type == 0) + { + // text - 16-color tiles + + int mapoffset = uBGConfig[layer].MapOffset + + (((coord.x >> 3) & 0x1F) << 1) + + (((coord.y >> 3) & 0x1F) << 6); + + if (uBGConfig[layer].Size.y == 512) + { + if (uBGConfig[layer].Size.x == 512) + { + mapoffset += + (((coord.x >> 8) & 0x1) << 11) + + (((coord.y >> 8) & 0x1) << 12); + } + else + { + mapoffset += + (((coord.y >> 8) & 0x1) << 11); + } + } + else if (uBGConfig[layer].Size.x == 512) + { + mapoffset += + (((coord.x >> 8) & 0x1) << 11); + } + + int mapval = VRAMRead16(mapoffset); + int tileoffset = (uBGConfig[layer].TileOffset << 1) + ((mapval & 0x3FF) << 6); + + if ((mapval & (1<<10)) != 0) + tileoffset += (7 - (coord.x & 0x7)); + else + tileoffset += (coord.x & 0x7); + + if ((mapval & (1<<11)) != 0) + tileoffset += ((7 - (coord.y & 0x7)) << 3); + else + tileoffset += ((coord.y & 0x7) << 3); + + int col = VRAMRead8(tileoffset >> 1); + if ((tileoffset & 0x1) != 0) + col >>= 4; + else + col &= 0xF; + col += ((mapval >> 12) << 4); + + ret = GetBGPalEntry(layer, 0, col); + ret.a = ((col & 0xF) == 0) ? 
0 : 1; + } + else if (uBGConfig[layer].Type == 1) + { + // text - 256-color tiles + + int mapoffset = uBGConfig[layer].MapOffset + + (((coord.x >> 3) & 0x1F) << 1) + + (((coord.y >> 3) & 0x1F) << 6); + + if (uBGConfig[layer].Size.y == 512) + { + if (uBGConfig[layer].Size.x == 512) + { + mapoffset += + (((coord.x >> 8) & 0x1) << 11) + + (((coord.y >> 8) & 0x1) << 12); + } + else + { + mapoffset += + (((coord.y >> 8) & 0x1) << 11); + } + } + else if (uBGConfig[layer].Size.x == 512) + { + mapoffset += + (((coord.x >> 8) & 0x1) << 11); + } + + int mapval = VRAMRead16(mapoffset); + int tileoffset = uBGConfig[layer].TileOffset + ((mapval & 0x3FF) << 6); + + if ((mapval & (1<<10)) != 0) + tileoffset += (7 - (coord.x & 0x7)); + else + tileoffset += (coord.x & 0x7); + + if ((mapval & (1<<11)) != 0) + tileoffset += ((7 - (coord.y & 0x7)) << 3); + else + tileoffset += ((coord.y & 0x7) << 3); + + int col = VRAMRead8(tileoffset); + int pal = (uBGConfig[layer].PalOffset != 0) ? (mapval >> 12) : 0; + + ret = GetBGPalEntry(layer, pal, col); + ret.a = (col == 0) ? 0 : 1; + } + else if (uBGConfig[layer].Type == 2) + { + // affine - 256 color tiles + + int mapoffset = uBGConfig[layer].MapOffset + + (coord.x >> 3) + + ((coord.y >> 3) * (uBGConfig[layer].Size.x >> 3)); + + int mapval = VRAMRead8(mapoffset); + int tileoffset = uBGConfig[layer].TileOffset + (mapval << 6); + + tileoffset += ((coord.y & 0x7) << 3); + tileoffset += (coord.x & 0x7); + + int col = VRAMRead8(tileoffset); + + ret = GetBGPalEntry(layer, 0, col); + ret.a = (col == 0) ? 
0 : 1; + } + else if (uBGConfig[layer].Type == 3) + { + // extended - 256 color tiles + + int mapoffset = uBGConfig[layer].MapOffset + + (((coord.x >> 3) + + ((coord.y >> 3) * (uBGConfig[layer].Size.x >> 3))) << 1); + + int mapval = VRAMRead16(mapoffset); + int tileoffset = uBGConfig[layer].TileOffset + ((mapval & 0x3FF) << 6); + + if ((mapval & (1<<10)) != 0) + tileoffset += (7 - (coord.x & 0x7)); + else + tileoffset += (coord.x & 0x7); + + if ((mapval & (1<<11)) != 0) + tileoffset += ((7 - (coord.y & 0x7)) << 3); + else + tileoffset += ((coord.y & 0x7) << 3); + + int col = VRAMRead8(tileoffset); + int pal = (uBGConfig[layer].PalOffset != 0) ? (mapval >> 12) : 0; + + ret = GetBGPalEntry(layer, pal, col); + ret.a = (col == 0) ? 0 : 1; + } + else if (uBGConfig[layer].Type == 4) + { + // extended - 256 color bitmap + + int mapoffset = uBGConfig[layer].MapOffset + + coord.x + + (coord.y * uBGConfig[layer].Size.x); + + int col = VRAMRead8(mapoffset); + + ret = GetBGPalEntry(layer, 0, col); + ret.a = (col == 0) ? 
0 : 1; + } + else if (uBGConfig[layer].Type == 5) + { + // extended - direct color bitmap + + int mapoffset = uBGConfig[layer].MapOffset + + ((coord.x + + (coord.y * uBGConfig[layer].Size.x)) << 1); + + int col = VRAMRead16(mapoffset); + + ret.r = float((col << 1) & 0x3E) / 63; + ret.g = float((col >> 4) & 0x3E) / 63; + ret.b = float((col >> 9) & 0x3E) / 63; + ret.a = float(col >> 15); + } + + return ret; +} + +void main() +{ + oColor = GetBGLayerPixel(uCurBG, ivec2(fTexcoord)); +} diff --git a/src/OpenGL_shaders/2DLayerPreVS.glsl b/src/OpenGL_shaders/2DLayerPreVS.glsl new file mode 100644 index 0000000000..c4af8177a8 --- /dev/null +++ b/src/OpenGL_shaders/2DLayerPreVS.glsl @@ -0,0 +1,29 @@ +#version 140 + +struct sBGConfig +{ + ivec2 Size; + int Type; + int PalOffset; + int TileOffset; + int MapOffset; + bool Clamp; +}; + +layout(std140) uniform ubBGConfig +{ + int uVRAMMask; + sBGConfig uBGConfig[4]; +}; + +uniform int uCurBG; + +in vec2 vPosition; + +smooth out vec2 fTexcoord; + +void main() +{ + gl_Position = vec4((vPosition * 2) - 1, 0, 1); + fTexcoord = vPosition * vec2(uBGConfig[uCurBG].Size); +} diff --git a/src/OpenGL_shaders/2DSpriteFS.glsl b/src/OpenGL_shaders/2DSpriteFS.glsl new file mode 100644 index 0000000000..6bb0c4fd8b --- /dev/null +++ b/src/OpenGL_shaders/2DSpriteFS.glsl @@ -0,0 +1,150 @@ +#version 140 + +uniform sampler2D SpriteTex; +uniform sampler2DArray Capture128Tex; +uniform sampler2DArray Capture256Tex; + +struct sOAM +{ + ivec2 Position; + bvec2 Flip; + ivec2 Size; + ivec2 BoundSize; + int OBJMode; + int Type; + int PalOffset; + int TileOffset; + int TileStride; + int Rotscale; + int BGPrio; + bool Mosaic; +}; + +layout(std140) uniform ubSpriteConfig +{ + int uVRAMMask; + ivec4 uRotscale[32]; + sOAM uOAM[128]; +}; + +layout(std140) uniform ubSpriteScanlineConfig +{ + ivec4 uMosaicLine[48]; +}; + +uniform bool uRenderTransparent; + +flat in int fSpriteIndex; +smooth in vec2 fPosition; +smooth in vec2 fTexcoord; + +out vec4 oColor; +out 
vec4 oFlags; + +vec4 GetSpritePixel(int sprite, vec2 coord) +{ + ivec2 basecoord = ivec2((sprite & 0xF) * 64, (sprite >> 4) * 64); + + return texelFetch(SpriteTex, basecoord + ivec2(coord), 0); +} + +void main() +{ + vec4 col, flags = vec4(0); + vec2 coord = fTexcoord; + + if (uOAM[fSpriteIndex].Mosaic) + { + int line = int(fPosition.y); + int mosline = uMosaicLine[line>>2][line&0x3]; + + float ymin = 0; + if (uOAM[fSpriteIndex].Rotscale != -1) + ymin = -float(uOAM[fSpriteIndex].Size.y) / 2.0; + + float mosy = coord.y - (line - mosline); + if (coord.y >= ymin) + coord.y = max(mosy, ymin); + } + + if (uOAM[fSpriteIndex].Rotscale != -1) + { + // rotscale sprite + // fTexcoord is based on the sprite center + + vec2 sprsize = vec2(uOAM[fSpriteIndex].Size); + vec4 rotscale = vec4(uRotscale[uOAM[fSpriteIndex].Rotscale]) / 256; + mat2 rsmatrix = mat2(rotscale.xy, rotscale.zw); + coord = (coord * rsmatrix) + (sprsize / 2); + if (any(lessThan(coord, vec2(0)))) discard; + if (any(greaterThanEqual(coord, sprsize))) discard; + } + + if (uRenderTransparent) + { + // set BG priority and mosaic flags for transparent pixels + + if (uOAM[fSpriteIndex].Mosaic) + flags.g = 1; + + flags.a = float(uOAM[fSpriteIndex].BGPrio) / 255; + + oColor = vec4(0); + oFlags = flags; + return; + } + // capture-backed sprites: coords are normalized by 128 (Type 3) or 256 (Type 4), so each type samples the capture array of matching size + if (uOAM[fSpriteIndex].Type == 3) + { + coord += (ivec2(uOAM[fSpriteIndex].TileOffset) >> ivec2(1, 8)); + coord *= (1.0/128.0); + col = texture(Capture128Tex, vec3(fract(coord), uOAM[fSpriteIndex].TileStride)); + } + else if (uOAM[fSpriteIndex].Type == 4) + { + coord += (ivec2(uOAM[fSpriteIndex].TileOffset) >> ivec2(1, 9)); + coord *= (1.0/256.0); + col = texture(Capture256Tex, vec3(fract(coord), uOAM[fSpriteIndex].TileStride)); + } + else + { + col = GetSpritePixel(fSpriteIndex, coord); + } + + if (col.a == 0) discard; + + // oFlags: + // r = sprite blending flag + // g = mosaic flag + // b = OBJ window flag + // a = BG prio + + if (uOAM[fSpriteIndex].OBJMode == 2) + { + // OBJ window + // OBJ mosaic
doesn't apply to "OBJ window" sprites + flags.b = 1; + } + else + { + if (uOAM[fSpriteIndex].OBJMode == 1) + { + // semi-transparent sprite + flags.r = 1.0 / 255; + } + else if (uOAM[fSpriteIndex].OBJMode == 3) + { + // bitmap sprite + col.a = float(uOAM[fSpriteIndex].PalOffset) / 31; + flags.r = 2.0 / 255; + } + + if (uOAM[fSpriteIndex].Mosaic) + flags.g = 1; + + flags.a = float(uOAM[fSpriteIndex].BGPrio) / 255; + } + + oColor = col; + oFlags = flags; +} diff --git a/src/OpenGL_shaders/2DSpritePreFS.glsl b/src/OpenGL_shaders/2DSpritePreFS.glsl new file mode 100644 index 0000000000..82aa6888b3 --- /dev/null +++ b/src/OpenGL_shaders/2DSpritePreFS.glsl @@ -0,0 +1,119 @@ +#version 140 + +uniform usampler2D VRAMTex; +uniform sampler2D PalTex; + +struct sOAM +{ + ivec2 Position; + bvec2 Flip; + ivec2 Size; + ivec2 BoundSize; + int OBJMode; + int Type; + int PalOffset; + int TileOffset; + int TileStride; + int Rotscale; + int BGPrio; + bool Mosaic; +}; + +layout(std140) uniform ubSpriteConfig +{ + int uVRAMMask; + ivec4 uRotscale[32]; + sOAM uOAM[128]; +}; + +flat in int fSpriteIndex; +smooth in vec2 fTexcoord; + +out vec4 oColor; + +vec4 GetOBJPalEntry(int pal, int id) +{ + ivec2 coord = ivec2(id, pal); + vec4 col = texelFetch(PalTex, coord, 0); + col.rgb *= (62.0/63.0); + col.g += (col.a * 1.0/63.0); + return col; +} + +int VRAMRead8(int addr) +{ + ivec2 coord = ivec2(addr & 0x3FF, (addr >> 10) & uVRAMMask); + int val = int(texelFetch(VRAMTex, coord, 0).r); + return val; +} + +int VRAMRead16(int addr) +{ + ivec2 coord = ivec2(addr & 0x3FF, (addr >> 10) & uVRAMMask); + int lo = int(texelFetch(VRAMTex, coord, 0).r); + int hi = int(texelFetch(VRAMTex, coord+ivec2(1,0), 0).r); + return lo | (hi << 8); +} + +vec4 GetSpritePixel(int sprite, ivec2 coord) +{ + vec4 ret; + + if (uOAM[sprite].Type == 0) + { + // 16-color + + int tileoffset = uOAM[sprite].TileOffset + + ((coord.x >> 3) * 32) + + ((coord.y >> 3) * uOAM[sprite].TileStride) + + ((coord.x & 0x7) >> 1) + + ((coord.y & 
0x7) << 2); + + int col = VRAMRead8(tileoffset); + if ((coord.x & 1) != 0) + col >>= 4; + else + col &= 0xF; + col += uOAM[sprite].PalOffset; + + ret = GetOBJPalEntry(0, col); + ret.a = ((col & 0xF) == 0) ? 0 : 1; + } + else if (uOAM[sprite].Type == 1) + { + // 256-color + + int tileoffset = uOAM[sprite].TileOffset + + ((coord.x >> 3) * 64) + + ((coord.y >> 3) * uOAM[sprite].TileStride) + + (coord.x & 0x7) + + ((coord.y & 0x7) << 3); + + int col = VRAMRead8(tileoffset); + + ret = GetOBJPalEntry(uOAM[sprite].PalOffset, col); + ret.a = (col == 0) ? 0 : 1; + } + else //if (uOAM[sprite].Type == 2) + { + // direct color bitmap + + int tileoffset = uOAM[sprite].TileOffset + + (coord.x * 2) + + (coord.y * uOAM[sprite].TileStride); + + int col = VRAMRead16(tileoffset); + + ret.r = float((col << 1) & 0x3E) / 63; + ret.g = float((col >> 4) & 0x3E) / 63; + ret.b = float((col >> 9) & 0x3E) / 63; + ret.a = float(col >> 15); + } + + return ret; +} + +void main() +{ + oColor = GetSpritePixel(fSpriteIndex, ivec2(fTexcoord)); +} diff --git a/src/OpenGL_shaders/2DSpritePreVS.glsl b/src/OpenGL_shaders/2DSpritePreVS.glsl new file mode 100644 index 0000000000..1eee5f633c --- /dev/null +++ b/src/OpenGL_shaders/2DSpritePreVS.glsl @@ -0,0 +1,42 @@ +#version 140 + +struct sOAM +{ + ivec2 Position; + bvec2 Flip; + ivec2 Size; + ivec2 BoundSize; + int OBJMode; + int Type; + int PalOffset; + int TileOffset; + int TileStride; + int Rotscale; + int BGPrio; + bool Mosaic; +}; + +layout(std140) uniform ubSpriteConfig +{ + int uVRAMMask; + ivec4 uRotscale[32]; + sOAM uOAM[128]; +}; + +in ivec2 vPosition; +in int vSpriteIndex; + +flat out int fSpriteIndex; +smooth out vec2 fTexcoord; + +void main() +{ + ivec2 sprpos = ivec2((vSpriteIndex & 0xF) * 64, (vSpriteIndex >> 4) * 64); + ivec2 sprsize = uOAM[vSpriteIndex].Size; + vec2 vtxpos = vec2(sprpos) + (vPosition * vec2(sprsize)); + vec2 fbsize = vec2(1024, 512); + + gl_Position = vec4(((vtxpos * 2) / fbsize) - 1, 0, 1); + fSpriteIndex = vSpriteIndex; 
+ fTexcoord = vPosition * vec2(sprsize); +} diff --git a/src/OpenGL_shaders/2DSpriteVS.glsl b/src/OpenGL_shaders/2DSpriteVS.glsl new file mode 100644 index 0000000000..c72309a3f0 --- /dev/null +++ b/src/OpenGL_shaders/2DSpriteVS.glsl @@ -0,0 +1,52 @@ +#version 140 + +struct sOAM +{ + ivec2 Position; + bvec2 Flip; + ivec2 Size; + ivec2 BoundSize; + int OBJMode; + int Type; + int PalOffset; + int TileOffset; + int TileStride; + int Rotscale; + int BGPrio; + bool Mosaic; +}; + +layout(std140) uniform ubSpriteConfig +{ + int uVRAMMask; + ivec4 uRotscale[32]; + sOAM uOAM[128]; +}; + +in ivec2 vPosition; +in ivec2 vTexcoord; +in int vSpriteIndex; + +flat out int fSpriteIndex; +smooth out vec2 fPosition; +smooth out vec2 fTexcoord; + +void main() +{ + vec2 sprsize = vec2(uOAM[vSpriteIndex].BoundSize); + vec2 fbsize = vec2(256, 192); + + int totalprio = (uOAM[vSpriteIndex].BGPrio * 128) + vSpriteIndex; + float z = float(totalprio) / 512.0; + gl_Position = vec4(((vec2(vPosition) * 2) / fbsize) - 1, z, 1); + fPosition = vPosition; + fSpriteIndex = vSpriteIndex; + + if (uOAM[vSpriteIndex].Rotscale == -1) + { + vec2 tmp = vec2(vTexcoord) * sprsize; + fTexcoord = mix(tmp, (sprsize - tmp), uOAM[vSpriteIndex].Flip); + } + else + fTexcoord = (vec2(vTexcoord) * sprsize) - (sprsize / 2); +} diff --git a/src/OpenGL_shaders/3DClearBitmapFS.glsl b/src/OpenGL_shaders/3DClearBitmapFS.glsl new file mode 100644 index 0000000000..3b20fb826b --- /dev/null +++ b/src/OpenGL_shaders/3DClearBitmapFS.glsl @@ -0,0 +1,28 @@ +#version 140 + +uniform usampler2D ClearBitmapColor; +uniform usampler2D ClearBitmapDepth; + +uniform vec2 uClearBitmapOffset; +uniform uint uOpaquePolyID; + +smooth in vec2 fTexcoord; + +out vec4 oColor; +out vec4 oAttr; + +void main() +{ + vec2 pos = fTexcoord + uClearBitmapOffset; + + vec4 color = vec4(texture(ClearBitmapColor, pos)) / vec4(63,63,63,31); + uint depth = texture(ClearBitmapDepth, pos).r; + float fdepth = float(depth & 0xFFFFFFu) / 16777216.0; + + oColor = 
color; + oAttr.r = float(uOpaquePolyID) / 63.0; + oAttr.g = 0; + oAttr.b = float(depth >> 24); + oAttr.a = 1; + gl_FragDepth = fdepth; +} diff --git a/src/OpenGL_shaders/3DClearBitmapVS.glsl b/src/OpenGL_shaders/3DClearBitmapVS.glsl new file mode 100644 index 0000000000..db8e66bfec --- /dev/null +++ b/src/OpenGL_shaders/3DClearBitmapVS.glsl @@ -0,0 +1,11 @@ +#version 140 + +in vec2 vPosition; + +smooth out vec2 fTexcoord; + +void main() +{ + fTexcoord = (vPosition + 1.0) * vec2(0.5, 0.375); + gl_Position = vec4(vPosition, 0.0, 1.0); +} diff --git a/src/OpenGL_shaders/3DClearFS.glsl b/src/OpenGL_shaders/3DClearFS.glsl new file mode 100644 index 0000000000..1115fc5112 --- /dev/null +++ b/src/OpenGL_shaders/3DClearFS.glsl @@ -0,0 +1,17 @@ +#version 140 + +uniform uvec4 uColor; +uniform uint uOpaquePolyID; +uniform uint uFogFlag; + +out vec4 oColor; +out vec4 oAttr; + +void main() +{ + oColor = vec4(uColor).rgba / 31.0; + oAttr.r = float(uOpaquePolyID) / 63.0; + oAttr.g = 0; + oAttr.b = float(uFogFlag); + oAttr.a = 1; +} diff --git a/src/OpenGL_shaders/3DClearVS.glsl b/src/OpenGL_shaders/3DClearVS.glsl new file mode 100644 index 0000000000..926be547f8 --- /dev/null +++ b/src/OpenGL_shaders/3DClearVS.glsl @@ -0,0 +1,11 @@ +#version 140 + +in vec2 vPosition; + +uniform uint uDepth; + +void main() +{ + float fdepth = (float(uDepth) / 8388608.0) - 1.0; + gl_Position = vec4(vPosition, fdepth, 1.0); +} diff --git a/src/OpenGL_shaders/3DFinalPassEdgeFS.glsl b/src/OpenGL_shaders/3DFinalPassEdgeFS.glsl new file mode 100644 index 0000000000..d98b7e16d1 --- /dev/null +++ b/src/OpenGL_shaders/3DFinalPassEdgeFS.glsl @@ -0,0 +1,80 @@ +#version 140 + +uniform sampler2D DepthBuffer; +uniform sampler2D AttrBuffer; + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + int uDispCnt; + vec4 uToonColors[32]; + vec4 uEdgeColors[8]; + vec4 uFogColor; + float uFogDensity[34]; + int uFogOffset; + int uFogShift; +}; + +out vec4 oColor; + +// make up for crapo zbuffer precision +bool 
isless(float a, float b) +{ + return a < b; + + // a < b + float diff = a - b; + return diff < (256.0 / 16777216.0); +} + +bool isgood(vec4 attr, float depth, int refPolyID, float refDepth) +{ + int polyid = int(attr.r * 63.0); + + if (polyid != refPolyID && isless(refDepth, depth)) + return true; + + return false; +} + +void main() +{ + ivec2 coord = ivec2(gl_FragCoord.xy); + int scale = 1;//int(uScreenSize.x / 256); + + vec4 ret = vec4(0,0,0,0); + vec4 depth = texelFetch(DepthBuffer, coord, 0); + vec4 attr = texelFetch(AttrBuffer, coord, 0); + + int polyid = int(attr.r * 63.0); + + if (attr.g != 0) + { + vec4 depthU = texelFetch(DepthBuffer, coord + ivec2(0,-scale), 0); + vec4 attrU = texelFetch(AttrBuffer, coord + ivec2(0,-scale), 0); + vec4 depthD = texelFetch(DepthBuffer, coord + ivec2(0,scale), 0); + vec4 attrD = texelFetch(AttrBuffer, coord + ivec2(0,scale), 0); + vec4 depthL = texelFetch(DepthBuffer, coord + ivec2(-scale,0), 0); + vec4 attrL = texelFetch(AttrBuffer, coord + ivec2(-scale,0), 0); + vec4 depthR = texelFetch(DepthBuffer, coord + ivec2(scale,0), 0); + vec4 attrR = texelFetch(AttrBuffer, coord + ivec2(scale,0), 0); + + if (isgood(attrU, depthU.r, polyid, depth.r) || + isgood(attrD, depthD.r, polyid, depth.r) || + isgood(attrL, depthL.r, polyid, depth.r) || + isgood(attrR, depthR.r, polyid, depth.r)) + { + // mark this pixel! 
+ + ret.rgb = uEdgeColors[polyid >> 3].rgb; + + // this isn't quite accurate, but it will have to do + if ((uDispCnt & (1<<4)) != 0) + ret.a = 0.5; + else + ret.a = 1; + } + } + + oColor = ret; +} diff --git a/src/OpenGL_shaders/3DFinalPassFogFS.glsl b/src/OpenGL_shaders/3DFinalPassFogFS.glsl new file mode 100644 index 0000000000..1c0e350c74 --- /dev/null +++ b/src/OpenGL_shaders/3DFinalPassFogFS.glsl @@ -0,0 +1,62 @@ +#version 140 + +uniform sampler2D DepthBuffer; +uniform sampler2D AttrBuffer; + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + int uDispCnt; + vec4 uToonColors[32]; + vec4 uEdgeColors[8]; + vec4 uFogColor; + float uFogDensity[34]; + int uFogOffset; + int uFogShift; +}; + +out vec4 oColor; + +vec4 CalculateFog(float depth) +{ + int idepth = int(depth * 16777216.0); + int densityid, densityfrac; + + if (idepth < uFogOffset) + { + densityid = 0; + densityfrac = 0; + } + else + { + uint udepth = uint(idepth); + udepth -= uint(uFogOffset); + udepth = (udepth >> 2) << uint(uFogShift); + + densityid = int(udepth >> 17); + if (densityid >= 32) + { + densityid = 32; + densityfrac = 0; + } + else + densityfrac = int(udepth & uint(0x1FFFF)); + } + + float density = mix(uFogDensity[densityid], uFogDensity[densityid+1], float(densityfrac)/131072.0); + + return vec4(density, density, density, density); +} + +void main() +{ + ivec2 coord = ivec2(gl_FragCoord.xy); + + vec4 ret = vec4(0,0,0,0); + vec4 depth = texelFetch(DepthBuffer, coord, 0); + vec4 attr = texelFetch(AttrBuffer, coord, 0); + + if (attr.b != 0) ret = CalculateFog(depth.r); + + oColor = ret; +} diff --git a/src/OpenGL_shaders/3DFinalPassVS.glsl b/src/OpenGL_shaders/3DFinalPassVS.glsl new file mode 100644 index 0000000000..248aa08cb4 --- /dev/null +++ b/src/OpenGL_shaders/3DFinalPassVS.glsl @@ -0,0 +1,9 @@ +#version 140 + +in vec2 vPosition; + +void main() +{ + // heh + gl_Position = vec4(vPosition, 0.0, 1.0); +} diff --git a/src/OpenGL_shaders/3DRenderFS.glsl 
b/src/OpenGL_shaders/3DRenderFS.glsl new file mode 100644 index 0000000000..cb7ad07e77 --- /dev/null +++ b/src/OpenGL_shaders/3DRenderFS.glsl @@ -0,0 +1,129 @@ +#version 140 + +uniform usampler2DArray CurTexture; +uniform sampler2DArray Capture128Texture; +uniform sampler2DArray Capture256Texture; + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + int uDispCnt; + vec4 uToonColors[32]; + vec4 uEdgeColors[8]; + vec4 uFogColor; + float uFogDensity[34]; + int uFogOffset; + int uFogShift; +}; + +uniform int uRenderMode; // 0=opaque 1=translucent 2=shadowmask + +smooth in vec4 fColor; +smooth in vec2 fTexcoord; +flat in ivec3 fPolygonAttr; + +#ifdef WBuffer +smooth in float fZ; +#endif + +out vec4 oColor; +out vec4 oAttr; + +vec4 FinalColor() +{ + vec4 col; + vec4 vcol = fColor; + int blendmode = (fPolygonAttr.x >> 4) & 0x3; + + if (blendmode == 2) + { + if ((uDispCnt & (1<<1)) == 0) + { + // toon + vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; + vcol.rgb = tooncolor; + } + else + { + // highlight + vcol.rgb = vcol.rrr; + } + } + + if (fPolygonAttr.y == 0xFFFF) + { + // no texture + col = vcol; + } + else + { + vec3 texcoord = vec3(fTexcoord, fPolygonAttr.y); + vec4 tcol; + if (fPolygonAttr.z == 0) + tcol = vec4(texture(CurTexture, texcoord)) / vec4(63,63,63,31); + else if (fPolygonAttr.z == 1) + tcol = texture(Capture128Texture, texcoord); + else + tcol = texture(Capture256Texture, texcoord); + + if ((blendmode & 1) != 0) + { + // decal + col.rgb = (tcol.rgb * tcol.a) + (vcol.rgb * (1.0-tcol.a)); + col.a = vcol.a; + } + else + { + // modulate + col = vcol * tcol; + } + } + + if (blendmode == 2) + { + if ((uDispCnt & (1<<1)) != 0) + { + vec3 tooncolor = uToonColors[int(vcol.r * 31)].rgb; + col.rgb = min(col.rgb + tooncolor, 1.0); + } + } + + return col.rgba; +} + +void main() +{ + if (uRenderMode == 2) + { + oColor = vec4(0,0,0,1); + } + else + { + vec4 col = FinalColor(); + if (uRenderMode == 0) + { + // opaque pixels + if (col.a < 30.5/31) discard; + + 
oAttr.r = float((fPolygonAttr.x >> 24) & 0x3F) / 63.0; + oAttr.g = 0; + oAttr.b = float((fPolygonAttr.x >> 15) & 0x1); + oAttr.a = 1; + } + else + { + // translucent pixels + if (col.a < 0.5/31) discard; + if (col.a >= 30.5/31) discard; + + oAttr.b = 0; + oAttr.a = 1; + } + + oColor = col; + } + +#ifdef WBuffer + gl_FragDepth = fZ; +#endif +} diff --git a/src/OpenGL_shaders/3DRenderVS.glsl b/src/OpenGL_shaders/3DRenderVS.glsl new file mode 100644 index 0000000000..4942221a0a --- /dev/null +++ b/src/OpenGL_shaders/3DRenderVS.glsl @@ -0,0 +1,65 @@ +#version 140 + +layout(std140) uniform uConfig +{ + vec2 uScreenSize; + int uDispCnt; + vec4 uToonColors[32]; + vec4 uEdgeColors[8]; + vec4 uFogColor; + float uFogDensity[34]; + int uFogOffset; + int uFogShift; +}; + +in uvec4 vPosition; +in uvec4 vColor; +in ivec2 vTexcoord; +in ivec3 vPolygonAttr; + +smooth out vec4 fColor; +smooth out vec2 fTexcoord; +flat out ivec3 fPolygonAttr; + +#ifdef WBuffer +smooth out float fZ; +#endif + +void main() +{ + int attr = vPolygonAttr.x; + int zshift = (attr >> 16) & 0x1F; + + vec4 fpos; + fpos.xy = (((vec2(vPosition.xy) ) * 2.0) / uScreenSize) - 1.0; +#ifdef WBuffer + fZ = float(vPosition.z << zshift) / 16777216.0; + fpos.z = 0; +#else + fpos.z = (float(vPosition.z << zshift) / 8388608.0) - 1.0; +#endif + fpos.w = float(vPosition.w) / 65536.0f; + fpos.xyz *= fpos.w; + + int texwidth = vPolygonAttr.z & 0xFFFF; + int texheight = (vPolygonAttr.z >> 16) & 0xFFFF; + vec2 texfactor = 1.0 / (16 * vec2(texwidth, texheight)); + + vec2 texcoord = vec2(vTexcoord); + int capyoffset = vPolygonAttr.y >> 16; + int attrz = 0; + if (capyoffset != -1) + { + texcoord.y += capyoffset; + if (texwidth == 128) + attrz = 1; + else + attrz = 2; + } + + fColor = vec4(vColor) / vec4(255.0,255.0,255.0,31.0); + fTexcoord = texcoord * texfactor; + fPolygonAttr = ivec3(vPolygonAttr.x, vPolygonAttr.y & 0xFFFF, attrz); + + gl_Position = fpos; +} diff --git a/src/OpenGL_shaders/CMakeLists.txt 
b/src/OpenGL_shaders/CMakeLists.txt new file mode 100644 index 0000000000..381cb3fd57 --- /dev/null +++ b/src/OpenGL_shaders/CMakeLists.txt @@ -0,0 +1,47 @@ +set(OPENGL_SHADER_FILES + 2DLayerPreVS.glsl + 2DLayerPreFS.glsl + 2DSpritePreVS.glsl + 2DSpritePreFS.glsl + 2DSpriteVS.glsl + 2DSpriteFS.glsl + 2DCompositorVS.glsl + 2DCompositorFS.glsl + + FinalPassVS.glsl + FinalPassFS.glsl + CaptureVS.glsl + CaptureFS.glsl + CaptureDownscaleVS.glsl + CaptureDownscaleFS.glsl + + 3DClearVS.glsl + 3DClearFS.glsl + 3DClearBitmapVS.glsl + 3DClearBitmapFS.glsl + 3DRenderVS.glsl + 3DRenderFS.glsl + 3DFinalPassVS.glsl + 3DFinalPassEdgeFS.glsl + 3DFinalPassFogFS.glsl +) + +foreach(SH_INPUT ${OPENGL_SHADER_FILES}) + get_filename_component(SH_NAME ${SH_INPUT} NAME_WE) + set(SH_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${SH_NAME}.h") + add_custom_command( + OUTPUT ${SH_OUTPUT} + DEPENDS ${SH_INPUT} + COMMAND + ${CMAKE_COMMAND} + -D INPUT_FILE=${SH_INPUT} + -D OUTPUT_FILE=${SH_OUTPUT} + -D VAR_NAME=${SH_NAME} + -P "${CMAKE_SOURCE_DIR}/cmake/MakeEmbed.cmake" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + ) + list(APPEND OPENGL_SHADER_OUT ${SH_OUTPUT}) +endforeach() + +add_custom_target(OpenGL_shaders ALL DEPENDS ${OPENGL_SHADER_OUT}) +add_dependencies(core OpenGL_shaders) diff --git a/src/OpenGL_shaders/CaptureDownscaleFS.glsl b/src/OpenGL_shaders/CaptureDownscaleFS.glsl new file mode 100644 index 0000000000..19e8cbc6c6 --- /dev/null +++ b/src/OpenGL_shaders/CaptureDownscaleFS.glsl @@ -0,0 +1,15 @@ +#version 140 + +uniform sampler2DArray InputTex; +uniform int uInputLayer; + +smooth in vec2 fTexcoord; + +out vec4 oColor; + +void main() +{ + ivec4 col = ivec4(texture(InputTex, vec3(fTexcoord, uInputLayer)) * 255.0); + oColor.rgb = vec3(col.rgb >> 3) / 31.0; + oColor.a = (col.a>0) ? 
1 : 0; +} diff --git a/src/OpenGL_shaders/CaptureDownscaleVS.glsl b/src/OpenGL_shaders/CaptureDownscaleVS.glsl new file mode 100644 index 0000000000..62ed543c2f --- /dev/null +++ b/src/OpenGL_shaders/CaptureDownscaleVS.glsl @@ -0,0 +1,11 @@ +#version 140 + +in vec2 vPosition; + +smooth out vec2 fTexcoord; + +void main() +{ + gl_Position = vec4((vPosition * 2) - 1, 0, 1); + fTexcoord = vPosition; +} diff --git a/src/OpenGL_shaders/CaptureFS.glsl b/src/OpenGL_shaders/CaptureFS.glsl new file mode 100644 index 0000000000..ba31f873ae --- /dev/null +++ b/src/OpenGL_shaders/CaptureFS.glsl @@ -0,0 +1,73 @@ +#version 140 + +uniform sampler2D InputTexA; +uniform sampler2DArray InputTexB; + +layout(std140) uniform ubCaptureConfig +{ + vec2 uInvCaptureSize; + int uSrcALayer; + int uSrcBLayer; + int uSrcBOffset; + int uDstMode; + ivec2 uBlendFactors; + vec4 uSrcAOffset[48]; + float uSrcBColorFactor; +}; + +smooth in vec4 fTexcoord; + +out vec4 oColor; + +float GetSrcAPos(float line) +{ + int iline = int(line); + float a = uSrcAOffset[iline>>2][iline&0x3]; + iline = min(iline + 1, 191); // clamp: uSrcAOffset holds 48*4 = 192 entries, so 191 is the last valid index + float b = uSrcAOffset[iline>>2][iline&0x3]; + return mix(a, b, fract(line)); +} + +void main() +{ + vec2 coordA = fTexcoord.xy; + vec3 coordB = vec3(fTexcoord.xw, uSrcBLayer); + ivec4 cap_out; + + // apply scroll for 3D layer, if we're capturing that + if (uSrcALayer == 1) + { + int line = int(fTexcoord.z); + coordA.x += uSrcAOffset[line>>2][line&0x3]; + //coordA.x += GetSrcAPos(fTexcoord.z); + } + + if (uDstMode == 0) + { + // source A only + cap_out = ivec4(texture(InputTexA, coordA) * 255.0); + } + else if (uDstMode == 1) + { + // source B only + cap_out = ivec4(texture(InputTexB, coordB) * uSrcBColorFactor); + } + else + { + // sources A and B blended + ivec4 srcA = ivec4(texture(InputTexA, coordA) * 255.0) >> 3; + ivec4 srcB = ivec4(texture(InputTexB, coordB) * uSrcBColorFactor) >> 3; + + int eva = uBlendFactors[0]; + int evb = uBlendFactors[1]; + + int aa = (srcA.a > 0) ? 1 : 0; + int ab = (srcB.a > 0) ? 
1 : 0; + + cap_out.rgb = ((srcA.rgb * aa * eva) + (srcB.rgb * ab * evb) + 0x8) >> 4; + cap_out.rgb = min(cap_out.rgb, 0x1F) << 3; + cap_out.a = (eva>0 ? aa : 0) | (evb>0 ? ab : 0); + } + + oColor = vec4(vec3(cap_out.rgb) / 255.0, (cap_out.a>0) ? 1.0 : 0.0); +} diff --git a/src/OpenGL_shaders/CaptureVS.glsl b/src/OpenGL_shaders/CaptureVS.glsl new file mode 100644 index 0000000000..e6564597a0 --- /dev/null +++ b/src/OpenGL_shaders/CaptureVS.glsl @@ -0,0 +1,27 @@ +#version 140 + +layout(std140) uniform ubCaptureConfig +{ + vec2 uInvCaptureSize; + int uSrcALayer; + int uSrcBLayer; + int uSrcBOffset; + int uDstMode; + ivec2 uBlendFactors; + vec4 uSrcAOffset[48]; + float uSrcBColorFactor; +}; + +in ivec2 vPosition; +in ivec2 vTexcoord; + +smooth out vec4 fTexcoord; + +void main() +{ + vec2 pos = vec2(vPosition) * uInvCaptureSize.xx; + gl_Position = vec4((pos * 2) - 1, 0, 1); + fTexcoord.xy = vec2(vTexcoord) / vec2(256,192); + fTexcoord.z = vTexcoord.y; + fTexcoord.w = float(vTexcoord.y + uSrcBOffset) / 256.0; +} diff --git a/src/OpenGL_shaders/FinalPassFS.glsl b/src/OpenGL_shaders/FinalPassFS.glsl new file mode 100644 index 0000000000..bfbb2d933f --- /dev/null +++ b/src/OpenGL_shaders/FinalPassFS.glsl @@ -0,0 +1,97 @@ +#version 140 + +uniform sampler2D MainInputTexA; +uniform sampler2D MainInputTexB; +uniform sampler2DArray AuxInputTex; + +layout(std140) uniform ubFinalPassConfig +{ + bvec4 uScreenSwap[48]; // one bool per scanline + int uScaleFactor; + int uAuxLayer; + int uDispModeA; + int uDispModeB; + int uBrightModeA; + int uBrightModeB; + int uBrightFactorA; + int uBrightFactorB; + float uAuxColorFactor; +}; + +smooth in vec3 fTexcoord; + +out vec4 oTopColor; +out vec4 oBottomColor; + +ivec3 MasterBrightness(ivec3 color, int brightmode, int evy) +{ + if (brightmode == 1) + { + // up + color += (((0x3F - color) * evy) >> 4); + } + else if (brightmode == 2) + { + // down + color -= (((color * evy) + 0xF) >> 4); + } + + return color; +} + +void main() +{ + ivec4 
col_main = ivec4(texture(MainInputTexA, fTexcoord.xy, 0) * 255.0) >> 2; + ivec4 col_sub = ivec4(texture(MainInputTexB, fTexcoord.xy, 0) * 255.0) >> 2; + + ivec3 output_main, output_sub; + + if (uDispModeA == 0) + { + // screen disabled (white) + output_main = ivec3(63, 63, 63); + } + else if (uDispModeA == 1) + { + // BG/OBJ layers + output_main = col_main.rgb; + } + else + { + // VRAM display / mainmem FIFO + output_main = ivec3(texture(AuxInputTex, vec3(fTexcoord.xz, uAuxLayer)).rgb * uAuxColorFactor); + } + + if (uDispModeB == 0) + { + // screen disabled (white) + output_sub = ivec3(63, 63, 63); + } + else + { + // BG/OBJ layers + output_sub = col_sub.rgb; + } + + if (uDispModeA != 0) + output_main = MasterBrightness(output_main, uBrightModeA, uBrightFactorA); + if (uDispModeB != 0) + output_sub = MasterBrightness(output_sub, uBrightModeB, uBrightFactorB); + + output_main = (output_main << 2) | (output_main >> 4); // 6-bit -> 8-bit: replicate the top two bits into the low two so 63 maps to 255 + output_sub = (output_sub << 2) | (output_sub >> 4); + + int line = int(fTexcoord.y * 192); + bool swapbit = uScreenSwap[line>>2][line&0x3]; + + if (!swapbit) + { + oTopColor = vec4(vec3(output_sub) / 255.0, 1.0); + oBottomColor = vec4(vec3(output_main) / 255.0, 1.0); + } + else + { + oTopColor = vec4(vec3(output_main) / 255.0, 1.0); + oBottomColor = vec4(vec3(output_sub) / 255.0, 1.0); + } +} diff --git a/src/OpenGL_shaders/FinalPassVS.glsl b/src/OpenGL_shaders/FinalPassVS.glsl new file mode 100644 index 0000000000..230a917f1f --- /dev/null +++ b/src/OpenGL_shaders/FinalPassVS.glsl @@ -0,0 +1,25 @@ +#version 140 + +layout(std140) uniform ubFinalPassConfig +{ + bvec4 uScreenSwap[48]; // one bool per scanline + int uScaleFactor; + int uAuxLayer; + int uDispModeA; + int uDispModeB; + int uBrightModeA; + int uBrightModeB; + int uBrightFactorA; + int uBrightFactorB; + float uAuxColorFactor; +}; + +in vec2 vPosition; + +smooth out vec3 fTexcoord; + +void main() +{ + gl_Position = vec4(vPosition, 0, 1); + fTexcoord = (vPosition.xyy + 1) * vec3(0.5, 0.5, 
0.375); +} diff --git a/src/Savestate.cpp b/src/Savestate.cpp index 61261e6a3c..a1411911f2 100644 --- a/src/Savestate.cpp +++ b/src/Savestate.cpp @@ -191,6 +191,21 @@ void Savestate::Section(const char* magic) } } +void Savestate::VarBool(bool* var) +{ + if (Saving) + { + u8 val = (u8)*var; + Var8(&val); + } + else + { + u8 val; + Var8(&val); + *var = val != 0; + } +} + void Savestate::Bool32(bool* var) { // for compatibility diff --git a/src/Savestate.h b/src/Savestate.h index f046e43cfe..c80df283cd 100644 --- a/src/Savestate.h +++ b/src/Savestate.h @@ -24,7 +24,7 @@ #include #include "types.h" -#define SAVESTATE_MAJOR 13 +#define SAVESTATE_MAJOR 14 #define SAVESTATE_MINOR 0 // bitmask for the savestate config word @@ -73,7 +73,8 @@ class Savestate VarArray(var, sizeof(*var)); } - void Bool32(bool* var); + void VarBool(bool* var); + void Bool32(bool* var); // backwards compatibility (TODO remove) void VarArray(void* data, u32 len); diff --git a/src/frontend/qt_sdl/EmuInstance.cpp b/src/frontend/qt_sdl/EmuInstance.cpp index d6d662fa93..ddbbc8ca35 100755 --- a/src/frontend/qt_sdl/EmuInstance.cpp +++ b/src/frontend/qt_sdl/EmuInstance.cpp @@ -418,12 +418,13 @@ void EmuInstance::releaseGL() } } -void EmuInstance::drawScreenGL() + +void EmuInstance::drawScreen() { for (int i = 0; i < kMaxWindows; i++) { if (windowList[i]) - windowList[i]->drawScreenGL(); + windowList[i]->drawScreen(); } } @@ -1371,7 +1372,7 @@ bool EmuInstance::updateConsole() noexcept nds->Reset(); loadRTCData(); - //emuThread->updateVideoRenderer(); // not actually needed? 
+ emuThread->updateVideoRenderer(); } else { diff --git a/src/frontend/qt_sdl/EmuInstance.h b/src/frontend/qt_sdl/EmuInstance.h index 01ea2a5c44..9fb55884e0 100755 --- a/src/frontend/qt_sdl/EmuInstance.h +++ b/src/frontend/qt_sdl/EmuInstance.h @@ -122,7 +122,8 @@ class EmuInstance void setVSyncGL(bool vsync); void makeCurrentGL(); void releaseGL(); - void drawScreenGL(); + + void drawScreen(); // return: empty string = setup OK, non-empty = error message QString verifySetup(); diff --git a/src/frontend/qt_sdl/EmuThread.cpp b/src/frontend/qt_sdl/EmuThread.cpp index 399a047c21..ffbb230aa0 100755 --- a/src/frontend/qt_sdl/EmuThread.cpp +++ b/src/frontend/qt_sdl/EmuThread.cpp @@ -48,9 +48,8 @@ #include "RTC.h" #include "DSi.h" #include "DSi_I2C.h" -#include "GPU3D_Soft.h" -#include "GPU3D_OpenGL.h" -#include "GPU3D_Compute.h" +#include "GPU_Soft.h" +#include "GPU_OpenGL.h" #include "Savestate.h" @@ -299,7 +298,7 @@ void EmuThread::run() // emulate u32 nlines; - if (emuInstance->nds->GPU.GetRenderer3D().NeedsShaderCompile()) + if (emuInstance->nds->GPU.GetRenderer().NeedsShaderCompile()) { compileShaders(); nlines = 1; @@ -318,17 +317,7 @@ void EmuThread::run() if (emuInstance->firmwareSave) emuInstance->firmwareSave->CheckFlush(); - if (!useOpenGL) - { - frontBufferLock.lock(); - frontBuffer = emuInstance->nds->GPU.FrontBuffer; - frontBufferLock.unlock(); - } - else - { - frontBuffer = emuInstance->nds->GPU.FrontBuffer; - emuInstance->drawScreenGL(); - } + emuInstance->drawScreen(); #ifdef MELONCAP MelonCap::Update(); @@ -447,10 +436,7 @@ void EmuThread::run() SDL_Delay(75); - if (useOpenGL) - { - emuInstance->drawScreenGL(); - } + emuInstance->drawScreen(); } handleMessages(); @@ -868,18 +854,20 @@ void EmuThread::enableCheats(bool enable) void EmuThread::updateRenderer() { + auto nds = emuInstance->nds; + if (videoRenderer != lastVideoRenderer) { switch (videoRenderer) { case renderer3D_Software: - emuInstance->nds->GPU.SetRenderer3D(std::make_unique()); + 
nds->SetRenderer(std::make_unique(*nds)); break; case renderer3D_OpenGL: - emuInstance->nds->GPU.SetRenderer3D(GLRenderer::New()); + nds->SetRenderer(std::make_unique(*nds, false)); break; case renderer3D_OpenGLCompute: - emuInstance->nds->GPU.SetRenderer3D(ComputeRenderer::New()); + nds->SetRenderer(std::make_unique(*nds, true)); break; default: __builtin_unreachable(); } @@ -887,37 +875,28 @@ void EmuThread::updateRenderer() lastVideoRenderer = videoRenderer; auto& cfg = emuInstance->getGlobalConfig(); - switch (videoRenderer) - { - case renderer3D_Software: - static_cast(emuInstance->nds->GPU.GetRenderer3D()).SetThreaded( - cfg.GetBool("3D.Soft.Threaded"), - emuInstance->nds->GPU); - break; - case renderer3D_OpenGL: - static_cast(emuInstance->nds->GPU.GetRenderer3D()).SetRenderSettings( - cfg.GetBool("3D.GL.BetterPolygons"), - cfg.GetInt("3D.GL.ScaleFactor")); - break; - case renderer3D_OpenGLCompute: - static_cast(emuInstance->nds->GPU.GetRenderer3D()).SetRenderSettings( - cfg.GetInt("3D.GL.ScaleFactor"), - cfg.GetBool("3D.GL.HiresCoordinates")); - break; - default: __builtin_unreachable(); - } + melonDS::RendererSettings settings = { + .ScaleFactor = cfg.GetInt("3D.GL.ScaleFactor"), + .Threaded = cfg.GetBool("3D.Soft.Threaded"), + .HiresCoordinates = cfg.GetBool("3D.GL.HiresCoordinates"), + .BetterPolygons = cfg.GetBool("3D.GL.BetterPolygons") + }; + + nds->GetRenderer().SetRenderSettings(settings); } void EmuThread::compileShaders() { + auto& renderer = emuInstance->nds->GPU.GetRenderer(); int currentShader, shadersCount; u64 startTime = SDL_GetPerformanceCounter(); // kind of hacky to look at the wallclock, though it is easier than // than disabling vsync do { - emuInstance->nds->GPU.GetRenderer3D().ShaderCompileStep(currentShader, shadersCount); - } while (emuInstance->nds->GPU.GetRenderer3D().NeedsShaderCompile() && + renderer.ShaderCompileStep(currentShader, shadersCount); + } + while (renderer.NeedsShaderCompile() && (SDL_GetPerformanceCounter() - 
startTime) * perfCountsSec < 1.0 / 6.0); emuInstance->osdAddMessage(0, "Compiling shader %d/%d", currentShader+1, shadersCount); } diff --git a/src/frontend/qt_sdl/EmuThread.h b/src/frontend/qt_sdl/EmuThread.h index 7a7f060052..036fc29206 100755 --- a/src/frontend/qt_sdl/EmuThread.h +++ b/src/frontend/qt_sdl/EmuThread.h @@ -137,9 +137,6 @@ class EmuThread : public QThread void updateVideoSettings() { videoSettingsDirty = true; } void updateVideoRenderer() { videoSettingsDirty = true; lastVideoRenderer = -1; } - int frontBuffer = 0; - QMutex frontBufferLock; - QWaitCondition glBorrowCond; QMutex glBorrowMutex; diff --git a/src/frontend/qt_sdl/Screen.cpp b/src/frontend/qt_sdl/Screen.cpp index 3126ea8fde..f222aca927 100755 --- a/src/frontend/qt_sdl/Screen.cpp +++ b/src/frontend/qt_sdl/Screen.cpp @@ -752,6 +752,8 @@ void ScreenPanel::calcSplashLayout() ScreenPanelNative::ScreenPanelNative(QWidget* parent) : ScreenPanel(parent) { + hasBuffers = false; + screen[0] = QImage(256, 192, QImage::Format_RGB32); screen[1] = QImage(256, 192, QImage::Format_RGB32); @@ -776,6 +778,23 @@ void ScreenPanelNative::setupScreenLayout() } } +void ScreenPanelNative::drawScreen() +{ + auto emuThread = emuInstance->getEmuThread(); + if (!emuThread->emuIsActive()) + { + hasBuffers = false; + return; + } + + auto nds = emuInstance->getNDS(); + assert(nds != nullptr); + + bufferLock.lock(); + hasBuffers = nds->GPU.GetFramebuffers(&topBuffer, &bottomBuffer); + bufferLock.unlock(); +} + void ScreenPanelNative::paintEvent(QPaintEvent* event) { QPainter painter(this); @@ -784,24 +803,18 @@ void ScreenPanelNative::paintEvent(QPaintEvent* event) painter.fillRect(event->rect(), QColor::fromRgb(0, 0, 0)); auto emuThread = emuInstance->getEmuThread(); - + if (emuThread->emuIsActive()) { emuInstance->renderLock.lock(); - auto nds = emuInstance->getNDS(); - assert(nds != nullptr); - emuThread->frontBufferLock.lock(); - int frontbuf = emuThread->frontBuffer; - if (!nds->GPU.Framebuffer[frontbuf][0] || 
!nds->GPU.Framebuffer[frontbuf][1]) + bufferLock.lock(); + if (hasBuffers) { - emuThread->frontBufferLock.unlock(); - return; + memcpy(screen[0].scanLine(0), topBuffer, 256 * 192 * 4); + memcpy(screen[1].scanLine(0), bottomBuffer, 256 * 192 * 4); } - - memcpy(screen[0].scanLine(0), nds->GPU.Framebuffer[frontbuf][0].get(), 256 * 192 * 4); - memcpy(screen[1].scanLine(0), nds->GPU.Framebuffer[frontbuf][1].get(), 256 * 192 * 4); - emuThread->frontBufferLock.unlock(); + bufferLock.unlock(); QRect screenrc(0, 0, 256, 192); @@ -917,31 +930,27 @@ void ScreenPanelGL::initOpenGL() {{"oColor", 0}}); glUseProgram(screenShaderProgram); - glUniform1i(glGetUniformLocation(screenShaderProgram, "ScreenTex"), 0); + glUniform1i(glGetUniformLocation(screenShaderProgram, "TopScreenTex"), 0); + glUniform1i(glGetUniformLocation(screenShaderProgram, "BottomScreenTex"), 1); screenShaderScreenSizeULoc = glGetUniformLocation(screenShaderProgram, "uScreenSize"); screenShaderTransformULoc = glGetUniformLocation(screenShaderProgram, "uTransform"); - // to prevent bleeding between both parts of the screen - // with bilinear filtering enabled - const int paddedHeight = 192*2+2; - const float padPixels = 1.f / paddedHeight; - const float vertices[] = { - 0.f, 0.f, 0.f, 0.f, - 0.f, 192.f, 0.f, 0.5f - padPixels, - 256.f, 192.f, 1.f, 0.5f - padPixels, - 0.f, 0.f, 0.f, 0.f, - 256.f, 192.f, 1.f, 0.5f - padPixels, - 256.f, 0.f, 1.f, 0.f, - - 0.f, 0.f, 0.f, 0.5f + padPixels, - 0.f, 192.f, 0.f, 1.f, - 256.f, 192.f, 1.f, 1.f, - 0.f, 0.f, 0.f, 0.5f + padPixels, - 256.f, 192.f, 1.f, 1.f, - 256.f, 0.f, 1.f, 0.5f + padPixels + 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 192.f, 0.f, 1.f, 0.f, + 256.f, 192.f, 1.f, 1.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, + 256.f, 192.f, 1.f, 1.f, 0.f, + 256.f, 0.f, 1.f, 0.f, 0.f, + + 0.f, 0.f, 0.f, 0.f, 1.f, + 0.f, 192.f, 0.f, 1.f, 1.f, + 256.f, 192.f, 1.f, 1.f, 1.f, + 0.f, 0.f, 0.f, 0.f, 1.f, + 256.f, 192.f, 1.f, 1.f, 1.f, + 256.f, 0.f, 1.f, 0.f, 1.f }; glGenBuffers(1, &screenVertexBuffer); @@ 
-951,22 +960,18 @@ void ScreenPanelGL::initOpenGL() glGenVertexArrays(1, &screenVertexArray); glBindVertexArray(screenVertexArray); glEnableVertexAttribArray(0); // position - glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 4*4, (void*)(0)); + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 5*4, (void*)(0)); glEnableVertexAttribArray(1); // texcoord - glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 4*4, (void*)(2*4)); + glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 5*4, (void*)(2*4)); glGenTextures(1, &screenTexture); glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, screenTexture); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, paddedHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - // fill the padding - u8 zeroData[256*4*4]; - memset(zeroData, 0, sizeof(zeroData)); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 2, GL_RGBA, GL_UNSIGNED_BYTE, zeroData); + glBindTexture(GL_TEXTURE_2D_ARRAY, screenTexture); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA, 256, 192, 2, 0, GL_BGRA, GL_UNSIGNED_BYTE, nullptr); OpenGL::CompileVertexFragmentProgram(osdShader, @@ -1096,7 +1101,7 @@ void ScreenPanelGL::osdDeleteItem(OSDItem* item) ScreenPanel::osdDeleteItem(item); } -void ScreenPanelGL::drawScreenGL() +void ScreenPanelGL::drawScreen() { if (!glContext) return; @@ -1110,10 +1115,11 @@ void ScreenPanelGL::drawScreenGL() glBindFramebuffer(GL_FRAMEBUFFER, 0); glDisable(GL_DEPTH_TEST); - 
glDepthMask(false); + glDepthMask(GL_FALSE); glDisable(GL_BLEND); glDisable(GL_SCISSOR_TEST); glDisable(GL_STENCIL_TEST); + glClearColor(0, 0, 0, 1); glClear(GL_COLOR_BUFFER_BIT); glViewport(0, 0, w, h); @@ -1125,34 +1131,33 @@ void ScreenPanelGL::drawScreenGL() glUseProgram(screenShaderProgram); glUniform2f(screenShaderScreenSizeULoc, w / factor, h / factor); - int frontbuf = emuThread->frontBuffer; - glActiveTexture(GL_TEXTURE0); - -#ifdef OGLRENDERER_ENABLED - if (nds->GPU.GetRenderer3D().Accelerated) + void* topbuf; void* bottombuf; + if (nds->GPU.GetFramebuffers(&topbuf, &bottombuf)) { - // hardware-accelerated render - nds->GPU.GetRenderer3D().BindOutputTexture(frontbuf); - } else -#endif + // if we're doing a regular render, use the provided framebuffers + // otherwise, GetFramebuffers() will set up the required state + + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D_ARRAY, screenTexture); + + glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, 256, 192, 1, GL_BGRA, + GL_UNSIGNED_BYTE, topbuf); + glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0, 0, 0, 1, 256, 192, 1, GL_BGRA, + GL_UNSIGNED_BYTE, bottombuf); + } + else { - // regular render - glBindTexture(GL_TEXTURE_2D, screenTexture); + GLuint texid = *(GLuint*)topbuf; - if (nds->GPU.Framebuffer[frontbuf][0] && nds->GPU.Framebuffer[frontbuf][1]) - { - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, nds->GPU.Framebuffer[frontbuf][0].get()); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192 + 2, 256, 192, GL_RGBA, - GL_UNSIGNED_BYTE, nds->GPU.Framebuffer[frontbuf][1].get()); - } + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D_ARRAY, texid); } screenSettingsLock.lock(); GLint filter = this->filter ? 
GL_LINEAR : GL_NEAREST; - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, filter); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, filter); glBindBuffer(GL_ARRAY_BUFFER, screenVertexBuffer); glBindVertexArray(screenVertexArray); diff --git a/src/frontend/qt_sdl/Screen.h b/src/frontend/qt_sdl/Screen.h index d7f54797bb..04ae3e456a 100755 --- a/src/frontend/qt_sdl/Screen.h +++ b/src/frontend/qt_sdl/Screen.h @@ -70,6 +70,8 @@ class ScreenPanel : public QWidget void osdSetEnabled(bool enabled); void osdAddMessage(unsigned int color, const char* msg); + virtual void drawScreen() {}// = 0; + private slots: void onScreenLayoutChanged(); void onAutoScreenSizingChanged(int sizing); @@ -161,12 +163,19 @@ class ScreenPanelNative : public ScreenPanel explicit ScreenPanelNative(QWidget* parent); virtual ~ScreenPanelNative(); + void drawScreen() override; + protected: void paintEvent(QPaintEvent* event) override; private: void setupScreenLayout() override; + QMutex bufferLock; + bool hasBuffers; + void* topBuffer; + void* bottomBuffer; + QImage screen[2]; QTransform screenTrans[kMaxScreenTransforms]; }; @@ -190,7 +199,8 @@ class ScreenPanelGL : public ScreenPanel void deinitOpenGL(); void makeCurrentGL(); void releaseGL(); - void drawScreenGL(); + + void drawScreen() override; GL::Context* getContext() { return glContext.get(); } diff --git a/src/frontend/qt_sdl/VideoSettingsDialog.ui b/src/frontend/qt_sdl/VideoSettingsDialog.ui index ff9baf8ff7..444a59f783 100644 --- a/src/frontend/qt_sdl/VideoSettingsDialog.ui +++ b/src/frontend/qt_sdl/VideoSettingsDialog.ui @@ -6,8 +6,8 @@ 0 0 - 427 - 262 + 514 + 316 @@ -21,7 +21,7 @@ - QLayout::SetFixedSize + QLayout::SizeConstraint::SetFixedSize 6 @@ -88,14 +88,14 @@ - Qt::Horizontal + Qt::Orientation::Horizontal - QDialogButtonBox::Cancel|QDialogButtonBox::Ok + 
QDialogButtonBox::StandardButton::Cancel|QDialogButtonBox::StandardButton::Ok - + Display settings @@ -127,10 +127,10 @@ - Qt::Vertical + Qt::Orientation::Vertical - QSizePolicy::Fixed + QSizePolicy::Policy::Fixed @@ -189,7 +189,7 @@ - 3D renderer: + Renderer: diff --git a/src/frontend/qt_sdl/Window.cpp b/src/frontend/qt_sdl/Window.cpp index afbfda1cfa..3071947d66 100755 --- a/src/frontend/qt_sdl/Window.cpp +++ b/src/frontend/qt_sdl/Window.cpp @@ -858,8 +858,9 @@ void MainWindow::closeEvent(QCloseEvent* event) void MainWindow::createScreenPanel() { - if (panel) delete panel; + auto oldpanel = panel; panel = nullptr; + if (oldpanel) delete oldpanel; hasOGL = globalCfg.GetBool("Screen.UseGL") || (globalCfg.GetInt("3D.Renderer") != renderer3D_Software); @@ -962,13 +963,10 @@ void MainWindow::releaseGL() return glpanel->releaseGL(); } -void MainWindow::drawScreenGL() +void MainWindow::drawScreen() { - if (!hasOGL) return; - - ScreenPanelGL* glpanel = static_cast(panel); - if (!glpanel) return; - return glpanel->drawScreenGL(); + if (!panel) return; + return panel->drawScreen(); } void MainWindow::keyPressEvent(QKeyEvent* event) diff --git a/src/frontend/qt_sdl/Window.h b/src/frontend/qt_sdl/Window.h index 678be7c9e9..638d1f515d 100755 --- a/src/frontend/qt_sdl/Window.h +++ b/src/frontend/qt_sdl/Window.h @@ -43,63 +43,6 @@ class EmuThread; const int kMaxRecentROMs = 10; -/* -class WindowBase : public QMainWindow -{ - Q_OBJECT - -public: - explicit WindowBase(QWidget* parent = nullptr); - ~WindowBase(); - - bool hasOGL; - GL::Context* getOGLContext(); - - //void onAppStateChanged(Qt::ApplicationState state); - -protected: - void resizeEvent(QResizeEvent* event) override; - void changeEvent(QEvent* event) override; - - void keyPressEvent(QKeyEvent* event) override; - void keyReleaseEvent(QKeyEvent* event) override; - - void dragEnterEvent(QDragEnterEvent* event) override; - void dropEvent(QDropEvent* event) override; - - void focusInEvent(QFocusEvent* event) override; - 
void focusOutEvent(QFocusEvent* event) override; - -signals: - void screenLayoutChange(); - -private slots: - //void onQuit(); - - //void onTitleUpdate(QString title); - - //void onEmuStart(); - //void onEmuStop(); - - //void onUpdateVideoSettings(bool glchange); - - void onFullscreenToggled(); - void onScreenEmphasisToggled(); - -private: - virtual void closeEvent(QCloseEvent* event) override; - - void createScreenPanel(); - - //bool pausedManually = false; - - int oldW, oldH; - bool oldMax; - -public: - ScreenPanel* panel; -};*/ - class MainWindow : public QMainWindow { Q_OBJECT @@ -125,7 +68,8 @@ class MainWindow : public QMainWindow void setGLSwapInterval(int intv); void makeCurrentGL(); void releaseGL(); - void drawScreenGL(); + + void drawScreen(); bool preloadROMs(QStringList file, QStringList gbafile, bool boot); QStringList splitArchivePath(const QString& filename, bool useMemberSyntax); diff --git a/src/frontend/qt_sdl/main_shaders.h b/src/frontend/qt_sdl/main_shaders.h index 613d9138cf..43dbfdb76f 100644 --- a/src/frontend/qt_sdl/main_shaders.h +++ b/src/frontend/qt_sdl/main_shaders.h @@ -25,9 +25,9 @@ uniform vec2 uScreenSize; uniform mat2x3 uTransform; in vec2 vPosition; -in vec2 vTexcoord; +in vec3 vTexcoord; -smooth out vec2 fTexcoord; +smooth out vec3 fTexcoord; void main() { @@ -47,9 +47,9 @@ void main() const char* kScreenFS = R"(#version 140 -uniform sampler2D ScreenTex; +uniform sampler2DArray ScreenTex; -smooth in vec2 fTexcoord; +smooth in vec3 fTexcoord; out vec4 oColor; @@ -57,7 +57,7 @@ void main() { vec4 pixel = texture(ScreenTex, fTexcoord); - oColor = vec4(pixel.bgr, 1.0); + oColor = vec4(pixel.rgb, 1.0); } )";