Skip to content

Commit 5a30082

Browse files
committed
fix(ai_denoise): probe Vulkan loader before NCNN init (refs #30, #31)
NCNN's create_gpu_instance() dispatches through function pointers that its internal simplevk fills only when libvulkan can be dlopened. On macOS without MoltenVK / the LunarG SDK installed, dlopen fails silently and the next call dereferences uninitialised pointers, crashing with SIGBUS in the .data segment (instruction abort, as reported in #31) after three failed dlopen attempts (the symptom reported in #30). Add a pre-flight platform-specific dlopen probe matching NCNN's own loader names: vulkan-1.dll on Windows, libvulkan.1.dylib / libvulkan.dylib on macOS, libvulkan.so.1 / libvulkan.so on Linux. If the probe fails, skip create_gpu_instance() entirely and route the denoiser through the CPU code path. macOS gets a tailored log message naming MoltenVK / LunarG so users have a clear remediation. CI: add scripts/ci_smoke_ai_denoise.sh, which generates a 512x512 PNG and runs the binary end-to-end with --force --region br:auto --denoise ai. Wire it into all six build jobs (full + gwt-mini, each on windows/linux/macos) after the existing --version smoke test. This exercises NcnnDenoiser::initialize() and would have caught these issues on the macOS runner immediately.
1 parent 389880d commit 5a30082

3 files changed

Lines changed: 157 additions & 8 deletions

File tree

.github/workflows/build.yml

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,13 @@ jobs:
5252
cmake --preset windows-x64-Release
5353
cmake --build --preset windows-x64-Release
5454
55-
- name: Test binary
55+
- name: Test binary (--version)
5656
run: ./out/build/windows-x64-Release/GeminiWatermarkTool.exe --version
5757

58+
- name: Test binary (AI denoise pipeline)
59+
shell: bash
60+
run: bash scripts/ci_smoke_ai_denoise.sh ./out/build/windows-x64-Release/GeminiWatermarkTool.exe
61+
5862
- name: Upload artifact
5963
uses: actions/upload-artifact@v4
6064
with:
@@ -103,9 +107,12 @@ jobs:
103107
cmake --preset linux-x64-Release
104108
cmake --build --preset linux-x64-Release
105109
106-
- name: Test binary
110+
- name: Test binary (--version)
107111
run: ./out/build/linux-x64-Release/GeminiWatermarkTool --version
108112

113+
- name: Test binary (AI denoise pipeline)
114+
run: bash scripts/ci_smoke_ai_denoise.sh ./out/build/linux-x64-Release/GeminiWatermarkTool
115+
109116
- name: Upload artifact
110117
uses: actions/upload-artifact@v4
111118
with:
@@ -169,9 +176,12 @@ jobs:
169176
mv dist/GeminiWatermarkTool .
170177
rmdir dist
171178
172-
- name: Test binary
179+
- name: Test binary (--version)
173180
run: ./GeminiWatermarkTool --version
174181

182+
- name: Test binary (AI denoise pipeline)
183+
run: bash scripts/ci_smoke_ai_denoise.sh ./GeminiWatermarkTool
184+
175185
- name: Upload artifact
176186
uses: actions/upload-artifact@v4
177187
with:
@@ -233,10 +243,14 @@ jobs:
233243
echo "--- size after UPX ---"
234244
ls -lh gwt-mini.exe
235245
236-
- name: Smoke test
246+
- name: Smoke test (--version)
237247
shell: bash
238248
run: ./gwt-mini.exe --version
239249

250+
- name: Smoke test (AI denoise pipeline)
251+
shell: bash
252+
run: bash scripts/ci_smoke_ai_denoise.sh ./gwt-mini.exe
253+
240254
- name: Upload artifact
241255
uses: actions/upload-artifact@v4
242256
with:
@@ -284,9 +298,12 @@ jobs:
284298
echo "--- size after UPX ---"
285299
ls -lh gwt-mini
286300
287-
- name: Smoke test
301+
- name: Smoke test (--version)
288302
run: ./gwt-mini --version
289303

304+
- name: Smoke test (AI denoise pipeline)
305+
run: bash scripts/ci_smoke_ai_denoise.sh ./gwt-mini
306+
290307
- name: Upload artifact
291308
uses: actions/upload-artifact@v4
292309
with:
@@ -346,9 +363,12 @@ jobs:
346363
echo "--- final size (no UPX on macOS) ---"
347364
ls -lh gwt-mini
348365
349-
- name: Smoke test
366+
- name: Smoke test (--version)
350367
run: ./gwt-mini --version
351368

369+
- name: Smoke test (AI denoise pipeline)
370+
run: bash scripts/ci_smoke_ai_denoise.sh ./gwt-mini
371+
352372
- name: Upload artifact
353373
uses: actions/upload-artifact@v4
354374
with:

scripts/ci_smoke_ai_denoise.sh

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env bash
# =============================================================================
# CI smoke test: AI denoise initialisation + end-to-end run
# =============================================================================
# Exercises the NcnnDenoiser::initialize() path, including the Vulkan loader
# probe and CPU fallback. Generates a synthetic 512x512 image, runs the binary
# with --denoise ai through the full pipeline, and asserts that the process
# exits successfully and produces an output file.
#
# This catches the class of issues seen in #30 / #31 -- where Vulkan loading
# fails on macOS without MoltenVK and the AI denoise init path crashes with
# SIGBUS instead of falling back to CPU. A plain `--version` smoke test does
# not exercise this code path.
#
# Usage: ci_smoke_ai_denoise.sh <path-to-binary>
#
# Exit codes:
#   0      smoke test passed
#   2      the given binary path is not a regular file
#   3      the binary succeeded but produced no output file
#   4      the output file is implausibly small (< 1000 bytes)
#   other  the binary's own non-zero exit status, passed through unchanged
# =============================================================================
set -euo pipefail

BIN="${1:?usage: $0 <path-to-binary>}"

# Require a regular file; an executable directory must not pass as a binary.
if [[ ! -f "$BIN" ]]; then
    echo "ERROR: binary not found: $BIN" >&2
    exit 2
fi

# Scratch directory for input/output images; removed on any exit path.
WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT

INPUT="$WORKDIR/smoke_in.png"
OUTPUT="$WORKDIR/smoke_out.png"

# Prefer python3; fall back to `python` where only that name is installed.
PY=python3
command -v "$PY" >/dev/null 2>&1 || PY=python

echo "--- generating 512x512 synthetic test image (using $PY) ---"
"$PY" - "$INPUT" <<'PY_EOF'
import struct, sys, zlib

def png(w, h, color=(128, 128, 128)):
    """Return the bytes of a w x h 8-bit RGB PNG filled with a single color."""
    def chunk(t, d):
        # length + type + data + CRC32(type + data). The mask keeps the CRC
        # unsigned on interpreters where zlib.crc32 may return a signed value.
        crc = zlib.crc32(t + d) & 0xFFFFFFFF
        return struct.pack(">I", len(d)) + t + d + struct.pack(">I", crc)
    sig = b"\x89PNG\r\n\x1a\n"
    # IHDR: bit depth 8, colour type 2 (truecolour), no interlace.
    ihdr = chunk(b"IHDR", struct.pack(">IIBBBBB", w, h, 8, 2, 0, 0, 0))
    # Each scanline is a filter-type byte (0 = none) followed by w RGB pixels.
    # bytes(bytearray(...)) yields the raw pixel bytes on Python 2 and 3 alike.
    row = b"\0" + bytes(bytearray(color)) * w
    raw = row * h
    idat = chunk(b"IDAT", zlib.compress(raw))
    iend = chunk(b"IEND", b"")
    return sig + ihdr + idat + iend

with open(sys.argv[1], "wb") as f:
    f.write(png(512, 512))
PY_EOF
ls -lh "$INPUT"

echo "--- running AI denoise pipeline ---"
#   --force            skip detection (no real watermark on a uniform image)
#   --region br:auto   process the bottom-right region at the Gemini default position
#   --denoise ai       exercise the NCNN/Vulkan/CPU dispatch path
#
# Capture the exit status explicitly: under `set -e` a bare failing command
# would abort the script before the diagnostic below could ever run.
RC=0
"$BIN" --force --region br:auto --denoise ai -i "$INPUT" -o "$OUTPUT" || RC=$?

if [[ $RC -ne 0 ]]; then
    echo "ERROR: binary exited with code $RC" >&2
    exit $RC
fi

if [[ ! -f "$OUTPUT" ]]; then
    echo "ERROR: expected output file not produced: $OUTPUT" >&2
    exit 3
fi

# BSD wc (macOS) left-pads the count with spaces; strip them for clean logs.
OUT_SIZE=$(wc -c <"$OUTPUT" | tr -d '[:space:]')
if [[ $OUT_SIZE -lt 1000 ]]; then
    echo "ERROR: output file suspiciously small ($OUT_SIZE bytes)" >&2
    exit 4
fi

echo "--- AI denoise smoke test PASSED (output=$OUT_SIZE bytes) ---"

src/core/ai_denoise.cpp

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@
2828
#include <algorithm>
2929
#include <chrono>
3030

31+
#if defined(_WIN32)
32+
# define WIN32_LEAN_AND_MEAN
33+
# include <windows.h>
34+
#else
35+
# include <dlfcn.h>
36+
#endif
37+
3138
// Embedded model data (defined in ai_denoise_model.cpp)
3239
namespace gwt::ai_model {
3340
const unsigned char* param_data();
@@ -36,6 +43,35 @@ namespace gwt::ai_model {
3643

3744
namespace gwt {
3845

46+
namespace {

/// Reports whether the system's Vulkan loader library can be dynamically loaded.
///
/// NCNN's internal simplevk dlopens the loader by these exact names. When the
/// loader is absent, ncnn::create_gpu_instance() dispatches through
/// uninitialised function pointers and crashes (SIGBUS on macOS Apple Silicon,
/// observed in issues #30 / #31), so callers probe here first.
///
/// The library is opened only to test for its presence and is released again
/// before returning.
///
/// @return true if one of the platform's loader names could be opened.
bool vulkan_loader_present() {
#if defined(_WIN32)
    const HMODULE loader = LoadLibraryA("vulkan-1.dll");
    if (loader == nullptr) {
        return false;
    }
    FreeLibrary(loader);
    return true;
#else
#  if defined(__APPLE__)
    // macOS does not ship a Vulkan loader. The user must install MoltenVK
    // via the LunarG Vulkan SDK (or it must be bundled into the app).
    static constexpr const char* kLoaderNames[] = {"libvulkan.1.dylib",
                                                   "libvulkan.dylib"};
#  else
    static constexpr const char* kLoaderNames[] = {"libvulkan.so.1",
                                                   "libvulkan.so"};
#  endif
    // Try the versioned name first, then the unversioned one; stop at the
    // first name that loads.
    for (const char* name : kLoaderNames) {
        if (void* handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL)) {
            dlclose(handle);
            return true;
        }
    }
    return false;
#endif
}

}  // namespace
74+
3975
// Blob indices from binary param (string names not available in binary format)
4076
// These correspond to model_core.id.h: BLOB_in0 = 0, BLOB_out0 = 20
4177
static constexpr int BLOB_INPUT = 0;
@@ -292,7 +328,22 @@ bool NcnnDenoiser::initialize() {
292328
return true;
293329
}
294330

295-
ncnn::create_gpu_instance();
331+
// Probe the Vulkan loader BEFORE calling into NCNN. On systems without
332+
// it (macOS without MoltenVK is the common case), NCNN's
333+
// create_gpu_instance() dispatches through uninitialised function
334+
// pointers and crashes -- see issues #30 and #31.
335+
const bool vulkan_available = vulkan_loader_present();
336+
if (vulkan_available) {
337+
ncnn::create_gpu_instance();
338+
} else {
339+
#if defined(__APPLE__)
340+
spdlog::info("NcnnDenoiser: Vulkan loader unavailable on macOS "
341+
"(install MoltenVK or the LunarG Vulkan SDK for GPU "
342+
"acceleration); falling back to CPU");
343+
#else
344+
spdlog::info("NcnnDenoiser: Vulkan loader unavailable, falling back to CPU");
345+
#endif
346+
}
296347

297348
// FP16 storage for efficiency, FP32 compute for accuracy
298349
m_impl->net.opt.use_fp16_packed = true;
@@ -301,7 +352,7 @@ bool NcnnDenoiser::initialize() {
301352
m_impl->net.opt.use_packing_layout = true;
302353

303354
// Try GPU first, fall back to CPU
304-
m_impl->gpu_enabled = m_impl->init_gpu();
355+
m_impl->gpu_enabled = vulkan_available && m_impl->init_gpu();
305356
if (!m_impl->gpu_enabled) {
306357
m_impl->init_cpu();
307358
}

0 commit comments

Comments
 (0)