Merge remote-tracking branch 'origin/master' into feature/gpu-3d-api

shai-almog · shai-almog · commit 2cd01f4cb558 · 2026-06-08T21:32:46.000+03:00
diff --git a/scripts/lib/cn1ss.sh b/scripts/lib/cn1ss.sh
@@ -297,6 +297,49 @@ print(sum(1 for r in results if isinstance(r, dict) and r.get("status") == "miss
 PY
 }
 
+# Count the authoritative *expected* screenshot set: the number of golden PNGs
+# stored in the reference directory. The reference dir is the manifest -- it is
+# the single source of truth for how many screenshots a suite must produce. We
+# deliberately do NOT derive the expected count from whatever the harness chose
+# to deliver, because a harness that silently drops a test (a hang, a crash, a
+# transport that never delivered the frame) simply omits it from its delivered
+# set, leaving no "missing_actual" record behind. Counting goldens instead means
+# a dropped test is always visible as an uncovered golden. Top-level *.png only;
+# the reference dirs are flat <testName>.png sets.
+cn1ss_count_reference() {  # $1 = reference directory; prints the golden count on stdout
+  local dir="$1"  # directory expected to hold the golden PNGs
+  if [ -z "$dir" ] || [ ! -d "$dir" ]; then  # no path given, or path is not a directory
+    echo 0  # report zero goldens instead of failing
+    return
+  fi
+  local n
+  n=$(find "$dir" -maxdepth 1 -name '*.png' -type f 2>/dev/null | wc -l)  # count the lines find prints for top-level regular *.png files
+  echo "${n//[^0-9]/}"  # delete every non-digit character from the wc output before printing
+}
+
+# Count the goldens that were actually rendered AND compared against their
+# reference, i.e. results with status "equal" or "different". A "missing_actual"
+# (listed but no image), a "missing_expected" (new image with no golden yet) and
+# any test that never appeared at all are all NOT covered. expected - covered is
+# therefore the number of expected screenshots that failed to materialise.
+cn1ss_count_covered() {  # $1 = comparison results JSON; prints the covered count on stdout
+  local json="$1"  # path to the JSON file read by the inline Python below
+  if [ -z "$json" ] || [ ! -s "$json" ]; then  # no path given, or file missing/empty
+    echo 0  # nothing to read, so nothing counts as covered
+    return
+  fi  # below: Python counts "results" entries with status equal/different; prints 0 if loading the JSON raises
+  python3 - "$json" <<'PY'
+import json, sys
+try:
+    data = json.load(open(sys.argv[1]))
+    results = data.get("results", []) if isinstance(data, dict) else []
+except Exception:
+    print(0)
+    sys.exit(0)
+print(sum(1 for r in results if isinstance(r, dict) and r.get("status") in ("equal", "different")))
+PY
+}
+
 # Shared function to generate report, compare screenshots, and post PR comment
 cn1ss_process_and_report() {
   local platform_title="$1"
@@ -411,31 +454,52 @@ cn1ss_process_and_report() {
       return 15
     fi
 
-    # Missing-screenshot regression guard. Every expected test must produce its
-    # screenshot; a test that runs but emits nothing is recorded as status
-    # "missing_actual". When a test hangs or the rendering pipeline crashes
-    # partway (the Metal DialogTheme hang), every test from that point on
-    # becomes missing_actual and the suite silently drops from 122 captures to
-    # 107 - all still listed, just unproduced. We fail when the number of
-    # missing screenshots exceeds CN1SS_ALLOWED_MISSING (default 0: no missing
-    # screenshots tolerated). A pipeline with a known, steady-state gap raises
-    # its own tolerance (e.g. the iOS jobs set CN1SS_ALLOWED_MISSING=2 for
-    # OrientationLock + MutableImageReadback, which do not render on either iOS
-    # backend). Set CN1SS_SKIP_COUNT_CHECK=1 to bypass while intentionally
-    # seeding a brand new reference set. Enforced on every pipeline that opts
-    # into strict mode (CN1SS_FAIL_ON_MISMATCH=1).
-    if [ "${CN1SS_SKIP_COUNT_CHECK:-0}" != "1" ]; then
-      local missing_count allowed_missing
-      missing_count=$(cn1ss_count_missing "$compare_json_out")
-      missing_count="${missing_count//[^0-9]/}"; : "${missing_count:=999999}"
+    # ------------------------------------------------------------------------
+    # Screenshot count-regression guard. DO NOT WEAKEN OR REMOVE.
+    #
+    # Every golden in the reference directory must be re-produced and compared
+    # on every run. The reference set is the manifest: expected == number of
+    # golden PNGs (cn1ss_count_reference), covered == goldens that were rendered
+    # and compared (cn1ss_count_covered, i.e. status equal|different). When a
+    # test hangs, crashes or its frame never gets delivered, it drops out of the
+    # comparison entirely and `covered` falls below `expected` -- which is the
+    # ONLY reliable signal, because a dropped test leaves no per-test record
+    # behind to count (the older missing_actual-only check was blind to this and
+    # let suites silently shrink from 124 captures to 58 while still going green).
+    #
+    # We fail when expected - covered exceeds CN1SS_ALLOWED_MISSING (default 0:
+    # no uncovered goldens tolerated). A pipeline with a known, steady-state gap
+    # sets its own tolerance and documents why (e.g. the iOS jobs allow 2 for
+    # OrientationLock + MutableImageReadback, which do not render on the iOS
+    # backends). CN1SS_MIN_SCREENSHOTS can raise the floor above the on-disk
+    # golden count (so an under-seeded set fails instead of passing). The only
+    # bypass is CN1SS_SKIP_COUNT_CHECK=1, reserved for the deliberate, manual act
+    # of seeding a brand new reference set; it is loud in the log so it can never
+    # be mistaken for normal operation.
+    # ------------------------------------------------------------------------
+    if [ "${CN1SS_SKIP_COUNT_CHECK:-0}" = "1" ]; then
+      cn1ss_log "WARNING: CN1SS_SKIP_COUNT_CHECK=1 -- screenshot count-regression guard BYPASSED. This must only be used while intentionally seeding a new reference set."
+    else
+      local expected_count covered_count uncovered_count allowed_missing min_floor
+      expected_count=$(cn1ss_count_reference "$ref_dir")
+      expected_count="${expected_count//[^0-9]/}"; : "${expected_count:=0}"
+      min_floor="${CN1SS_MIN_SCREENSHOTS:-0}"
+      min_floor="${min_floor//[^0-9]/}"; : "${min_floor:=0}"
+      if [ "$min_floor" -gt "$expected_count" ]; then
+        expected_count="$min_floor"
+      fi
+      covered_count=$(cn1ss_count_covered "$compare_json_out")
+      covered_count="${covered_count//[^0-9]/}"; : "${covered_count:=0}"
       allowed_missing="${CN1SS_ALLOWED_MISSING:-0}"
       allowed_missing="${allowed_missing//[^0-9]/}"; : "${allowed_missing:=0}"
-      if [ "$missing_count" -gt "$allowed_missing" ]; then
-        cn1ss_log "FATAL: $missing_count screenshot(s) missing (no image produced) but only $allowed_missing tolerated (CN1SS_ALLOWED_MISSING)."
-        cn1ss_log "       A test failed to emit its screenshot - the suite likely hung or crashed before finishing. See the 'missing actual' entries above."
+      uncovered_count=$(( expected_count - covered_count ))
+      [ "$uncovered_count" -lt 0 ] && uncovered_count=0
+      if [ "$uncovered_count" -gt "$allowed_missing" ]; then
+        cn1ss_log "FATAL: $uncovered_count of $expected_count expected screenshot(s) were not produced and compared (only $covered_count covered); $allowed_missing tolerated (CN1SS_ALLOWED_MISSING)."
+        cn1ss_log "       A test failed to emit its screenshot, or the suite hung/crashed before finishing. The golden set under the comparison directory is the source of truth for how many screenshots must be produced."
         return 17
       fi
-      cn1ss_log "Missing-screenshot check passed: $missing_count missing <= $allowed_missing tolerated."
+      cn1ss_log "Screenshot count check passed: $covered_count of $expected_count goldens covered ($uncovered_count uncovered <= $allowed_missing tolerated)."
     fi
   fi
 
diff --git a/scripts/run-javascript-screenshot-tests.sh b/scripts/run-javascript-screenshot-tests.sh
@@ -102,9 +102,30 @@ else
   # the harness never ran end-to-end (markers absent). Distinguish the two via
   # the SUITE:FINISHED marker; there is no base64-over-console decode any more.
   if grep -q "CN1SS:SUITE:FINISHED" "$LOG_FILE"; then
-    rj_log "No screenshots delivered over WebSocket but reached SUITE:FINISHED; treating as a no-screenshot run"
     cp -f "$LOG_FILE" "$ARTIFACTS_DIR/javascript-device-runner.log" 2>/dev/null || true
-    exit 0
+    # "Zero delivered" is only a legitimate no-screenshot run when nothing is
+    # expected. If the reference set holds goldens, reaching SUITE:FINISHED with
+    # an empty delivery is the worst kind of count regression -- the whole suite
+    # dropped -- and must fail, never exit 0. This mirrors the reference-anchored
+    # floor in cn1ss_process_and_report (cn1ss.sh) for the path that never
+    # reaches it. CN1SS_SKIP_COUNT_CHECK=1 bypasses (reserved for seeding).
+    expected_goldens=$(cn1ss_count_reference "$REFERENCE_DIR")
+    expected_goldens="${expected_goldens//[^0-9]/}"; : "${expected_goldens:=0}"
+    allowed_missing="${CN1SS_ALLOWED_MISSING:-0}"
+    allowed_missing="${allowed_missing//[^0-9]/}"; : "${allowed_missing:=0}"
+    min_floor="${CN1SS_MIN_SCREENSHOTS:-0}"
+    min_floor="${min_floor//[^0-9]/}"; : "${min_floor:=0}"
+    if [ "$min_floor" -gt "$expected_goldens" ]; then expected_goldens="$min_floor"; fi
+    if [ "${CN1SS_SKIP_COUNT_CHECK:-0}" = "1" ]; then
+      rj_log "WARNING: CN1SS_SKIP_COUNT_CHECK=1 -- accepting zero-screenshot run despite $expected_goldens expected golden(s)."
+      exit 0
+    fi
+    if [ "$expected_goldens" -le "$allowed_missing" ]; then
+      rj_log "No screenshots delivered over WebSocket but reached SUITE:FINISHED; $expected_goldens expected (<= $allowed_missing tolerated) -- treating as a no-screenshot run"
+      exit 0
+    fi
+    rj_log "FATAL: reached SUITE:FINISHED but delivered 0 of $expected_goldens expected screenshot(s) ($allowed_missing tolerated) -- the suite dropped every screenshot (hang/crash)."
+    exit 17
   fi
   rj_log "STAGE:MARKERS_NOT_FOUND -> no WebSocket screenshots and no SUITE:FINISHED in browser log"
   rj_log "---- CN1SS lines from log ----"
diff --git a/vm/tests/pom.xml b/vm/tests/pom.xml
@@ -106,6 +106,15 @@
                 <version>3.2.5</version>
                 <configuration>
                     <useModulePath>false</useModulePath>
+                    <!-- Run test classes across parallel JVM forks. Parallelism MUST be
+                         process-level: the translator (Parser) keeps static mutable state,
+                         so in-JVM thread parallelism would corrupt it. Each test writes to
+                         unique createTempDirectory paths and the only shared path
+                         (target/benchmark-dependencies) is populated by maven-dependency-plugin
+                         before the test phase and only read here, so forks never collide.
+                         1C = one fork per available CPU core. -->
+                    <forkCount>1C</forkCount>
+                    <reuseForks>true</reuseForks>
                 </configuration>
             </plugin>
             <plugin>
@@ -117,6 +126,34 @@
                         <goals>
                             <goal>prepare-agent</goal>
                         </goals>
+                        <configuration>
+                            <!-- One exec file per surefire fork. surefire substitutes
+                                 ${surefire.forkNumber} in the agent argLine per forked JVM;
+                                 without this, parallel forks would clobber a single
+                                 jacoco.exec and the coverage report would be corrupt/empty. -->
+                            <destFile>${project.build.directory}/jacoco-fork-${surefire.forkNumber}.exec</destFile>
+                        </configuration>
+                    </execution>
+                    <execution>
+                        <!-- Merge the per-fork exec files back into one before the report.
+                             Declared before the report execution so it runs first in the
+                             test phase. -->
+                        <id>merge-forks</id>
+                        <phase>test</phase>
+                        <goals>
+                            <goal>merge</goal>
+                        </goals>
+                        <configuration>
+                            <fileSets>
+                                <fileSet>
+                                    <directory>${project.build.directory}</directory>
+                                    <includes>
+                                        <include>jacoco-fork-*.exec</include>
+                                    </includes>
+                                </fileSet>
+                            </fileSets>
+                            <destFile>${project.build.directory}/jacoco.exec</destFile>
+                        </configuration>
                     </execution>
                     <execution>
                         <id>report</id>
diff --git a/vm/tests/src/test/java/com/codename1/tools/translator/BytecodeInstructionIntegrationTest.java b/vm/tests/src/test/java/com/codename1/tools/translator/BytecodeInstructionIntegrationTest.java
@@ -39,14 +39,13 @@
 
 class BytecodeInstructionIntegrationTest {
 
+    // Shared @MethodSource for the integration suites (RuntimeSemantics, Target,
+    // Bytecode, CleanTarget, Smoke, NativeAudit, FileClass, ...). Uses the
+    // diagonal compiler set -- each bytecode target compiled by its matching JDK
+    // (8->8 .. 25->25) -- rather than the full (compiler x target) cross-product,
+    // which re-tested the same bytecode shapes. See CompilerHelper.getDiagonalCompilers.
     static Stream<CompilerHelper.CompilerConfig> provideCompilerConfigs() {
-        List<CompilerHelper.CompilerConfig> configs = new ArrayList<>();
-        configs.addAll(CompilerHelper.getAvailableCompilers("1.8"));
-        configs.addAll(CompilerHelper.getAvailableCompilers("11"));
-        configs.addAll(CompilerHelper.getAvailableCompilers("17"));
-        configs.addAll(CompilerHelper.getAvailableCompilers("21"));
-        configs.addAll(CompilerHelper.getAvailableCompilers("25"));
-        return configs.stream();
+        return CompilerHelper.getDiagonalCompilers().stream(); // streams whatever list getDiagonalCompilers() returns, in its order
     }
 
     @ParameterizedTest
diff --git a/vm/tests/src/test/java/com/codename1/tools/translator/CompilerHelper.java b/vm/tests/src/test/java/com/codename1/tools/translator/CompilerHelper.java
@@ -190,6 +190,29 @@ public static List<CompilerConfig> getAvailableCompilers(String targetVersion) {
         return compilers;
     }
 
+    // Diagonal compiler set: each supported bytecode target compiled only by the
+    // JDK whose major version matches that target (8->8, 11->11, 17->17, 21->21,
+    // 25->25). The translator consumes bytecode, which is governed by the target
+    // level, so the full (compiler x target) cross-product mostly re-tests the
+    // same bytecode shapes. Restricting to the diagonal keeps every target level
+    // exercised while cutting the per-method parameter count from up to 15 to 5.
+    // A target whose matching JDK is not installed locally is simply skipped
+    // (CI installs all five). Pairs use {target, jdkMajor}; "1.8" maps to JDK 8.
+    public static List<CompilerConfig> getDiagonalCompilers() { // returns at most one config per entry of pairs, in pairs order
+        String[][] pairs = { {"1.8", "8"}, {"11", "11"}, {"17", "17"}, {"21", "21"}, {"25", "25"} }; // each entry: {target level, wanted JDK major}
+        List<CompilerConfig> out = new ArrayList<>();
+        for (String[] pair : pairs) {
+            int wantMajor = parseJavaMajor(pair[1]); // JDK major that must compile this target
+            for (CompilerConfig config : getAvailableCompilers(pair[0])) { // candidates reported for this target level
+                if (getJdkMajor(config) == wantMajor) {
+                    out.add(config);
+                    break; // keep only the first matching compiler for this target
+                }
+            }
+        }
+        return out; // a target with no matching candidate contributes nothing, so the list may be shorter than pairs
+    }
+
     private static boolean canCompile(String compilerVersion, String targetVersion) {
         int compilerMajor = parseJavaMajor(compilerVersion);
         int targetMajor = parseJavaMajor(targetVersion);
@@ -385,7 +408,35 @@ public static boolean compileAndRun(String code, String expectedOutput) throws E
         }
     }
 
+    // The compiled JavaAPI is identical for a given (jdkVersion, targetVersion),
+    // yet it was previously re-compiled (a ~259-source javac run) on every
+    // parameterized test invocation -- hundreds of times across the suite. Cache
+    // the compiled output per combo and copy it into each caller's outputDir
+    // instead. This removes the dominant repeated cost with zero change to what
+    // is tested. Within a surefire fork tests run sequentially, so the only
+    // contention is the compile-once guard below.
+    private static final java.util.Map<String, Path> JAVA_API_CACHE =
+            new java.util.concurrent.ConcurrentHashMap<>();
+
     public static void compileJavaAPI(Path outputDir, CompilerConfig config) throws IOException, InterruptedException {
+        Files.createDirectories(outputDir); // make sure the caller's output directory exists
+        copyDirectory(getCachedJavaApi(config), outputDir); // pass the cached compile output to copyDirectory (defined outside this hunk; presumably source, destination)
+    }
+
+    private static synchronized Path getCachedJavaApi(CompilerConfig config) throws IOException, InterruptedException { // static synchronized: calls are serialized on the class lock
+        String key = config.jdkVersion + "->" + config.targetVersion; // cache key: compiler JDK version plus bytecode target
+        Path cached = JAVA_API_CACHE.get(key);
+        if (cached != null && Files.isDirectory(cached)) { // reuse only while the cached directory still exists on disk
+            return cached;
+        }
+        Path cacheDir = Files.createTempDirectory(
+                "java-api-cache-" + config.jdkVersion + "-" + config.targetVersion.replaceAll("[^A-Za-z0-9]", "_") + "-"); // non-alphanumerics in the target become '_' in the directory prefix
+        compileJavaApiInto(cacheDir, config); // NOTE(review): if this throws, cacheDir is not removed here, and nothing visible in this hunk deletes cache directories later
+        JAVA_API_CACHE.put(key, cacheDir); // recorded only after the compile call returns normally
+        return cacheDir;
+    }
+
+    private static void compileJavaApiInto(Path outputDir, CompilerConfig config) throws IOException, InterruptedException {
         Files.createDirectories(outputDir);
         Path javaApiRoot = Paths.get("..", "JavaAPI", "src").normalize().toAbsolutePath();
         List<String> sources = new ArrayList<>();