Merged
123 changes: 123 additions & 0 deletions __tests__/testParser.test.ts
@@ -1413,6 +1413,129 @@ action.surefire.report.email.InvalidEmailAddressException: Invalid email address
}
])
})

it('flaky test with classname and file: multiple failures then success should pass with retries', async () => {
// Test that flaky tests are correctly identified using classname and file as part of the key
// The test_foo test appears 3 times: failure, error, then success
// It should be marked as success with 2 retries
const testResult = await parseFile(
'test_results/flaky_retries/flaky_with_classname_file.xml',
'',
true, // includePassed
true, // annotateNotice
true // checkRetries
)
expect(testResult).toBeDefined()
const {totalCount, skippedCount, failedCount, passedCount, retriedCount, globalAnnotations} = testResult!!

// Should have 3 unique tests (test_foo appears once due to deduplication, plus test_bar.test_foo and test_baz)
expect(totalCount).toBe(3)
expect(skippedCount).toBe(0)
expect(failedCount).toBe(0)
expect(passedCount).toBe(3)
expect(retriedCount).toBe(2) // 2 retries for the flaky test (3 occurrences - 1)

// Find the flaky test annotation
const flakyTest = globalAnnotations.find(a =>
a.title.includes('test_foo.TestFoo') || a.path.includes('test_foo.py')
)
expect(flakyTest).toBeDefined()
expect(flakyTest!.status).toBe('success')
expect(flakyTest!.retries).toBe(2)
expect(flakyTest!.annotation_level).toBe('notice')

// Verify that test_bar.test_foo is NOT merged with test_foo.test_foo (different classname/file)
const testBarFoo = globalAnnotations.find(a => a.path.includes('test_bar.py'))
expect(testBarFoo).toBeDefined()
expect(testBarFoo!.retries).toBe(0) // Not retried, it's a separate test
})

it('flaky test with all failures should still be marked as failure with retries', async () => {
// Test that when all executions of a flaky test fail, it remains a failure but tracks retries
const testResult = await parseFile(
'test_results/flaky_retries/flaky_all_failures.xml',
'',
false, // includePassed
false, // annotateNotice
true // checkRetries
)
expect(testResult).toBeDefined()
const {totalCount, skippedCount, failedCount, passedCount, retriedCount, globalAnnotations} = testResult!!

// Should have 1 unique test after deduplication
expect(totalCount).toBe(1)
expect(skippedCount).toBe(0)
expect(failedCount).toBe(1)
expect(passedCount).toBe(0)
expect(retriedCount).toBe(2) // 2 retries (3 occurrences - 1)

// Should still have a failure annotation
expect(globalAnnotations).toHaveLength(1)
expect(globalAnnotations[0].status).toBe('failure')
expect(globalAnnotations[0].retries).toBe(2)
expect(globalAnnotations[0].annotation_level).toBe('failure')
})

it('flaky test with success first should still pass with retries tracked', async () => {
// Test that even if success comes first and failures come later,
// the test is still marked as success with proper retry count
const testResult = await parseFile(
'test_results/flaky_retries/flaky_success_first.xml',
'',
true, // includePassed
true, // annotateNotice
true // checkRetries
)
expect(testResult).toBeDefined()
const {totalCount, skippedCount, failedCount, passedCount, retriedCount, globalAnnotations} = testResult!!

// Should have 1 unique test after deduplication
expect(totalCount).toBe(1)
expect(skippedCount).toBe(0)
expect(failedCount).toBe(0)
expect(passedCount).toBe(1)
expect(retriedCount).toBe(2) // 2 retries (3 occurrences - 1)

// Should be marked as success
expect(globalAnnotations).toHaveLength(1)
expect(globalAnnotations[0].status).toBe('success')
expect(globalAnnotations[0].retries).toBe(2)
expect(globalAnnotations[0].annotation_level).toBe('notice')
})

it('same test name but different classname/file should NOT be merged', async () => {
// Verify that tests with the same name but different classname or file are treated as separate tests
const testResult = await parseFile(
'test_results/flaky_retries/flaky_with_classname_file.xml',
'',
true, // includePassed
true, // annotateNotice
true // checkRetries
)
expect(testResult).toBeDefined()
const {totalCount, globalAnnotations} = testResult!!

// Should have 3 unique tests:
// 1. test_foo from test_foo.TestFoo (flaky, merged)
// 2. test_foo from test_bar.TestBar (separate)
// 3. test_baz from test_baz.TestBaz
expect(totalCount).toBe(3)
expect(globalAnnotations).toHaveLength(3)

// Verify we have two different test_foo entries (one from each classname)
const testFooAnnotations = globalAnnotations.filter(a => a.title.includes('test_foo'))
expect(testFooAnnotations).toHaveLength(2)

// The one from TestFoo should have retries, the one from TestBar should not
const testFooFromTestFoo = testFooAnnotations.find(a => a.path.includes('test_foo.py'))
const testFooFromTestBar = testFooAnnotations.find(a => a.path.includes('test_bar.py'))

expect(testFooFromTestFoo).toBeDefined()
expect(testFooFromTestFoo!.retries).toBe(2)

expect(testFooFromTestBar).toBeDefined()
expect(testFooFromTestBar!.retries).toBe(0)
})
})

describe('parseTestReports', () => {
36 changes: 25 additions & 11 deletions dist/index.js

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion dist/index.js.map

Large diffs are not rendered by default.

38 changes: 27 additions & 11 deletions src/testParser.ts
@@ -582,25 +582,41 @@ async function parseTestCases(
let time = 0
if (checkRetries) {
// identify duplicates in case of flaky tests, and remove them
// Use a compound key including name, classname (if available), and file (if available)
// to prevent accidental duplicate matches across different test classes/files
const testcaseMap = new Map<string, any>()
for (const testcase of testcases) {
const key = testcase._attributes.name
const name = testcase._attributes.name
const classname = testcase._attributes.classname || ''
const file = testcase._attributes.file || ''
const key = `${name}|${classname}|${file}`

if (testcaseMap.get(key) !== undefined) {
// testcase with matching name exists
// testcase with matching key exists - this is a flaky test
const failed = testcase.failure || testcase.error
const previous = testcaseMap.get(key)
const previousFailed = previous.failure || previous.error
if (failed && !previousFailed) {
// previous is a success, drop failure
previous.retries = (previous.retries || 0) + 1
retriedCount += 1
core.debug(`Drop flaky test failure for (1): ${key}`)
} else if (!failed && previousFailed) {
// previous failed, new one not, replace
testcase.retries = (previous.retries || 0) + 1

// Increment retry count for each additional occurrence
const currentRetries = (previous.retries || 0) + 1

if (!failed) {
// Current execution is successful - use this as the final result
// The test is flaky but ultimately passed
testcase.retries = currentRetries
testcaseMap.set(key, testcase)
retriedCount += 1
core.debug(`Drop flaky test failure for (2): ${JSON.stringify(testcase)}`)
core.debug(`Flaky test succeeded after retry for: ${key}`)
} else if (!previousFailed) {
// Previous was successful, current failed - keep the successful one
previous.retries = currentRetries
retriedCount += 1
core.debug(`Flaky test: keeping success, dropping failure for: ${key}`)
} else {
// Both failed - keep the previous occurrence but continue tracking retries
previous.retries = currentRetries
retriedCount += 1
core.debug(`Flaky test: multiple failures for: ${key}`)
}
} else {
testcaseMap.set(key, testcase)
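For reference, a minimal standalone sketch of the compound-key deduplication above, assuming a simplified testcase shape (`SimpleCase` and `dedupeFlaky` are hypothetical names; the real parser works on the xml-js `_attributes` objects and also builds counts and annotations):

```typescript
// Sketch only: mirrors the branch logic from the diff above on a
// simplified testcase shape.
interface SimpleCase {
  name: string
  classname?: string
  file?: string
  failed: boolean
  retries?: number
}

function dedupeFlaky(testcases: SimpleCase[]): {unique: SimpleCase[]; retriedCount: number} {
  const map = new Map<string, SimpleCase>()
  let retriedCount = 0
  for (const tc of testcases) {
    // Compound key: name plus classname/file, so identically named tests
    // in different classes or files are never merged.
    const key = `${tc.name}|${tc.classname || ''}|${tc.file || ''}`
    const previous = map.get(key)
    if (previous === undefined) {
      map.set(key, tc)
      continue
    }
    const currentRetries = (previous.retries || 0) + 1
    retriedCount += 1
    if (!tc.failed) {
      // A later success becomes the final result and carries the retry count.
      tc.retries = currentRetries
      map.set(key, tc)
    } else {
      // Current attempt failed: keep the existing entry (a success or the
      // first failure) and just bump its retry count.
      previous.retries = currentRetries
    }
  }
  return {unique: [...map.values()], retriedCount}
}
```

Fed the three `test_foo.TestFoo` occurrences from the fixture below, this yields a single passing entry with `retries` of 2 and a `retriedCount` of 2, which is what the new tests assert.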
19 changes: 19 additions & 0 deletions test_results/flaky_retries/flaky_all_failures.xml
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuite errors="1" failures="2" name="FlakyAllFailures" skips="0" time="3.0" tests="3">
<!-- Flaky test: fails multiple times and never passes -->
<testcase classname="test_always_fails.TestAlwaysFails"
file="/home/runner/work/tests/test_always_fails.py"
line="10" name="test_always_fails" time="1.0">
<failure message="first failure">First attempt failed</failure>
</testcase>
<testcase classname="test_always_fails.TestAlwaysFails"
file="/home/runner/work/tests/test_always_fails.py"
line="10" name="test_always_fails" time="1.0">
<failure message="second failure">Second attempt failed</failure>
</testcase>
<testcase classname="test_always_fails.TestAlwaysFails"
file="/home/runner/work/tests/test_always_fails.py"
line="10" name="test_always_fails" time="1.0">
<error message="third error">Third attempt errored</error>
</testcase>
</testsuite>
17 changes: 17 additions & 0 deletions test_results/flaky_retries/flaky_success_first.xml
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuite errors="1" failures="1" name="FlakySuccessFirst" skips="0" time="3.0" tests="3">
<!-- Flaky test: passes first, then fails (unusual order but should still work) -->
<testcase classname="test_order.TestOrder"
file="/home/runner/work/tests/test_order.py"
line="15" name="test_success_first" time="1.0" />
<testcase classname="test_order.TestOrder"
file="/home/runner/work/tests/test_order.py"
line="15" name="test_success_first" time="1.0">
<failure message="later failure">This failure came after success</failure>
</testcase>
<testcase classname="test_order.TestOrder"
file="/home/runner/work/tests/test_order.py"
line="15" name="test_success_first" time="1.0">
<error message="later error">This error came after success</error>
</testcase>
</testsuite>
27 changes: 27 additions & 0 deletions test_results/flaky_retries/flaky_with_classname_file.xml
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<testsuite errors="2" failures="1" name="FlakyTestSuite" skips="0" time="5.526" tests="5">
<!-- Flaky test: fails with failure first, then error, then passes -->
<testcase classname="test_foo.TestFoo"
file="/home/runner/work/foo/bar/test_foo.py"
line="42" name="test_foo" time="1.842">
<failure message="it failed">'NoneType' object is not iterable</failure>
</testcase>
<testcase classname="test_foo.TestFoo"
file="/home/runner/work/foo/bar/test_foo.py"
line="42" name="test_foo" time="1.842">
<error message="it broke">oh no</error>
</testcase>
<testcase classname="test_foo.TestFoo"
file="/home/runner/work/foo/bar/test_foo.py"
line="42" name="test_foo" time="1.842" />

<!-- Regular passing test with same name but different classname - should NOT be merged -->
<testcase classname="test_bar.TestBar"
file="/home/runner/work/foo/bar/test_bar.py"
line="10" name="test_foo" time="0.5" />

<!-- Another regular test -->
<testcase classname="test_baz.TestBaz"
file="/home/runner/work/foo/bar/test_baz.py"
line="20" name="test_baz" time="0.5" />
</testsuite>
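As a rough illustration (assuming the `${name}|${classname}|${file}` key format from the `testParser.ts` change above), this fixture produces three distinct keys, which is why the tests expect a `totalCount` of 3 and a `retriedCount` of 2:

```typescript
// Hypothetical listing of the compound keys derived from this fixture.
const keys = [
  'test_foo|test_foo.TestFoo|/home/runner/work/foo/bar/test_foo.py', // 3 occurrences -> 1 entry, retries = 2
  'test_foo|test_bar.TestBar|/home/runner/work/foo/bar/test_bar.py', // 1 occurrence, retries = 0
  'test_baz|test_baz.TestBaz|/home/runner/work/foo/bar/test_baz.py' // 1 occurrence, retries = 0
]
```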