Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions c_pal_ll/interfaces/tests/sync_int/sync_int.c
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,54 @@ TEST_FUNCTION(wait_on_address_64_returns_immediately)
ASSERT_ARE_EQUAL(WAIT_ON_ADDRESS_RESULT, WAIT_ON_ADDRESS_OK, return_val, "wait_on_address_64 should have returned ok");
}

/* Tests_SRS_SYNC_43_001: [ wait_on_address shall atomically compare *address and *compare_address.] */
/* Tests_SRS_SYNC_43_002: [ wait_on_address shall immediately return true if *address is not equal to *compare_address.] */
//
// Regression test: wait_on_address_64 must compare the FULL 64-bit value at
// *address against compare_value, not just the lower 32 bits.
//
// On Linux, wait_on_address_64 is implemented using the futex syscall, which
// fundamentally compares only 32 bits of the value at the address (the lower
// 32 bits on little-endian systems). When *address differs from compare_value
// only in the upper 32 bits, the kernel sees the lower 32 bits as equal and
// incorrectly sleeps until timeout instead of returning immediately.
//
// This bug also breaks InterlockedHL_WaitForNotValue64 (and the other 64-bit
// InterlockedHL waiters) in race conditions, because those functions read the
// full 64-bit value via interlocked_add_64, then call wait_on_address_64. If
// another thread changes the upper 32 bits between the read and the syscall,
// the kernel's atomic check-before-sleep fails to detect it, leading to a lost
// wakeup that blocks until timeout.
//
// Concrete example from the multiplexer integration tests: a SUBSTREAM_ID is a
// 64-bit value where the lower 32 bits are an index. The first substream has
// index = 0, so the lower 32 bits of its SUBSTREAM_ID match the initial value
// (0) used as compare_value, even though the upper 32 bits are non-zero.
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make this comment smaller.

TEST_FUNCTION(wait_on_address_64_returns_immediately_when_only_upper_32_bits_differ)
{
    //arrange
    // 0x100000000 has upper 32 bits = 1 and lower 32 bits = 0, so it differs from
    // compare_value (0) only in the upper half of the 64-bit value.
    volatile_atomic int64_t var;
    int64_t differing_value = 0x100000000LL;
    (void)interlocked_exchange_64(&var, differing_value);
    int64_t compare_value = 0;
    // Unsigned to match the timeout_ms parameter of wait_on_address_64. The timeout only
    // bounds how long a broken implementation can block the test run.
    uint32_t timeout = 5000;

    //act
    WAIT_ON_ADDRESS_RESULT return_val = wait_on_address_64(&var, compare_value, timeout);

    //assert
    // No wall-clock measurement is needed: an implementation that compares only the lower
    // 32 bits sleeps until the timeout expires and therefore does not report
    // WAIT_ON_ADDRESS_OK, so the result code alone detects the regression.
    ASSERT_ARE_EQUAL(WAIT_ON_ADDRESS_RESULT, WAIT_ON_ADDRESS_OK, return_val,
        "wait_on_address_64 must return WAIT_ON_ADDRESS_OK when *var (0x%" PRIx64 ") != compare_value (0x%" PRIx64 ")",
        (uint64_t)differing_value, (uint64_t)compare_value);
}

/* Tests_SRS_SYNC_43_001: [ wait_on_address shall atomically compare *address and *compare_address.] */
/* Tests_SRS_SYNC_43_002: [ wait_on_address shall immediately return true if *address is not equal to *compare_address.] */
/* Tests_SRS_SYNC_43_009: [ If timeout_ms milliseconds elapse, wait_on_address shall return false. ] */
Expand Down
66 changes: 58 additions & 8 deletions c_pal_ll/linux/src/sync_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,25 @@
#include "c_pal/interlocked.h" // for volatile_atomic
#include "c_pal/sync.h"

// The 64-bit variants of wait_on_address / wake_by_address use the futex2 wait/wake
// syscalls (Linux 6.7+) instead of the legacy SYS_futex syscall. The legacy syscall
// compares only 32 bits of the value at the address, so it cannot honor the
// wait_on_address_64 contract when *address differs from compare_value only in the
// upper 32 bits. The futex2 syscalls take a size flag (FUTEX2_SIZE_U64) that asks the
// kernel for a true 64-bit atomic check-before-sleep.
//
// Each definition below is a fallback that applies only when the system headers used
// for the build do not already provide the name.
// NOTE(review): the values are presumed to mirror the kernel UAPI headers (the syscall
// table for SYS_futex_wake / SYS_futex_wait, <linux/futex.h> for the FUTEX2_* flags).
// Confirm them for every target architecture, and confirm that the target kernel
// actually accepts FUTEX2_SIZE_U64 rather than rejecting it.
#if !defined(SYS_futex_wake)
#define SYS_futex_wake 454
#endif

#if !defined(SYS_futex_wait)
#define SYS_futex_wait 455
#endif

#if !defined(FUTEX2_SIZE_U64)
#define FUTEX2_SIZE_U64 0x03
#endif

#if !defined(FUTEX2_PRIVATE)
#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG /* 128 */
#endif
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these needed? where are they coming from?


MU_DEFINE_ENUM_STRINGS(WAIT_ON_ADDRESS_RESULT, WAIT_ON_ADDRESS_RESULT_VALUES)

WAIT_ON_ADDRESS_RESULT wait_on_address(volatile_atomic int32_t* address, int32_t compare_value, uint32_t timeout_ms)
Expand Down Expand Up @@ -68,11 +87,34 @@ WAIT_ON_ADDRESS_RESULT wait_on_address_64(volatile_atomic int64_t* address, int6
{
WAIT_ON_ADDRESS_RESULT result;

/* Codes_SRS_SYNC_LINUX_05_001: [ wait_on_address_64 shall initialize a timespec struct with .tv_nsec equal to timeout_ms* 10^6. ] */
struct timespec timeout = {timeout_ms / 1000, (timeout_ms % 1000) * 1e6 };
/* Codes_SRS_SYNC_LINUX_05_001: [ wait_on_address_64 shall compute an absolute CLOCK_MONOTONIC deadline equal to now + timeout_ms milliseconds, or pass NULL when timeout_ms is UINT32_MAX. ] */
struct timespec deadline;
struct timespec* deadline_p;
if (timeout_ms == UINT32_MAX)
{
deadline_p = NULL;
}
else
{
clock_gettime(CLOCK_MONOTONIC, &deadline);
deadline.tv_sec += timeout_ms / 1000;
deadline.tv_nsec += (long)((timeout_ms % 1000) * 1000000L);
if (deadline.tv_nsec >= 1000000000L)
{
deadline.tv_sec += 1;
deadline.tv_nsec -= 1000000000L;
}
deadline_p = &deadline;
}

/* Codes_SRS_SYNC_LINUX_05_002: [ wait_on_address_64 shall call syscall to wait on value at address to change to a value different than the one provided in compare_value. ] */
int syscall_result = syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, compare_value, &timeout, NULL, 0);
/* Codes_SRS_SYNC_LINUX_05_002: [ wait_on_address_64 shall call syscall(SYS_futex_wait) with FUTEX2_SIZE_U64 | FUTEX2_PRIVATE and a CLOCK_MONOTONIC absolute deadline, performing a true 64-bit atomic check-before-sleep. ] */
Copy link
Copy Markdown
Member Author

@parth21999 parth21999 May 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_05_002

All specs that are changed should be retagged with my devid 43. Changes are needed in devdoc, code and tests.

make sure repo_validation passes

long syscall_result = syscall(SYS_futex_wait,
address,
(uint64_t)compare_value,
~(uint64_t)0,
(unsigned int)(FUTEX2_SIZE_U64 | FUTEX2_PRIVATE),
deadline_p,
CLOCK_MONOTONIC);
if (syscall_result == 0)
{
/* Codes_SRS_SYNC_LINUX_05_003: [ If the value at address changes to a value different from compare_value then wait_on_address_64 shall return WAIT_ON_ADDRESS_OK. ] */
Expand Down Expand Up @@ -112,8 +154,12 @@ void wake_by_address_all(volatile_atomic int32_t* address)

/*
 * Wakes every thread currently waiting on the 64-bit value at address.
 *
 * The futex2 wake syscall is tried first. If it fails, the legacy futex wake is issued
 * instead, so the wake request is not silently dropped.
 *
 * address: the location that waiters passed to wait_on_address_64.
 */
void wake_by_address_all_64(volatile_atomic int64_t* address)
{
    /* Codes_SRS_SYNC_LINUX_05_007: [ wake_by_address_all_64 shall call syscall(SYS_futex_wake) with FUTEX2_SIZE_U64 | FUTEX2_PRIVATE to wake all listeners on the 64-bit address. ] */
    long wake_result = syscall(SYS_futex_wake,
        address,
        ~(uint64_t)0,
        INT_MAX,
        (unsigned int)(FUTEX2_SIZE_U64 | FUTEX2_PRIVATE));
    if (wake_result < 0)
    {
        // syscall() returns -1 when the kernel does not provide the futex2 syscall or
        // rejects its flags. A wake does not compare the futex value, so the legacy
        // 32-bit futex wake can serve as the fallback.
        // NOTE(review): presumes waiters on this address are reachable through the
        // legacy wake as well - confirm against the wait implementation in use.
        (void)syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, INT_MAX, NULL, NULL, 0);
    }
}

void wake_by_address_single(volatile_atomic int32_t* address)
Expand All @@ -125,6 +171,10 @@ void wake_by_address_single(volatile_atomic int32_t* address)

/*
 * Wakes at most one thread currently waiting on the 64-bit value at address.
 *
 * The futex2 wake syscall is tried first. The legacy futex wake is issued only when the
 * futex2 call fails, so a single call never requests more than one wake-up.
 *
 * address: the location that waiters passed to wait_on_address_64.
 */
void wake_by_address_single_64(volatile_atomic int64_t* address)
{
    /* Codes_SRS_SYNC_LINUX_05_008: [ wake_by_address_single_64 shall call syscall(SYS_futex_wake) with FUTEX2_SIZE_U64 | FUTEX2_PRIVATE to wake one listener on the 64-bit address. ] */
    long wake_result = syscall(SYS_futex_wake,
        address,
        ~(uint64_t)0,
        1,
        (unsigned int)(FUTEX2_SIZE_U64 | FUTEX2_PRIVATE));
    if (wake_result < 0)
    {
        // syscall() returns -1 when the kernel does not provide the futex2 syscall or
        // rejects its flags. A wake does not compare the futex value, so the legacy
        // 32-bit futex wake can serve as the fallback.
        // NOTE(review): presumes waiters on this address are reachable through the
        // legacy wake as well - confirm against the wait implementation in use.
        (void)syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0);
    }
}
88 changes: 88 additions & 0 deletions common/tests/interlocked_hl_int/interlocked_hl_int.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@

#include "c_pal/interlocked_hl.h"
#include "c_pal/interlocked.h"
#include "c_pal/sync.h"
#include "c_pal/timer.h"

TEST_DEFINE_ENUM_TYPE(THREADAPI_RESULT, THREADAPI_RESULT_VALUES);
TEST_DEFINE_ENUM_TYPE(INTERLOCKED_HL_RESULT, INTERLOCKED_HL_RESULT_VALUES);
TEST_DEFINE_ENUM_TYPE(WAIT_ON_ADDRESS_RESULT, WAIT_ON_ADDRESS_RESULT_VALUES);

BEGIN_TEST_SUITE(TEST_SUITE_NAME_FROM_CMAKE)

Expand Down Expand Up @@ -366,6 +369,91 @@ TEST_FUNCTION(interlocked_hl_wait_for_not_value_64)
// cleanup
}

/*
Tests:
InterlockedHL_WaitForNotValue64

Regression test for a Linux-only bug where InterlockedHL_WaitForNotValue64 can
have a lost-wakeup if a concurrent writer changes only the upper 32 bits of the
64-bit value. The function reads the full 64-bit value via interlocked_add_64,
then if it equals value_to_wait, calls wait_on_address_64. On Linux, the futex
syscall used by wait_on_address_64 only compares the lower 32 bits, so when the
upper 32 bits change between the read and the syscall (and the lower 32 bits
remain equal to compare_value's lower 32 bits), the kernel sleeps instead of
returning immediately.

The multiplexer integration tests hit this race because SUBSTREAM_IDs are
64-bit values whose lower 32 bits are an index. The first substream has
index = 0, so the lower 32 bits of its SUBSTREAM_ID match the initial sentinel
value 0, even though the upper 32 bits are non-zero.

This test deterministically reproduces the race by:
1. Reading the value (mimicking InterlockedHL_WaitForNotValue64's first step)
and confirming it equals value_to_wait.
2. Changing the upper 32 bits of the value (simulating a writer that wins the
race).
3. Calling wait_on_address_64 with the originally-read value as compare_value
(mimicking InterlockedHL_WaitForNotValue64's second step).
4. Asserting that wait_on_address_64 returns OK promptly per the
wait_on_address contract (SRS_SYNC_43_002).
*/
TEST_FUNCTION(interlocked_hl_wait_for_not_value_64_with_only_upper_32_bits_change)
{
    // arrange
    volatile_atomic int64_t value;
    (void)interlocked_exchange_64(&value, 0);
    int64_t value_to_wait = 0;
    // Unsigned to match the timeout_ms parameter of wait_on_address_64.
    uint32_t timeout_ms = 5000;
    // The wait must finish within this fraction of timeout_ms to count as "prompt".
    double tolerance_factor = 0.1;

    // Step 1 of InterlockedHL_WaitForNotValue64: read value with interlocked_add_64.
    int64_t current_value = interlocked_add_64(&value, 0);
    ASSERT_ARE_EQUAL(int64_t, value_to_wait, current_value, "test setup error: current_value must equal value_to_wait so that wait_on_address_64 is reached");

    // Simulate the race window: a concurrent writer changes ONLY the upper 32 bits
    // of the value before the (about-to-execute) wait_on_address_64 syscall.
    // The lower 32 bits remain equal to current_value's lower 32 bits.
    int64_t new_value = 0x100000000LL;
    (void)interlocked_exchange_64(&value, new_value);

    // act
    // Step 2 of InterlockedHL_WaitForNotValue64: call wait_on_address_64 with the
    // original current_value as compare_value. Per SRS_SYNC_43_002 this must return
    // immediately because *value (0x100000000) != current_value (0).
    double start_time = timer_global_get_elapsed_ms();
    WAIT_ON_ADDRESS_RESULT wait_result = wait_on_address_64(&value, current_value, timeout_ms);
    double time_elapsed = timer_global_get_elapsed_ms() - start_time;

    // Step 3 of InterlockedHL_WaitForNotValue64: re-read and translate to INTERLOCKED_HL_RESULT.
    INTERLOCKED_HL_RESULT hl_result;
    int64_t reread_value = interlocked_add_64(&value, 0);
    if (reread_value != value_to_wait)
    {
        hl_result = INTERLOCKED_HL_OK;
    }
    else if (wait_result == WAIT_ON_ADDRESS_TIMEOUT)
    {
        hl_result = INTERLOCKED_HL_TIMEOUT;
    }
    else
    {
        hl_result = INTERLOCKED_HL_ERROR;
    }

    // assert
    // hl_result is INTERLOCKED_HL_OK even when the bug is present (the value was
    // changed, so the post-wait re-read sees it); on its own it cannot detect the
    // regression. The regression is caught by the two checks before it: wait_result
    // must be WAIT_ON_ADDRESS_OK, and the wait must have returned promptly rather
    // than after the full timeout.
    ASSERT_ARE_EQUAL(WAIT_ON_ADDRESS_RESULT, WAIT_ON_ADDRESS_OK, wait_result,
        "wait_on_address_64 must return WAIT_ON_ADDRESS_OK when *value (0x%" PRIx64 ") != compare_value (0x%" PRIx64 "). It returned after %lf ms.",
        (uint64_t)new_value, (uint64_t)current_value, time_elapsed);
    ASSERT_IS_TRUE(time_elapsed < timeout_ms * tolerance_factor,
        "InterlockedHL_WaitForNotValue64 (simulated) took too long: %lf ms (max expected %lf ms). The lost-wakeup bug likely caused wait_on_address_64 to sleep until timeout.",
        time_elapsed, timeout_ms * tolerance_factor);
    ASSERT_ARE_EQUAL(INTERLOCKED_HL_RESULT, INTERLOCKED_HL_OK, hl_result);
}

/*
Tests:
InterlockedHL_WaitForValue
Expand Down
Loading