Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
9519e1c
feat(agent): plant bait as a FIFO with cached content (#100)
LiorFink00 Jun 24, 2026
0390be1
fix(agent): unset probe temp, clean cache on mkfifo failure, tidy tes…
LiorFink00 Jun 24, 2026
9a05183
feat(agent): FIFO serving loop + supervisor; reads fire callbacks (#100)
LiorFink00 Jun 24, 2026
c02f1c6
feat(agent): attribute FIFO reader via lsof inode scan (#100)
LiorFink00 Jun 24, 2026
7430a8a
feat(agent): remove FIFOs on exit + sweep stale pipes on startup (#100)
LiorFink00 Jun 24, 2026
cab073f
test(agent): make startup-sweep test gate the feature via orphan FIFO…
LiorFink00 Jun 24, 2026
145a9dd
feat(agent): remove fs_usage, fix atime fallback (#28), FIFO-aware ve…
LiorFink00 Jun 24, 2026
c9dc855
fix(agent): restart FIFO watcher after verify re-plants a bait (#100)
LiorFink00 Jun 24, 2026
648b8f5
test(agent): two agents on one host both detect via FIFO (#94/#95) (#…
LiorFink00 Jun 24, 2026
226f37f
fix(agent): sweep stale FIFOs only after acquiring singleton lock (#100)
LiorFink00 Jun 24, 2026
4148d5f
docs(agent): replace stale fs_usage references with FIFO sensor (#100)
LiorFink00 Jun 24, 2026
cd31f23
merge: reconcile FIFO sensor with main's inotify sensor (#100)
LiorFink00 Jun 25, 2026
b282394
agent: fix bare `return` in probe_fifo_mode aborting the agent on Linux
LiorFink00 Jun 25, 2026
c59aafe
Merge remote-tracking branch 'origin/main' into fifo-sync-main
LiorFink00 Jun 25, 2026
6267d45
ci: run the test matrix on macOS too so the FIFO sensor is covered
LiorFink00 Jun 25, 2026
0de43c8
feat(agent): re-armable atime sensor + `--sensor` selector (#28, #100)
LiorFink00 Jun 26, 2026
1e029ea
agent: arm_atime must not create the bait file (fixes re-plant cap on…
LiorFink00 Jun 27, 2026
b1eac6f
Merge main into feat/fifo-sensor
LiorFink00 Jun 28, 2026
bf6b1b6
Merge feat/fifo-sensor into feat/atime-rearm-sensor
LiorFink00 Jun 28, 2026
baa79c7
fix(agent): address Roee's #160 review — FIFO/atime mode safety
LiorFink00 Jun 28, 2026
c870d81
fix(agent): address Roee's #123 review — FIFO sensor resilience
LiorFink00 Jun 28, 2026
74e33c4
Merge feat/fifo-sensor into feat/atime-rearm-sensor (Roee #123 fixes)
LiorFink00 Jun 28, 2026
1c4e30e
test(agent): de-flake test_atime_sensor_is_rearmable
LiorFink00 Jun 28, 2026
5cfeee5
Merge remote-tracking branch 'origin/main' into recon-123
LiorFink00 Jun 30, 2026
5298a1f
chore: merge main + ruff-clean test_agent_fifo.py (#177)
LiorFink00 Jun 30, 2026
961b9f9
chore: reconcile #160 with main + ruff-clean (#177, #178)
LiorFink00 Jun 30, 2026
4e58e60
fix(agent): close Roee's #123 re-review residuals (F2, F3)
LiorFink00 Jul 1, 2026
d404376
Merge remote-tracking branch 'origin/feat/fifo-sensor' into p160
LiorFink00 Jul 1, 2026
ae7a647
CI: stable aggregate `test` check over the OS matrix
LiorFink00 Jul 1, 2026
54b2037
Merge remote-tracking branch 'origin/feat/fifo-sensor' into prop-160
LiorFink00 Jul 1, 2026
d9944c7
Merge remote-tracking branch 'origin/main' into merge-main-123
LiorFink00 Jul 1, 2026
b4402d1
agent: record FIFO bait before mkfifo to close a clean-exit cleanup race
LiorFink00 Jul 1, 2026
a930008
Merge remote-tracking branch 'origin/feat/fifo-sensor' into reprop-160
LiorFink00 Jul 1, 2026
a8fce5b
Merge remote-tracking branch 'origin/main' into reconcile-160
LiorFink00 Jul 1, 2026
584b9d0
Per-deployment sensor: run FIFO + atime baits together (#100, dual-pl…
LiorFink00 Jul 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 127 additions & 29 deletions agent/thumper_agent.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,39 @@ LAST_RESYNC=0
FIFO_MODE=0
BAITCACHE=""
REPLANTED=0
# mkfifo_works: probe whether a FIFO can actually be created next to the state
# file (any platform: FIFO works on Linux/CI too).
# Globals:  STATE_FILE (read); _probe (scratch, always unset before returning)
# Returns:  0 if `mkfifo` is available and the probe FIFO was created (it is
#           removed again immediately); 1 otherwise.
mkfifo_works() {
  command -v mkfifo >/dev/null 2>&1 || return 1
  _probe="$(dirname "$STATE_FILE")/.fifoprobe.$$"
  if mkfifo "$_probe" 2>/dev/null; then
    rm -f "$_probe"
    unset _probe
    return 0
  fi
  unset _probe
  return 1
}

# probe_fifo_mode: AUTO policy - default to FIFO on macOS only (Linux defaults
# to inotify).
# Globals:  FIFO_MODE (written: 1 when on darwin and mkfifo works, else 0)
# Returns:  always 0. The outcome is reported through FIFO_MODE only, so a
#           failed probe never propagates a non-zero status to a caller that
#           runs under errexit (`set -e`), where it would abort the agent
#           instead of falling back to another sensor.
probe_fifo_mode() {
  FIFO_MODE=0
  [ "$(platform)" = "darwin" ] || return 0
  if mkfifo_works; then
    FIFO_MODE=1
  fi
  return 0
}
# effective_sensor <i>: which sensor governs THIS bait. Precedence:
# 1. an explicit operator --sensor (fifo|atime) - an intentional override that
# must win over the server (#164 F2): the operator opted out of/into FIFOs;
# 2. the deployment's OWN sensor when the server sent one (dual-plant pairs);
# 3. the platform default.
# Lets one agent run a FIFO bait and an atime bait side by side.
# effective_sensor <i>: print (no trailing newline) the sensor name that governs
# deployment <i>. Resolution order: operator override, then the deployment's
# own sensor, then the platform default.
# Globals:  SENSOR, FIFO_MODE, dep_sensor_<i> (read); _es (scratch)
# Returns:  0
effective_sensor() {
  # 1. explicit operator choice wins outright
  if [ "$SENSOR" = fifo ] || [ "$SENSOR" = atime ]; then
    printf '%s' "$SENSOR"
    return 0
  fi

  # 2. sensor attached to this deployment, if any
  eval "_es=\${dep_sensor_$1:-}"
  if [ -n "$_es" ]; then
    printf '%s' "$_es"
    return 0
  fi

  # 3. platform default
  if [ "$FIFO_MODE" = 1 ]; then
    printf 'fifo'
    return 0
  fi
  if [ "$(platform)" = linux ] && command -v inotifywait >/dev/null 2>&1; then
    printf 'inotify'
  else
    printf 'atime'
  fi
}
# has_explicit_sensors: does any deployment carry its own sensor (i.e. the
# server is sending pairs)?
# Globals:  DEP_COUNT, dep_sensor_<n> (read); _i, _s (scratch)
# Returns:  0 as soon as a non-empty dep_sensor_<n> is found for n in
#           1..DEP_COUNT, 1 if none is.
has_explicit_sensors() {
  _i=1
  while :; do
    # ran past the last deployment without a hit
    [ "$_i" -le "$DEP_COUNT" ] || return 1
    eval "_s=\${dep_sensor_$_i:-}"
    case "$_s" in
      '') _i=$((_i + 1)) ;;
      *) return 0 ;;
    esac
  done
}
# cache_path <deployment-id>: print "$BAITCACHE/<deployment-id>" (no trailing
# newline).
cache_path() {
  printf '%s' "$BAITCACHE/$1"
}
TAB=$(printf '\t')
Expand Down Expand Up @@ -255,14 +281,15 @@ pull_deployments() {
oldifs=$IFS
IFS="$TAB"
# `printf | while` would subshell the counters away; feed via a here-doc.
while IFS="$TAB" read -r id path secret content_url callback_url; do
while IFS="$TAB" read -r id path secret content_url callback_url sensor; do
[ -n "$id" ] || continue
DEP_COUNT=$((DEP_COUNT + 1))
eval "dep_id_$DEP_COUNT=\$id"
eval "dep_path_$DEP_COUNT=\$(expand_path \"\$path\")"
eval "dep_secret_$DEP_COUNT=\$secret"
eval "dep_content_$DEP_COUNT=\$content_url"
eval "dep_callback_$DEP_COUNT=\$callback_url"
eval "dep_sensor_$DEP_COUNT=\${sensor:-}" # per-deployment sensor (fifo|atime|inotify); empty = use global default
eval "dep_last_$DEP_COUNT=0"
done <<EOF
$body
Expand Down Expand Up @@ -341,7 +368,7 @@ plant() { # plant <i>
return 1
fi

if [ "$FIFO_MODE" = 1 ]; then
if [ "$(effective_sensor "$1")" = fifo ]; then
mkdir -p "$BAITCACHE"
chmod 700 "$BAITCACHE" 2>/dev/null || true
cf=$(cache_path "$id")
Expand All @@ -366,6 +393,11 @@ plant() { # plant <i>
return 0
fi

# Planting a REGULAR-file bait: if a leftover FIFO sits at this path (e.g. a
# prior FIFO run, swept-miss), remove it first - `curl -o` into a no-reader
# FIFO blocks forever (Roee #160 F1). Only our own bait reaches here (the
# overwrite guard above already refused a path we didn't plant).
[ -p "$path" ] && rm -f "$path"
if ! curl -fsS "$url" -H "Authorization: Bearer $AGENT_TOKEN" -o "$path"; then
rm -f "$path" # remove the partial/empty file curl may have left
err "failed to fetch bait for $id"
Expand Down Expand Up @@ -548,28 +580,51 @@ watch_inotify() {
watch_atime
}

# atime sensor helpers. Detection-only (no process/user) but works on a NORMAL
# regular-file bait under all constraints (no kdebug, no mount, no privilege),
# so it's the primary layer that covers the FIFO sensor's blind spots
# (statSync-guarded / mmap / scan-only readers). See #28, #100.
ATIME_ARM_STAMP=200001010000 # `touch -t` stamp: 2000-01-01 00:00 - atime far in the past

# arm_atime <path>: set the file's atime to ATIME_ARM_STAMP so the next read
# bumps it (relatime/APFS). Best-effort: errors are ignored.
arm_atime() {
  # -c: never CREATE the file. Arming a missing bait would otherwise leave an
  # empty file behind, making verify_planted think a failed-plant dep is planted
  # and silently skip re-planting it (#28/#100).
  touch -a -c -t "$ATIME_ARM_STAMP" "$1" 2>/dev/null || true
}

# read_atime <path>: print the access time as an epoch. GNU `stat -c %X` is
# tried first, then BSD `stat -f %a` (never %a on Linux, that's free blocks:
# #28). Prints 0 when neither form succeeds (e.g. the file is missing).
read_atime() {
  stat -c %X "$1" 2>/dev/null || stat -f %a "$1" 2>/dev/null || echo 0
}

# all_indices: print " 1 2 ... DEP_COUNT" (each index preceded by a space, no
# trailing newline) - every deployment. Prints nothing when DEP_COUNT is 0.
all_indices() {
  _ai=""
  _i=1
  while [ "$_i" -le "$DEP_COUNT" ]; do
    _ai="$_ai $_i"
    _i=$((_i + 1))
  done
  printf '%s' "$_ai"
}

# atime_poll "<idx idx ...>": arm + re-armable-poll only the given deployments.
# The index list lets the mixed watcher poll just the atime baits while FIFO
# baits are served separately; watch_atime() polls all (homogeneous + fallback).
# Globals:  POLL, dep_path_<i> (read); atime_<i> (per-bait baseline, written);
#           i, p, prev, cur (scratch)
# Never returns on its own: it loops until the process is stopped.
atime_poll() {
  log "atime poll every ${POLL}s on regular-file bait(s) (re-armable; detection only - no process/user)"
  # shellcheck disable=SC2086 # $1 is a space-separated index list; splitting is intended
  for i in $1; do
    eval "p=\$dep_path_$i"
    arm_atime "$p" # arm so relatime bumps atime on a read
    eval "atime_$i=\$(read_atime \"\$p\")"
  done
  while true; do
    sleep "$POLL"
    # shellcheck disable=SC2086
    for i in $1; do
      eval "p=\$dep_path_$i prev=\$atime_$i"
      cur=$(read_atime "$p")
      # cur=0 means "could not stat"; the 2>/dev/null hides the `[` error when
      # either side is not an integer, so that case simply does not fire.
      if [ "$cur" != "0" ] && [ "$cur" -gt "$prev" ] 2>/dev/null; then
        eval "atime_$i=\$cur"
        fire "$i" "atime-change" "" "" "" "$p"
        arm_atime "$p" # RE-ARM so the NEXT read is detectable too
        eval "atime_$i=\$(read_atime \"\$p\")"
      fi
    done
  done
}

# watch_atime: poll every bait (homogeneous atime mode + degradation fallback).
watch_atime() { atime_poll "$(all_indices)"; }

# ── live sync (re-pull + reconcile) ───────────────────────────────────────────
# A running agent re-pulls its deployment set every --sync-interval and applies
Expand Down Expand Up @@ -657,9 +712,34 @@ watch_fifo() { # supervisor: keep one serve_fifo alive per bait; restart any th
done
}

# Dual-plant: each deployment runs under its OWN sensor. FIFO baits (canonical,
# definitive pid) are served individually; atime/inotify baits (companion,
# detection) are atime-polled as a group. Used whenever the server sends pairs.
# watch_mixed: run every deployment under its own sensor. Deployments whose
# effective sensor is `fifo` each get a background serve_fifo; all others
# (atime/inotify/unknown) are collected and atime-polled together in one
# background atime_poll.
# Globals:  DEP_COUNT, WATCH_STOP_FLAG (read); _atidx, i (scratch)
# Returns:  0 if the children ended while the stop-flag file exists; otherwise
#           logs an error and degrades to atime-polling every deployment.
# NOTE(review): a bare `wait` returns only once ALL children have exited, so a
# single serve_fifo that dies is not restarted here while the atime poller is
# still running - confirm that is intended.
watch_mixed() {
  log "watching $DEP_COUNT bait(s) with per-deployment sensors"
  _atidx=""
  i=1
  while [ "$i" -le "$DEP_COUNT" ]; do
    case "$(effective_sensor "$i")" in
      fifo)
        serve_fifo "$i" &
        ;;
      *)
        _atidx="$_atidx $i" # atime/inotify/unknown -> atime poll (detection)
        ;;
    esac
    i=$((i + 1))
  done

  if [ -n "$_atidx" ]; then
    atime_poll "$_atidx" &
  fi
  wait

  # Children gone: a deliberate stop leaves the flag file behind.
  if [ -e "${WATCH_STOP_FLAG:-/nonexistent}" ]; then
    return 0
  fi
  err "mixed watcher exited unexpectedly - degrading to atime poll"
  atime_poll "$(all_indices)"
}

start_watcher() { # launch the right sensor in the background; set WATCH_PID
rm -f "${WATCH_STOP_FLAG:-}" 2>/dev/null || true # this start is not a stop
if [ "$FIFO_MODE" = 1 ]; then
if has_explicit_sensors; then
watch_mixed & # per-deployment sensors (dual-plant pairs)
elif [ "$SENSOR" = atime ]; then
watch_atime & # forced atime sensor (any platform)
elif [ "$FIFO_MODE" = 1 ]; then
watch_fifo &
elif [ "$(platform)" = "linux" ] && command -v inotifywait >/dev/null 2>&1; then
watch_inotify &
Expand Down Expand Up @@ -743,7 +823,7 @@ verify_planted() {
# and never re-plant through it (curl -o would write the target); report
# failed so the lost coverage is visible.
report_plant "$vid" failed
elif [ "$FIFO_MODE" = 1 ] && [ -e "$p" ] && ! [ -p "$p" ]; then
elif [ "$(effective_sensor "$i")" = fifo ] && [ -e "$p" ] && ! [ -p "$p" ]; then
# A regular file where our FIFO should be = tampering/replacement.
# Recover like the "missing" branch below: plant() removes the impostor
# (our own path) and re-creates the FIFO, then REPLANTED restarts the
Expand Down Expand Up @@ -794,8 +874,14 @@ run() {
BAITCACHE="$(dirname "$STATE_FILE")/bait"
WATCH_STOP_FLAG="$(dirname "$STATE_FILE")/watcher.stopping"
mkdir -p "$(dirname "$STATE_FILE")"
probe_fifo_mode
[ "$FIFO_MODE" = 1 ] && log "sensor: FIFO bait (macOS)"
case "$SENSOR" in
atime) FIFO_MODE=0; log "sensor: atime poll (regular-file bait, re-armable)" ;;
fifo) # operator forced FIFO: honor it on ANY platform (mkfifo works on Linux/CI), or fail loudly
if mkfifo_works; then FIFO_MODE=1; log "sensor: FIFO bait (forced)"
else err "--sensor fifo requested but mkfifo is unavailable here"; exit 1; fi ;;
*) probe_fifo_mode
[ "$FIFO_MODE" = 1 ] && log "sensor: FIFO bait (macOS)" ;;
esac
MAIN_PID=$$ # so the backgrounded heartbeat loop can signal us to self-destruct
# Enforce one-agent-per-install before any work; a duplicate exits here (the
# EXIT trap below is NOT yet set, so it can't disturb the live holder's lock).
Expand All @@ -804,8 +890,11 @@ run() {
trap 'release_singleton' EXIT
# Only the lock holder sweeps stale FIFOs from a prior hard-kill; a duplicate
# invocation exits at acquire_singleton above and must never touch the live
# agent's shared manifest/FIFOs (MDM re-push safety).
[ "$FIFO_MODE" = 1 ] && remove_fifos
# agent's shared manifest/FIFOs (MDM re-push safety). Sweep regardless of the
# CURRENT sensor: a prior FIFO run's leftover pipes must be cleared even when
# this run is atime mode, else plant() would curl into a no-reader FIFO and
# hang forever (only manifest paths that ARE FIFOs are removed, so it's safe).
remove_fifos
resolve_target_user

# Abort BEFORE enrolling if any bait path is occupied, so a refused install
Expand Down Expand Up @@ -889,8 +978,13 @@ run() {
fi
REPLANTED=0
verify_planted # every cycle, even when the set did not change
if [ "$FIFO_MODE" = 1 ] && [ "$REPLANTED" = 1 ]; then
log "re-planted bait - restarting FIFO watcher to serve it"
if [ "$REPLANTED" = 1 ]; then
# A re-plant gives the bait a new inode/timestamp, which every sensor's
# per-bait state depends on: a FIFO needs re-serving, an atime bait
# needs re-arming (else its stale year-2000 baseline fires a ghost
# alert), an inotify watch needs re-pointing at the new inode. Restart
# regardless of platform/mode (FIFO_MODE is 0 on Linux even for FIFOs).
log "re-planted bait - restarting watcher to re-arm/re-serve it"
stop_watcher
start_watcher
fi
Expand All @@ -905,7 +999,9 @@ usage: thumper_agent.sh run --server URL --enroll-token TOKEN [options]
--version print the agent version and exit
--tripwire ID tripwire to apply (repeatable)
--state-file PATH state file (default: $DEFAULT_STATE)
--poll SECONDS atime fallback poll interval (default: 5)
--poll SECONDS atime poll interval (default: 5)
--sensor MODE read sensor: auto|fifo|atime (default: auto). atime plants a
regular-file bait + re-armable atime tripwire (no pid)
--heartbeat SECONDS heartbeat interval; 0 to disable (default: 60)
--sync-interval SECS re-pull deployments + reconcile every SECS (default: 300, 0 disables)
--once enroll + plant, then exit
Expand All @@ -925,7 +1021,7 @@ usage() {
exit "$code"
}

SERVER=""; ENROLL_TOKEN=""; TRIPWIRES=""; STATE_FILE=""; POLL=5; HEARTBEAT=60; SYNC_INTERVAL=300; ONCE=0; SIMULATE=0; FORCE=0; EPHEMERAL=0
SERVER=""; ENROLL_TOKEN=""; TRIPWIRES=""; STATE_FILE=""; POLL=5; HEARTBEAT=60; SYNC_INTERVAL=300; ONCE=0; SIMULATE=0; FORCE=0; EPHEMERAL=0; SENSOR=auto

case "${1:-}" in
--help|-h) usage 0 ;;
Expand All @@ -945,6 +1041,7 @@ while [ $# -gt 0 ]; do
--poll) is_uint "${2:-}" || { err "--poll must be a non-negative integer"; exit 2; }; POLL=$2; shift 2 ;;
--heartbeat) is_uint "${2:-}" || { err "--heartbeat must be a non-negative integer"; exit 2; }; HEARTBEAT=$2; shift 2 ;;
--sync-interval) is_uint "${2:-}" || { err "--sync-interval must be a non-negative integer"; exit 2; }; SYNC_INTERVAL=$2; shift 2 ;;
# --sensor needs a value: without this guard a trailing `--sensor` makes
# `shift 2` fail (a cryptic shell error, or no shift at all, depending on the
# shell) instead of a clear usage error. The value itself is validated after
# the option loop.
--sensor) [ $# -ge 2 ] || { err "--sensor requires a value (auto|fifo|atime)"; exit 2; }; SENSOR=$2; shift 2 ;;
--once) ONCE=1; shift ;;
--simulate) SIMULATE=1; shift ;;
--force) FORCE=1; shift ;;
Expand All @@ -953,6 +1050,7 @@ while [ $# -gt 0 ]; do
esac
done
[ -n "$SERVER" ] && [ -n "$ENROLL_TOKEN" ] || usage
case "$SENSOR" in auto|fifo|atime) ;; *) err "invalid --sensor: $SENSOR (want auto|fifo|atime)"; usage ;; esac

for tool in curl openssl; do
command -v "$tool" >/dev/null 2>&1 || { err "$tool is required"; exit 1; }
Expand Down
Loading
Loading