Skip to content

Commit c2765a0

Browse files
authored
Tag journal lines with host=, downgrade feeder_unclaimed to info (#97)
* feat(journal): tag log lines with host=, downgrade feeder_unclaimed to info Adds a host= field (host[:port], scheme/userinfo/path/query stripped) at the end of every structured line emitted by airplanes-diagnostics and apl-feed-config-sync so operators can tell at a glance which backend a request hit. Downgrades the per-tick feeder_unclaimed 403 to level=info with status=unclaimed — that's the expected pre-claim state, not a warning. * fix(common): harden _set_website_host against path-@ and log injection Strip path/query/fragment before userinfo so a URL with an @ inside the path (https://airplanes.live/api/user@example) doesn't get mis-tagged as host=example. Validate the resulting host against a hostname charset and fall back to host=invalid on anything else — without that, a WEBSITE_URL set to 'https://good level=error' would smuggle extra fields into the structured log line.
1 parent fa278f4 commit c2765a0

6 files changed

Lines changed: 173 additions & 5 deletions

File tree

scripts/airplanes-diagnostics.sh

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ ROOT="${AIRPLANES_DIAGNOSTICS_ROOT:-/}"
6767

6868
log() {
6969
local level="$1"; shift
70-
printf '%s level=%s %s\n' "$SCRIPT_NAME" "$level" "$*" >&2
70+
printf '%s level=%s %s host=%s\n' "$SCRIPT_NAME" "$level" "$*" "$WEBSITE_HOST" >&2
7171
}
7272

7373
# parse_report_status RAW
@@ -876,7 +876,18 @@ main() {
876876
4*)
877877
local body_err
878878
body_err="$(parse_field_from "$response_file" '.error')"
879-
log warn "status=client_error http=$status mode=$mode error=${body_err:-unknown}"
879+
# feeder_unclaimed is the expected pre-claim state on a fresh
880+
# feeder until someone redeems the claim secret on the website.
881+
# Logging it at level=warn every ~10 min causes alarm fatigue
882+
# and hides legitimate warnings under journalctl priority
883+
# filtering. Promote to a named status so operators can grep
884+
# status=unclaimed; keep error=feeder_unclaimed for any consumer
885+
# already counting that field.
886+
if [[ "${body_err:-}" == "feeder_unclaimed" ]]; then
887+
log info "status=unclaimed http=$status mode=$mode error=feeder_unclaimed"
888+
else
889+
log warn "status=client_error http=$status mode=$mode error=${body_err:-unknown}"
890+
fi
880891
;;
881892
5*)
882893
log warn "status=server_error http=$status mode=$mode"

scripts/apl-feed/common.sh

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,41 @@ _resolve_website_url() {
3333
printf 'https://airplanes.live'
3434
}
3535

36-
# shellcheck disable=SC2034 # WEBSITE_URL/MAX_RETRY_TIME/DRY_RUN/FORCE are read by sibling modules sourced from apl-feed.sh
36+
# Derive WEBSITE_HOST (host[:port], no scheme/userinfo/path/query/fragment)
37+
# from WEBSITE_URL. Used as a `host=` tag in structured journal lines emitted
38+
# by airplanes-diagnostics.sh and apl-feed/config.sh so operators can tell at
39+
# a glance which backend a request hit (production vs FEED_HOST-overridden
40+
# staging). Re-derived from parse_common_option's --website-url branch so CLI
41+
# overrides propagate before any log call.
42+
#
43+
# Order is path-then-userinfo, not the other way around: a path with an
44+
# embedded `@` (e.g. https://airplanes.live/api/user@example) would otherwise
45+
# get mis-tagged as host=example. We also greedy-strip userinfo (##*@) so a
46+
# pathological double-@ in userinfo lands on the canonical separator.
47+
#
48+
# The result is then validated against a hostname charset (alnum, hyphen,
49+
# dot, colon). If the source URL contains anything outside that set (CRLF,
50+
# spaces, `=` from a deliberate ` level=error` smuggle), WEBSITE_HOST is
51+
# set to `invalid` rather than risking a journal-line injection.
52+
_set_website_host() {
53+
local s="${WEBSITE_URL#*://}"
54+
s="${s%%/*}" # strip path
55+
s="${s%%\?*}" # strip query
56+
s="${s%%#*}" # strip fragment
57+
s="${s##*@}" # strip userinfo (greedy: pick last @)
58+
if [[ "$s" =~ ^[A-Za-z0-9.:-]+$ ]]; then
59+
WEBSITE_HOST="$s"
60+
else
61+
WEBSITE_HOST="invalid"
62+
fi
63+
}
64+
65+
# shellcheck disable=SC2034 # WEBSITE_URL/WEBSITE_HOST/MAX_RETRY_TIME/DRY_RUN/FORCE are read by sibling modules sourced from apl-feed.sh
3766
WEBSITE_URL="$(_resolve_website_url)"
3867
# shellcheck disable=SC2034
68+
WEBSITE_HOST=""
69+
_set_website_host
70+
# shellcheck disable=SC2034
3971
MAX_RETRY_TIME="${APL_FEED_MAX_RETRY_TIME:-60}"
4072
# shellcheck disable=SC2034
4173
DRY_RUN=0
@@ -489,6 +521,7 @@ parse_common_option() {
489521
[[ $# -ge 2 ]] || die "--website-url requires URL"
490522
# shellcheck disable=SC2034 # consumed by http.sh/claim.sh after parse
491523
WEBSITE_URL="$2"
524+
_set_website_host
492525
return 2
493526
;;
494527
--max-retry-time)

scripts/apl-feed/config.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ CONFIG_SYNC_LAST_SUCCESS_FILE="${AIRPLANES_CONFIG_SYNC_LAST_SUCCESS:-/var/lib/ai
6262
_config_sync_log() {
6363
local level="$1"
6464
shift
65-
printf 'apl-feed-config-sync level=%s %s\n' "$level" "$*" >&2
65+
printf 'apl-feed-config-sync level=%s %s host=%s\n' "$level" "$*" "$WEBSITE_HOST" >&2
6666
}
6767

6868
# True if feed.env has a (non-comment) line matching `^[[:space:]]*KEY=`.

test/test_airplanes_diagnostics.bats

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ run_script() {
174174
AIRPLANES_DIAGNOSTICS_ROOT="$ROOT_DIR" \
175175
AIRPLANES_DIAGNOSTICS_LAST_SUCCESS="$LAST_SUCCESS" \
176176
AIRPLANES_DIAGNOSTICS_INTENT_ACK_FILE="$INTENT_ACK" \
177-
APL_FEED_WEBSITE_URL='http://127.0.0.1:0' \
177+
APL_FEED_WEBSITE_URL="${APL_FEED_WEBSITE_URL:-http://127.0.0.1:0}" \
178178
COMMAND_LOG="$COMMAND_LOG" \
179179
BODY_LOG="$BODY_LOG" \
180180
HEADER_LOG="$HEADER_LOG" \
@@ -726,9 +726,37 @@ SH
726726
[ "$status" -eq 0 ]
727727
[[ "$output" == *"status=client_error"* ]]
728728
[[ "$output" == *"http=400"* ]]
729+
[[ "$output" == *"level=warn"* ]]
729730
[ ! -f "$LAST_SUCCESS" ]
730731
}
731732

733+
@test "HTTP 403 feeder_unclaimed downgrades to level=info status=unclaimed" {
734+
CURL_STATUS=403 CURL_RESPONSE='{"error":"feeder_unclaimed"}' run_script
735+
[ "$status" -eq 0 ]
736+
[[ "$output" == *"level=info"* ]]
737+
[[ "$output" == *"status=unclaimed"* ]]
738+
[[ "$output" == *"http=403"* ]]
739+
[[ "$output" == *"error=feeder_unclaimed"* ]]
740+
[[ "$output" != *"status=client_error"* ]]
741+
[[ "$output" != *"level=warn"* ]]
742+
[ ! -f "$LAST_SUCCESS" ]
743+
}
744+
745+
@test "HTTP 403 with non-feeder_unclaimed body stays at level=warn status=client_error" {
746+
CURL_STATUS=403 CURL_RESPONSE='{"error":"signature_invalid"}' run_script
747+
[ "$status" -eq 0 ]
748+
[[ "$output" == *"level=warn"* ]]
749+
[[ "$output" == *"status=client_error"* ]]
750+
[[ "$output" == *"error=signature_invalid"* ]]
751+
[[ "$output" != *"status=unclaimed"* ]]
752+
}
753+
754+
@test "log line carries host= tag from WEBSITE_URL" {
755+
APL_FEED_WEBSITE_URL='http://feed.airplanes.test:8080/v1' run_script
756+
[ "$status" -eq 0 ]
757+
[[ "$output" == *"host=feed.airplanes.test:8080"* ]]
758+
}
759+
732760
@test "HTTP 5xx logs server_error and exits 0" {
733761
CURL_STATUS=503 run_script
734762
[ "$status" -eq 0 ]

test/test_apl_feed_common.bats

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -749,3 +749,88 @@ STUB
749749
[ "$status" -eq 0 ]
750750
[ "$output" = "https://airplanes.live" ]
751751
}
752+
753+
# --- _set_website_host ---
754+
#
755+
# WEBSITE_HOST is the bare host[:port] tag used by structured journal lines
756+
# in airplanes-diagnostics and apl-feed-config-sync. The parser must strip
757+
# scheme, userinfo, path, query, and fragment, while keeping any explicit
758+
# port — non-default ports are operationally useful in the journal.
759+
760+
@test "_set_website_host: strips scheme only on plain host" {
761+
WEBSITE_URL="https://airplanes.live"
762+
_set_website_host
763+
[ "$WEBSITE_HOST" = "airplanes.live" ]
764+
}
765+
766+
@test "_set_website_host: preserves host:port" {
767+
WEBSITE_URL="http://localhost:8080"
768+
_set_website_host
769+
[ "$WEBSITE_HOST" = "localhost:8080" ]
770+
}
771+
772+
@test "_set_website_host: strips path" {
773+
WEBSITE_URL="https://airplanes.live/api/foo"
774+
_set_website_host
775+
[ "$WEBSITE_HOST" = "airplanes.live" ]
776+
}
777+
778+
@test "_set_website_host: strips query string" {
779+
WEBSITE_URL="https://airplanes.live?x=1"
780+
_set_website_host
781+
[ "$WEBSITE_HOST" = "airplanes.live" ]
782+
}
783+
784+
@test "_set_website_host: strips fragment" {
785+
WEBSITE_URL="https://airplanes.live#section"
786+
_set_website_host
787+
[ "$WEBSITE_HOST" = "airplanes.live" ]
788+
}
789+
790+
@test "_set_website_host: strips userinfo (credential leak prevention)" {
791+
WEBSITE_URL="https://user:pass@airplanes.live"
792+
_set_website_host
793+
[ "$WEBSITE_HOST" = "airplanes.live" ]
794+
}
795+
796+
@test "_set_website_host: userinfo + port + path + query combined" {
797+
WEBSITE_URL="https://u:p@host.example:9000/path?q=1"
798+
_set_website_host
799+
[ "$WEBSITE_HOST" = "host.example:9000" ]
800+
}
801+
802+
@test "_set_website_host: re-derives via parse_common_option --website-url" {
803+
WEBSITE_URL="https://airplanes.live"
804+
_set_website_host
805+
[ "$WEBSITE_HOST" = "airplanes.live" ]
806+
# Drive the real parser so the production code path is covered.
807+
# Return code 2 is parse_common_option's "consumed 2 args" signal, not
808+
# an error — || true keeps `set -e` from aborting the bats run.
809+
parse_common_option --website-url "http://staging.example:8443/v2" || true
810+
[ "$WEBSITE_URL" = "http://staging.example:8443/v2" ]
811+
[ "$WEBSITE_HOST" = "staging.example:8443" ]
812+
}
813+
814+
@test "_set_website_host: @ in path does not capture suffix as host" {
815+
WEBSITE_URL="https://airplanes.live/api/user@example"
816+
_set_website_host
817+
[ "$WEBSITE_HOST" = "airplanes.live" ]
818+
}
819+
820+
@test "_set_website_host: log-injection attempt becomes invalid" {
821+
WEBSITE_URL="https://good.example level=error injected=1"
822+
_set_website_host
823+
[ "$WEBSITE_HOST" = "invalid" ]
824+
}
825+
826+
@test "_set_website_host: embedded newline becomes invalid" {
827+
WEBSITE_URL=$'https://good.example\nfake-line'
828+
_set_website_host
829+
[ "$WEBSITE_HOST" = "invalid" ]
830+
}
831+
832+
@test "_set_website_host: empty / default produces invalid (defensive)" {
833+
WEBSITE_URL=""
834+
_set_website_host
835+
[ "$WEBSITE_HOST" = "invalid" ]
836+
}

test/test_apl_feed_config_sync.bats

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,17 @@ EOF
254254
fi
255255
}
256256

257+
@test "log line carries host= tag from WEBSITE_URL" {
258+
seed_feed_env
259+
set_canned 200 '{"schema_version":1,"server_time":"2026-05-14T12:00:00Z","owned":false}'
260+
# apl-feed.sh runs in a subshell — must export so common.sh picks it up.
261+
export APL_FEED_WEBSITE_URL='http://feed.airplanes.test:8080/v1'
262+
run_sync
263+
unset APL_FEED_WEBSITE_URL
264+
[ "$SYNC_RC" -eq 0 ]
265+
[[ "$SYNC_ERR" == *"host=feed.airplanes.test:8080"* ]]
266+
}
267+
257268
@test "200 applied response runs apl_feed_apply with server tuples" {
258269
seed_feed_env
259270
seed_feed_meta MLAT_USER "2026-05-10T00:00:00Z"

0 commit comments

Comments
 (0)