@@ -423,12 +423,14 @@ write_files:
423423 REMOTE_USER=$(echo "$SECRET_DATA" | jq -r '. user // ""')
424424 REMOTE_PASSWORD=$(echo "$SECRET_DATA" | jq -r '. password // ""')
425425 LOCAL_REMOTE_WRITE_URL=$(echo "$SECRET_DATA" | jq -r '. localRemoteWriteUrl // ""')
426+ MANAGED_ENDPOINT_URL=$(echo "$SECRET_DATA" | jq -r '. managedEndpointUrl // ""')
427+ MANAGED_BEARER_TOKEN=$(echo "$SECRET_DATA" | jq -r '. managedBearerToken // ""')
426428 RUNNER_ID=$(echo "$SECRET_DATA" | jq -r '. runnerId // ""')
427429 ORGANIZATION_ID=$(echo "$SECRET_DATA" | jq -r '. organizationId // ""')
428430 # Read allowlist as a bash array of prefixes
429431 mapfile -t ALLOWLIST_PREFIXES < <(echo "$SECRET_DATA" | jq -r '. allowlistPrefixes // [] | . []')
430432
431- log "Metrics enabled: $ENABLE_METRICS, URL: $REMOTE_URL, local remote write: $LOCAL_REMOTE_WRITE_URL, runner_id: $RUNNER_ID, organization_id: $ORGANIZATION_ID, allowlist prefixes: $${#ALLOWLIST_PREFIXES[@]}"
433+ log "Metrics enabled: $ENABLE_METRICS, URL: $REMOTE_URL, managed endpoint: $MANAGED_ENDPOINT_URL, local remote write: $LOCAL_REMOTE_WRITE_URL, runner_id: $RUNNER_ID, organization_id: $ORGANIZATION_ID, allowlist prefixes: $${#ALLOWLIST_PREFIXES[@]}"
432434
433435 # Generate final configuration using template substitution
434436 sed -e "s/{{INSTANCE_NAME}}/$INSTANCE_NAME/g" \
@@ -464,33 +466,57 @@ write_files:
464466 fi
465467 fi
466468
467- # Add local remote write target for managed metrics pipeline
468- if [ -n "$LOCAL_REMOTE_WRITE_URL" ]; then
# Precompute the allowlist regex a single time; the managed and the local
# remote_write targets both consume it.
# Result shape: (prefix1.*|prefix2.*|...), or the empty string when no
# prefixes are configured.
ALLOWLIST_REGEX=""
if [ $${#ALLOWLIST_PREFIXES[@]} -gt 0 ]; then
  # printf re-applies its format to every array element, producing
  # "prefix1.*|prefix2.*|"; the extra trailing "|" is trimmed below.
  ALLOWLIST_REGEX="$(printf '%s.*|' "$${ALLOWLIST_PREFIXES[@]}")"
  ALLOWLIST_REGEX="($${ALLOWLIST_REGEX%|})"
fi
479+
480+ # Helper: append write_relabel_configs for the allowlist.
# Appends a write_relabel_configs section to /tmp/prometheus.yml.new holding a
# single `keep` rule on the __name__ label with ALLOWLIST_REGEX as its regex.
# Takes no arguments; reads the ALLOWLIST_REGEX variable set by the enclosing
# script. When ALLOWLIST_REGEX is empty nothing is written, so the target this
# is called for gets no relabel filter at all.
# The lines are appended at the current end of the file, so call this right
# after echoing the `- url:` entry the filter should belong to.
# NOTE(review): the regex is embedded between single quotes; a prefix that
# contains a single quote would presumably break the generated YAML — confirm
# prefixes are restricted to metric-name characters.
481+ append_allowlist_relabel() {
482+ if [ -n "$ALLOWLIST_REGEX" ]; then
483+ echo " write_relabel_configs:" >> /tmp/prometheus. yml . new
484+ echo " - source_labels: [__name__]" >> /tmp/prometheus. yml . new
485+ echo " regex: '$ALLOWLIST_REGEX'" >> /tmp/prometheus. yml . new
486+ echo " action: keep" >> /tmp/prometheus. yml . new
487+ fi
488+ }
489+
490+ # Add managed endpoint direct push (preferred over local receiver).
491+ # Uses a scoped JWT to push metrics directly to the management plane.
# Branch selection: the managed target is emitted only when BOTH the endpoint
# URL and the bearer token are non-empty. Otherwise the local remote write
# target is emitted if LOCAL_REMOTE_WRITE_URL is set; if neither applies this
# section appends nothing.
# NOTE(review): the bearer token is written verbatim into
# /tmp/prometheus.yml.new — confirm that file's permissions, or consider
# Prometheus' `credentials_file` setting instead of inline `credentials`.
# NOTE(review): URL and token are interpolated into the YAML unquoted —
# presumably they never contain YAML-significant characters; verify.
492+ if [ -n "$MANAGED_ENDPOINT_URL" ] && [ -n "$MANAGED_BEARER_TOKEN" ]; then
493+ log "Adding managed endpoint remote write target: $MANAGED_ENDPOINT_URL"
# Emit the top-level remote_write: key only if no earlier target already did.
# HAS_REMOTE_WRITE is presumably initialised earlier in this script (not
# visible here) — TODO confirm.
494+ if [ "$HAS_REMOTE_WRITE" = "false" ]; then
495+ echo "" >> /tmp/prometheus. yml . new
496+ echo "remote_write:" >> /tmp/prometheus. yml . new
497+ HAS_REMOTE_WRITE=true
498+ fi
499+ echo " - url: $MANAGED_ENDPOINT_URL" >> /tmp/prometheus. yml . new
500+ echo " authorization:" >> /tmp/prometheus. yml . new
501+ echo " type: Bearer" >> /tmp/prometheus. yml . new
502+ echo " credentials: $MANAGED_BEARER_TOKEN" >> /tmp/prometheus. yml . new
503+ append_allowlist_relabel
504+
505+ # Audit: send the same filtered payload to the local audit receiver
506+ # which persists each write to GCS for customer audit trails.
# The audit target is added only in this managed-endpoint branch. Its URL is
# hard-coded and has to match the address and port the metrics audit receiver
# script listens on (127.0.0.1:9095).
507+ log "Adding metrics audit receiver remote write target"
508+ echo " - url: http://127. 0 . 0 . 1 :9095/write" >> /tmp/prometheus. yml . new
509+ append_allowlist_relabel
510+ elif [ -n "$LOCAL_REMOTE_WRITE_URL" ]; then
511+ # Fallback: local remote write target for managed metrics pipeline.
469512 log "Adding local remote write target: $LOCAL_REMOTE_WRITE_URL"
470513 if [ "$HAS_REMOTE_WRITE" = "false" ]; then
471514 echo "" >> /tmp/prometheus. yml . new
472515 echo "remote_write:" >> /tmp/prometheus. yml . new
473516 HAS_REMOTE_WRITE=true
474517 fi
475518 echo " - url: $LOCAL_REMOTE_WRITE_URL" >> /tmp/prometheus. yml . new
476-
477- # Add write_relabel_configs to filter by allowlist prefixes.
478- # Only metrics matching these prefixes are forwarded to the
479- # managed metrics pipeline. Uses a single regex with alternation.
480- if [ $${#ALLOWLIST_PREFIXES[@]} -gt 0 ]; then
481- # Build regex: (prefix1.*|prefix2.*|... )
482- REGEX="("
483- for i in "$${!ALLOWLIST_PREFIXES[@]}"; do
484- if [ "$i" -gt 0 ]; then REGEX+="|"; fi
485- REGEX+="$${ALLOWLIST_PREFIXES[$i]}.*"
486- done
487- REGEX+=")"
488-
489- echo " write_relabel_configs:" >> /tmp/prometheus. yml . new
490- echo " - source_labels: [__name__]" >> /tmp/prometheus. yml . new
491- echo " regex: '$REGEX'" >> /tmp/prometheus. yml . new
492- echo " action: keep" >> /tmp/prometheus. yml . new
493- fi
519+ append_allowlist_relabel
494520 fi
495521
496522 # Check if configuration changed
@@ -576,6 +602,8 @@ write_files:
576602 INSTANCE_GROUP_NAME=${INSTANCE_GROUP_NAME}
577603 BUILD_CACHE_BUCKET=${BUILD_CACHE_BUCKET}
578604 GITPOD_DEVELOPMENT_VERSION=${DEVELOPMENT_VERSION}
605+ MANAGED_METRICS_DIRECT_PUSH=true
606+ RUNNER_ASSETS_BUCKET_NAME=${RUNNER_ASSETS_BUCKET_NAME}
579607 PUBSUB_SUBSCRIPTION_ID=${PUBSUB_SUBSCRIPTION_ID}
580608 AUTH_PROXY_URL=${AUTH_PROXY_URL}
581609 RUNNER_LOGS_URL="${RUNNER_LOGS_URL}"
@@ -682,6 +710,7 @@ write_files:
682710 --env https_proxy=${HTTPS_PROXY} \
683711 --env all_proxy=${ALL_PROXY} \
684712 --env GITPOD_DEVELOPMENT_VERSION=${DEVELOPMENT_VERSION} \
713+ --env MANAGED_METRICS_DIRECT_PUSH=true \
685714 --env GITPOD_TERRAFORM_MODULE_VERSION=${TERRAFORM_MODULE_VERSION} \
686715 --env no_proxy=${NO_PROXY} \
687716%{ if HAS_TRUST_BUNDLE ~}
@@ -861,6 +890,91 @@ write_files:
861890 # Execute main function
862891 main "$@"
863892
893+ # Metrics audit receiver — accepts Prometheus remote_write POSTs and
894+ # writes each payload to GCS so customers can audit exactly what data
895+ # leaves their network. Listens on 127.0.0.1:9095.
896+ - path : /var /lib/gitpod/metrics-audit-receiver. py
897+ permissions: '0755'
898+ content: |
#!/usr/bin/env python3
"""Receives Prometheus remote_write payloads and writes them to GCS.

Listens on 127.0.0.1:9095. Every non-empty POST body is streamed, unmodified,
to `gcloud storage cp - gs://<bucket>/<key>`, where the bucket comes from the
RUNNER_ASSETS_BUCKET_NAME environment variable and the key is namespaced by
the RUNNER_ID environment variable.

Response codes:
    204  body stored, or the request carried no body
    400  Content-Length is not a non-negative integer
    502  the upload failed, timed out, or could not be started
"""
import os
import subprocess
import sys
import time
import uuid
from http.server import HTTPServer, BaseHTTPRequestHandler

LISTEN_ADDR = "127.0.0.1"
LISTEN_PORT = 9095
BUCKET = os.environ.get("RUNNER_ASSETS_BUCKET_NAME", "")
RUNNER_ID = os.environ.get("RUNNER_ID", "")


def parse_content_length(raw):
    """Parse a Content-Length header value.

    Returns 0 when the header is absent (raw is None), the parsed integer
    when it is a valid non-negative number, and None when it is malformed
    or negative.
    """
    if raw is None:
        return 0
    try:
        length = int(raw)
    except (TypeError, ValueError):
        return None
    return length if length >= 0 else None


def build_audit_key(runner_id, ts_ns, suffix):
    """Build the object key for one audited payload.

    runner_id -- runner identifier used as the key namespace
    ts_ns     -- receive time as integer nanoseconds since the Unix epoch
    suffix    -- caller-supplied unique token

    The key keeps the metrics/runner/<id>/<Y>/<MM>/<DD>/<HHMMSS> layout (UTC)
    and appends microseconds plus the suffix, so several payloads received
    within the same second get distinct objects instead of overwriting
    each other.
    """
    seconds, remainder_ns = divmod(ts_ns, 10 ** 9)
    now = time.gmtime(seconds)
    return (
        "metrics/runner/{rid}/{y}/{m:02d}/{d:02d}/"
        "{H:02d}{M:02d}{S:02d}-{us:06d}-{sfx}.pb.snappy"
    ).format(
        rid=runner_id,
        y=now.tm_year, m=now.tm_mon, d=now.tm_mday,
        H=now.tm_hour, M=now.tm_min, S=now.tm_sec,
        us=remainder_ns // 1000,
        sfx=suffix,
    )


class AuditHandler(BaseHTTPRequestHandler):
    """HTTP handler that persists each POSTed payload to GCS."""

    def _reply(self, status):
        """Send a body-less response with the given status code."""
        self.send_response(status)
        self.end_headers()

    def do_POST(self):
        length = parse_content_length(self.headers.get("Content-Length"))
        if length is None:
            # Reject instead of raising (malformed) or blocking on
            # rfile.read() until EOF (negative length).
            self._reply(400)
            return
        if length == 0:
            self._reply(204)
            return

        body = self.rfile.read(length)
        key = build_audit_key(RUNNER_ID, time.time_ns(), uuid.uuid4().hex[:8])
        dst = "gs://{}/{}".format(BUCKET, key)

        try:
            proc = subprocess.run(
                ["gcloud", "storage", "cp", "-", dst],
                input=body, capture_output=True, timeout=30,
            )
        except Exception as e:
            # Covers a missing gcloud binary and the 30 s timeout.
            sys.stderr.write("audit write error: {}\n".format(e))
            self._reply(502)
            return

        if proc.returncode != 0:
            sys.stderr.write(
                "gcloud cp failed: {}\n".format(proc.stderr.decode(errors="replace"))
            )
            self._reply(502)
            return

        self._reply(204)

    def log_message(self, fmt, *args):
        pass  # suppress per-request access logs


def main():
    """Validate the environment, then serve until the process is stopped."""
    if not BUCKET or not RUNNER_ID:
        sys.stderr.write("RUNNER_ASSETS_BUCKET_NAME or RUNNER_ID not set, exiting\n")
        sys.exit(1)

    server = HTTPServer((LISTEN_ADDR, LISTEN_PORT), AuditHandler)
    sys.stderr.write(
        "metrics-audit-receiver listening on {}:{}\n".format(LISTEN_ADDR, LISTEN_PORT)
    )
    server.serve_forever()


if __name__ == "__main__":
    main()
958+
959+ # Systemd service for the metrics audit receiver
# Installs a unit that runs /var/lib/gitpod/metrics-audit-receiver.py as a
# long-lived Type=simple service, ordered after network.target and before
# prometheus.service, and enabled through multi-user.target.
# The receiver script exits with status 1 when RUNNER_ASSETS_BUCKET_NAME or
# RUNNER_ID is missing from its environment; with Restart=always and
# RestartSec=5s systemd will presumably relaunch it every 5 seconds in that
# case, so the EnvironmentFile (/var/lib/gitpod/runner.env) must define both
# — TODO confirm RUNNER_ID is written there.
# NOTE(review): Before= only orders startup; it does not make
# prometheus.service depend on this unit — confirm that is intended.
960+ - path : / var / lib/ systemd/ system/ metrics- audit- receiver.service
961+ permissions: '0644 '
962+ content: |
963+ [Unit]
964+ Description= Metrics Audit Receiver
965+ After= network.target
966+ Before= prometheus.service
967+
968+ [Service]
969+ Type= simple
970+ Restart= always
971+ RestartSec= 5s
972+ EnvironmentFile= / var / lib/ gitpod/ runner.env
973+ ExecStart= / var / lib/ gitpod/ metrics- audit- receiver.py
974+
975+ [Install]
976+ WantedBy= multi- user.target
977+
864978 # Enhanced startup script with better error handling and validation
865979 - path : / tmp/ container- startup.sh
866980 permissions: '0755 '
@@ -999,6 +1113,7 @@ write_files:
9991113 " node-exporter.service"
10001114 " prometheus-config-updater.service"
10011115 " prometheus-config-updater.timer"
1116+ " metrics-audit-receiver.service"
10021117 " gitpod-auth-proxy.service"
10031118 " gitpod-runner.service"
10041119 )
0 commit comments