<concurrentBuild>false</concurrentBuild>
<builders>
<hudson.tasks.Shell>
<command>#!/usr/bin/env bash
set -euo pipefail
5757
# --- Logging helpers ------------------------------------------------------
# Timestamp in ISO-8601 with numeric UTC offset, e.g. 2024-01-01T12:00:00+0000.
LOG_TS() { date +"%Y-%m-%dT%H:%M:%S%z"; }
# All diagnostics go to stderr so stdout stays clean for data/pipelines.
log()   { echo "[$(LOG_TS)] INFO $*" >&2; }
warn()  { echo "[$(LOG_TS)] WARN $*" >&2; }
error() { echo "[$(LOG_TS)] ERROR $*" >&2; }
6062
# need CMD — abort the job (exit 2) unless CMD is available on PATH.
need() { command -v "$1" >/dev/null 2>&1 || { error "Missing required command: $1"; exit 2; }; }
6464
# EXIT handler: always scrub temporary AWS credentials from the environment,
# report success/failure with the failing command's context, and surface the
# tail of the generator's error log if one was written.
cleanup() {
  local rc=$?                      # exit status the script is terminating with
  local cmd="${BASH_COMMAND:-}"    # last command executed before exit
  local line="${BASH_LINENO[0]:-}" # source line of that command

  if [[ $rc -eq 0 ]]; then
    log "EXIT rc=0 (success). Cleaning AWS env vars."
  else
    error "EXIT rc=${rc} at line=${line} cmd=${cmd}. Cleaning AWS env vars."
  fi

  # Never leave temporary STS credentials behind in the Jenkins environment.
  unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN

  if [[ -s logs/error_logging.txt ]]; then
    warn "logs/error_logging.txt present (last 50 lines):"
    tail -n 50 logs/error_logging.txt >&2 || true
  fi
}
# ERR handler: log the failing command with its line number, then propagate
# the original status so `set -e` still terminates the script.
on_err() {
  local rc=$?
  local line="${BASH_LINENO[0]:-}"
  local cmd="${BASH_COMMAND:-}"
  error "ERR rc=${rc} at line=${line} cmd=${cmd}"
  return $rc
}
trap on_err ERR
trap cleanup EXIT
8292
# Assume the ETL role via STS and export the temporary credentials.
# Overridable via ROLE_ARN, ROLE_SESSION_NAME, ASSUME_DURATION_SECONDS.
assume_role() {
  local role_arn="${ROLE_ARN:-arn:aws:iam::736265540791:role/dbgap-etl}"
  local session_name="${ROLE_SESSION_NAME:-s3-test}"
  local duration="${ASSUME_DURATION_SECONDS:-3600}"

  log "Assuming role ${role_arn} (duration=${duration}s)"
  local assume_json
  assume_json="$(mktemp)"

  aws sts assume-role \
    --duration-seconds "$duration" \
    --role-arn "$role_arn" \
    --role-session-name "$session_name" \
    > "$assume_json"

  export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
  AWS_ACCESS_KEY_ID="$(jq -r '.Credentials.AccessKeyId' "$assume_json")"
  AWS_SECRET_ACCESS_KEY="$(jq -r '.Credentials.SecretAccessKey' "$assume_json")"
  AWS_SESSION_TOKEN="$(jq -r '.Credentials.SessionToken' "$assume_json")"
  rm -f "$assume_json"

  # jq -r prints the literal string "null" when a key is missing, which would
  # silently poison every later S3 call — fail fast instead.
  if [[ -z "${AWS_ACCESS_KEY_ID:-}" || "${AWS_ACCESS_KEY_ID}" == "null" ]]; then
    error "assume-role did not return usable credentials"
    return 1
  fi

  log "Role assumed"
}
97116
# reset_dir DIR — recreate DIR empty. ${1:?} aborts on an empty/missing
# argument so `rm -rf` can never run against "" (i.e. the cwd contents).
reset_dir() {
  local d="${1:?reset_dir: directory argument required}"
  rm -rf -- "$d"
  mkdir -p -- "$d"
}
101122
# s3_sync_prefix SRC_PREFIX DST_DIR [ALLOW_EMPTY]
# Safe sync of an S3 "directory" prefix:
#  - does NOT head-object "prefix/" (that 404s in S3 — a prefix is not an object)
#  - unless ALLOW_EMPTY=true, fails when the destination ends up with no
#    files, i.e. the prefix was empty or does not exist.
s3_sync_prefix() {
  local src_prefix="$1" dst_dir="$2" allow_empty="${3:-false}"
  mkdir -p -- "$dst_dir"

  log "S3 sync ${src_prefix} -> ${dst_dir} (allow_empty=${allow_empty})"
  if ! aws s3 sync "$src_prefix" "$dst_dir" --no-progress --only-show-errors; then
    error "S3 sync failed: ${src_prefix} -> ${dst_dir}"
    return 1
  fi

  # --only-show-errors suppresses per-file transfer lines, so command output
  # cannot distinguish "nothing to do" from "copied files"; inspect the
  # destination directory instead.
  if [[ "$allow_empty" != "true" ]]; then
    if [[ -z "$(find "$dst_dir" -type f -print -quit 2>/dev/null)" ]]; then
      error "S3 prefix appears empty or does not exist: ${src_prefix}"
      return 1
    fi
  fi

  return 0
}
152+
# s3_cp_file SRC DST [REQUIRED]
# Copy a single S3 object. REQUIRED=true (default) makes a failed copy fatal;
# otherwise the failure is logged as a warning and ignored.
s3_cp_file() {
  local src="$1" dst="$2" required="${3:-true}"
  if aws s3 cp "$src" "$dst" --no-progress --only-show-errors; then
    return 0
  fi
  if [[ "$required" == "true" ]]; then
    error "S3 copy failed (required): $src -> $dst"
    return 1
  fi
  warn "S3 copy failed (optional): $src -> $dst"
  return 0
}
165+
166+ # -----------------------------
167+ # Prereqs
168+ # -----------------------------
169+ need aws
170+ need jq
171+ need java
172+
173+ S3_BUCKET=" ${S3_BUCKET:-avillach-73-bdcatalyst-etl}"
174+ JOB_CONFIG_S3=" ${JOB_CONFIG_S3:-s3://${S3_BUCKET}/general/resources/job.config}"
175+ MAPPING_S3=" ${MAPPING_S3:-s3://${S3_BUCKET}/general/mappings/mapping.postrootnodes.csv}"
176+ OUT_S3_PREFIX=" ${OUT_S3_PREFIX:-s3://${S3_BUCKET}/general/completed/}"
177+
178+ ASSUME_DURATION_SECONDS=" ${ASSUME_DURATION_SECONDS:-3600}"
179+
180+ # -----------------------------
181+ # Workspace dirs (cleanly)
182+ # -----------------------------
183+ reset_dir data
184+ reset_dir completed
185+ reset_dir hierarchies
186+ reset_dir processing
187+ reset_dir mappings
188+ reset_dir resources
189+ reset_dir reports
190+ reset_dir logs
191+ rm -rf pic-sure-hpds/ || true
192+
193+ # -----------------------------
194+ # Assume role once (keep creds for all S3 ops)
195+ # -----------------------------
196+ assume_role
197+
198+ # -----------------------------
199+ # Pull inputs needed by GenerateAllConcepts
200+ # -----------------------------
201+ # IMPORTANT:
202+ # s3://bucket/prefix/ is not an object, so head-object on it will 404.
203+ # Use sync only for prefixes.
204+ s3_sync_prefix " s3://${S3_BUCKET}/general/data/" " data" false
205+ s3_sync_prefix " s3://${S3_BUCKET}/general/globalvars/study_consents/" " data" false
206+
207+ log " Downloading mapping.postrootnodes.csv -> mappings/mapping.csv"
208+ s3_cp_file " $MAPPING_S3" " mappings/mapping.csv" true
209+
210+ if [[ -s " data/studies_consents_mapping.csv" ]]; then
211+ log " Appending data/studies_consents_mapping.csv -> mappings/mapping.csv"
212+ cat " data/studies_consents_mapping.csv" >> " mappings/mapping.csv"
213+ else
214+ warn " data/studies_consents_mapping.csv missing/empty; not appending"
215+ fi
216+
217+ log " Downloading job.config -> resources/job.config"
218+ s3_cp_file " $JOB_CONFIG_S3" " resources/job.config" true
219+
220+ # -----------------------------
221+ # Run generator
222+ # -----------------------------
223+ log " Running GenerateAllConcepts.jar"
224+ java -jar jars/GenerateAllConcepts.jar -propertiesfile " resources/job.config" 2>>" logs/error_logging.txt"
225+
226+ if [[ -s logs/error_logging.txt ]]; then
227+ warn " GenerateAllConcepts wrote to logs/error_logging.txt (last 50 lines):"
228+ tail -n 50 logs/error_logging.txt >& 2 || true
229+ # Uncomment to hard-fail on any logged errors:
230+ # error " Errors logged during GenerateAllConcepts" ; exit 1
231+ fi
232+
233+ # -----------------------------
234+ # Upload outputs
235+ # -----------------------------
236+ if [[ ! -s " completed/GLOBAL_allConcepts.csv" ]]; then
237+ error " Expected output not found or empty: completed/GLOBAL_allConcepts.csv"
238+ exit 1
239+ fi
240+
241+ log " Uploading GLOBAL_allConcepts.csv -> ${OUT_S3_PREFIX}"
242+ aws s3 cp " completed/GLOBAL_allConcepts.csv" " ${OUT_S3_PREFIX}" --no-progress --only-show-errors
243+
244+ log " Done" </command >
<configuredLocalRules/>
</hudson.tasks.Shell>
</builders>
<publishers/>
<buildWrappers>
<!-- NOTE(review): plugin version attribute was obfuscated by the scraper
     ("email protected"); restore the real "ws-cleanup@<version>" value. -->
<hudson.plugins.ws_cleanup.PreBuildCleanup plugin="ws-cleanup@UNKNOWN">
<deleteDirs>false</deleteDirs>
<cleanupParameter></cleanupParameter>
<externalDelete></externalDelete>
<disableDeferredWipeout>false</disableDeferredWipeout>
</hudson.plugins.ws_cleanup.PreBuildCleanup>
</buildWrappers>
</project>
0 commit comments