Skip to content

Commit 1296b3e

Browse files
author
jenkins
committed
Jenkins backup: 2025-12-20T00:01:26Z
1 parent cd89da8 commit 1296b3e

File tree

6 files changed

+719
-194
lines changed
  • jenkins-docker/jobs

6 files changed

+719
-194
lines changed

jenkins-docker/jobs/Curation - Create Sample and Subject Files/config.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
</userRemoteConfigs>
4949
<branches>
5050
<hudson.plugins.git.BranchSpec>
51-
<name>*/multi_mapping_fix</name>
51+
<name>*/main</name>
5252
</hudson.plugins.git.BranchSpec>
5353
</branches>
5454
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>

jenkins-docker/jobs/ETL - Generate DBGAP Global All Concepts/config.xml

Lines changed: 181 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -52,63 +52,206 @@
5252
<concurrentBuild>false</concurrentBuild>
5353
<builders>
5454
<hudson.tasks.Shell>
55-
<command>#!/bin/bash
55+
<command>#!/usr/bin/env bash
5656
set -euo pipefail
5757

58-
### assume etl role
59-
aws sts assume-role --duration-seconds 900 --role-arn arn:aws:iam::736265540791:role/dbgap-etl --role-session-name &quot;s3-test&quot; &gt; assume-role-output.txt
58+
LOG_TS() { date +&quot;%Y-%m-%dT%H:%M:%S%z&quot;; }
59+
log() { echo &quot;[$(LOG_TS)] INFO $*&quot; &gt;&amp;2; }
60+
warn() { echo &quot;[$(LOG_TS)] WARN $*&quot; &gt;&amp;2; }
61+
error() { echo &quot;[$(LOG_TS)] ERROR $*&quot; &gt;&amp;2; }
6062

61-
export AWS_ACCESS_KEY_ID=`grep AccessKeyId assume-role-output.txt | cut -d &apos;:&apos; -f 2 | sed &quot;s/[ ,\&quot;]//g&quot;`
62-
export AWS_SECRET_ACCESS_KEY=`grep SecretAccessKey assume-role-output.txt | cut -d &apos;:&apos; -f 2 | sed &quot;s/[ ,\&quot;]//g&quot;`
63-
export AWS_SESSION_TOKEN=`grep SessionToken assume-role-output.txt | cut -d &apos;:&apos; -f 2 | sed &quot;s/[ ,\&quot;]//g&quot;`
63+
need() { command -v &quot;$1&quot; &gt;/dev/null 2&gt;&amp;1 || { error &quot;Missing required command: $1&quot;; exit 2; }; }
6464

65-
mkdir data
66-
mkdir completed
67-
mkdir hierarchies
68-
mkdir processing
69-
mkdir mappings
70-
mkdir resources
71-
mkdir reports
65+
cleanup() {
66+
local rc=$?
67+
local cmd=&quot;${BASH_COMMAND:-}&quot;
68+
local line=&quot;${BASH_LINENO[0]:-}&quot;
7269

73-
find data/ -type f -exec rm -rf {} \;
74-
find data/ -type d -exec rm -rf {} \;
75-
find processing/ -type f -exec rm -rf {} \;
76-
find completed/ -type f -exec rm -rf {} \;
77-
find mappings/ -type f -exec rm -rf {} \;
78-
find resources/ -type f -exec rm -rf {} \;
70+
if [[ $rc -eq 0 ]]; then
71+
log &quot;EXIT rc=0 (success). Cleaning AWS env vars.&quot;
72+
else
73+
error &quot;EXIT rc=${rc} at line=${line} cmd=${cmd}. Cleaning AWS env vars.&quot;
74+
fi
7975

76+
unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
8077

81-
rm -rf pic-sure-hpds/
78+
if [[ -s logs/error_logging.txt ]]; then
79+
warn &quot;logs/error_logging.txt present (last 50 lines):&quot;
80+
tail -n 50 logs/error_logging.txt &gt;&amp;2 || true
81+
fi
82+
}
83+
on_err() {
84+
local rc=$?
85+
local line=&quot;${BASH_LINENO[0]:-}&quot;
86+
local cmd=&quot;${BASH_COMMAND:-}&quot;
87+
error &quot;ERR rc=${rc} at line=${line} cmd=${cmd}&quot;
88+
return $rc
89+
}
90+
trap on_err ERR
91+
trap cleanup EXIT
8292

83-
#aws s3 cp ${managed_inputs} data/
93+
assume_role() {
94+
local role_arn=&quot;${ROLE_ARN:-arn:aws:iam::736265540791:role/dbgap-etl}&quot;
95+
local session_name=&quot;${ROLE_SESSION_NAME:-s3-test}&quot;
96+
local duration=&quot;${ASSUME_DURATION_SECONDS:-3600}&quot;
8497

85-
#aws s3 cp ${metadata_file} data/metadata.json
98+
log &quot;Assuming role ${role_arn} (duration=${duration}s)&quot;
99+
local assume_json
100+
assume_json=&quot;$(mktemp)&quot;
86101

87-
unset AWS_ACCESS_KEY_ID
88-
unset AWS_SECRET_ACCESS_KEY
89-
unset AWS_SESSION_TOKEN
102+
aws sts assume-role \
103+
--duration-seconds &quot;$duration&quot; \
104+
--role-arn &quot;$role_arn&quot; \
105+
--role-session-name &quot;$session_name&quot; \
106+
&gt; &quot;$assume_json&quot;
90107

91-
### assume etl role
92-
aws sts assume-role --duration-seconds 3600 --role-arn arn:aws:iam::736265540791:role/dbgap-etl --role-session-name &quot;s3-test&quot; &gt; assume-role-output.txt
108+
export AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
109+
AWS_ACCESS_KEY_ID=&quot;$(jq -r &apos;.Credentials.AccessKeyId&apos; &quot;$assume_json&quot;)&quot;
110+
AWS_SECRET_ACCESS_KEY=&quot;$(jq -r &apos;.Credentials.SecretAccessKey&apos; &quot;$assume_json&quot;)&quot;
111+
AWS_SESSION_TOKEN=&quot;$(jq -r &apos;.Credentials.SessionToken&apos; &quot;$assume_json&quot;)&quot;
112+
rm -f &quot;$assume_json&quot;
93113

94-
export AWS_ACCESS_KEY_ID=`grep AccessKeyId assume-role-output.txt | cut -d &apos;:&apos; -f 2 | sed &quot;s/[ ,\&quot;]//g&quot;`
95-
export AWS_SECRET_ACCESS_KEY=`grep SecretAccessKey assume-role-output.txt | cut -d &apos;:&apos; -f 2 | sed &quot;s/[ ,\&quot;]//g&quot;`
96-
export AWS_SESSION_TOKEN=`grep SessionToken assume-role-output.txt | cut -d &apos;:&apos; -f 2 | sed &quot;s/[ ,\&quot;]//g&quot;`
114+
log &quot;Role assumed&quot;
115+
}
97116

98-
aws s3 cp s3://avillach-73-bdcatalyst-etl/general/data/ data/ --recursive --no-progress
99-
aws s3 cp s3://avillach-73-bdcatalyst-etl/general/globalvars/study_consents/ data/ --recursive --no-progress
100-
aws s3 cp s3://avillach-73-bdcatalyst-etl/general/mappings/mapping.postrootnodes.csv mappings/mapping.csv --no-progress
117+
reset_dir() {
118+
local d=&quot;$1&quot;
119+
rm -rf &quot;$d&quot;
120+
mkdir -p &quot;$d&quot;
121+
}
101122

102-
cat data/studies_consents_mapping.csv &gt;&gt; mappings/mapping.csv
123+
# S3 &quot;directory&quot; safe sync:
124+
# - DOES NOT call head-object on &quot;prefix/&quot; (which 404s in S3)
125+
# - Uses s3 sync and fails if nothing was synced (unless allow_empty=true)
126+
s3_sync_prefix() {
127+
local src_prefix=&quot;$1&quot; dst_dir=&quot;$2&quot; allow_empty=&quot;${3:-false}&quot;
128+
mkdir -p &quot;$dst_dir&quot;
103129

104-
aws s3 cp s3://avillach-73-bdcatalyst-etl/general/resources/job.config resources/ --no-progress
130+
log &quot;S3 sync ${src_prefix} -&gt; ${dst_dir} (allow_empty=${allow_empty})&quot;
131+
local out
132+
out=&quot;$(mktemp)&quot;
133+
# NOTE: --only-show-errors suppresses per-file transfer lines, so &quot;$out&quot; is typically empty even on success; the emptiness check below therefore relies on the destination-directory contents
134+
if ! aws s3 sync &quot;$src_prefix&quot; &quot;$dst_dir&quot; --no-progress --only-show-errors | tee &quot;$out&quot; &gt;/dev/null; then
135+
rm -f &quot;$out&quot;
136+
error &quot;S3 sync failed: ${src_prefix} -&gt; ${dst_dir}&quot;
137+
return 1
138+
fi
105139

106-
java -jar jars/GenerateAllConcepts.jar -propertiesfile resources/job.config
140+
# If sync produced no output AND destination is still empty, treat as empty prefix
141+
if [[ &quot;$allow_empty&quot; != &quot;true&quot; ]]; then
142+
if [[ ! -s &quot;$out&quot; ]] &amp;&amp; [[ -z &quot;$(find &quot;$dst_dir&quot; -type f -print -quit 2&gt;/dev/null || true)&quot; ]]; then
143+
rm -f &quot;$out&quot;
144+
error &quot;S3 prefix appears empty or does not exist: ${src_prefix}&quot;
145+
return 1
146+
fi
147+
fi
107148

108-
aws s3 cp completed/GLOBAL_allConcepts.csv s3://avillach-73-bdcatalyst-etl/general/completed/ --no-progress</command>
149+
rm -f &quot;$out&quot;
150+
return 0
151+
}
152+
153+
s3_cp_file() {
154+
local src=&quot;$1&quot; dst=&quot;$2&quot; required=&quot;${3:-true}&quot;
155+
if aws s3 cp &quot;$src&quot; &quot;$dst&quot; --no-progress --only-show-errors; then
156+
return 0
157+
fi
158+
if [[ &quot;$required&quot; == &quot;true&quot; ]]; then
159+
error &quot;S3 copy failed (required): $src -&gt; $dst&quot;
160+
return 1
161+
fi
162+
warn &quot;S3 copy failed (optional): $src -&gt; $dst&quot;
163+
return 0
164+
}
165+
166+
# -----------------------------
167+
# Prereqs
168+
# -----------------------------
169+
need aws
170+
need jq
171+
need java
172+
173+
S3_BUCKET=&quot;${S3_BUCKET:-avillach-73-bdcatalyst-etl}&quot;
174+
JOB_CONFIG_S3=&quot;${JOB_CONFIG_S3:-s3://${S3_BUCKET}/general/resources/job.config}&quot;
175+
MAPPING_S3=&quot;${MAPPING_S3:-s3://${S3_BUCKET}/general/mappings/mapping.postrootnodes.csv}&quot;
176+
OUT_S3_PREFIX=&quot;${OUT_S3_PREFIX:-s3://${S3_BUCKET}/general/completed/}&quot;
177+
178+
ASSUME_DURATION_SECONDS=&quot;${ASSUME_DURATION_SECONDS:-3600}&quot;
179+
180+
# -----------------------------
181+
# Workspace dirs (cleanly)
182+
# -----------------------------
183+
reset_dir data
184+
reset_dir completed
185+
reset_dir hierarchies
186+
reset_dir processing
187+
reset_dir mappings
188+
reset_dir resources
189+
reset_dir reports
190+
reset_dir logs
191+
rm -rf pic-sure-hpds/ || true
192+
193+
# -----------------------------
194+
# Assume role once (keep creds for all S3 ops)
195+
# -----------------------------
196+
assume_role
197+
198+
# -----------------------------
199+
# Pull inputs needed by GenerateAllConcepts
200+
# -----------------------------
201+
# IMPORTANT:
202+
# s3://bucket/prefix/ is not an object, so head-object on it will 404.
203+
# Use sync only for prefixes.
204+
s3_sync_prefix &quot;s3://${S3_BUCKET}/general/data/&quot; &quot;data&quot; false
205+
s3_sync_prefix &quot;s3://${S3_BUCKET}/general/globalvars/study_consents/&quot; &quot;data&quot; false
206+
207+
log &quot;Downloading mapping.postrootnodes.csv -&gt; mappings/mapping.csv&quot;
208+
s3_cp_file &quot;$MAPPING_S3&quot; &quot;mappings/mapping.csv&quot; true
209+
210+
if [[ -s &quot;data/studies_consents_mapping.csv&quot; ]]; then
211+
log &quot;Appending data/studies_consents_mapping.csv -&gt; mappings/mapping.csv&quot;
212+
cat &quot;data/studies_consents_mapping.csv&quot; &gt;&gt; &quot;mappings/mapping.csv&quot;
213+
else
214+
warn &quot;data/studies_consents_mapping.csv missing/empty; not appending&quot;
215+
fi
216+
217+
log &quot;Downloading job.config -&gt; resources/job.config&quot;
218+
s3_cp_file &quot;$JOB_CONFIG_S3&quot; &quot;resources/job.config&quot; true
219+
220+
# -----------------------------
221+
# Run generator
222+
# -----------------------------
223+
log &quot;Running GenerateAllConcepts.jar&quot;
224+
java -jar jars/GenerateAllConcepts.jar -propertiesfile &quot;resources/job.config&quot; 2&gt;&gt;&quot;logs/error_logging.txt&quot;
225+
226+
if [[ -s logs/error_logging.txt ]]; then
227+
warn &quot;GenerateAllConcepts wrote to logs/error_logging.txt (last 50 lines):&quot;
228+
tail -n 50 logs/error_logging.txt &gt;&amp;2 || true
229+
# Uncomment to hard-fail on any logged errors:
230+
# error &quot;Errors logged during GenerateAllConcepts&quot;; exit 1
231+
fi
232+
233+
# -----------------------------
234+
# Upload outputs
235+
# -----------------------------
236+
if [[ ! -s &quot;completed/GLOBAL_allConcepts.csv&quot; ]]; then
237+
error &quot;Expected output not found or empty: completed/GLOBAL_allConcepts.csv&quot;
238+
exit 1
239+
fi
240+
241+
log &quot;Uploading GLOBAL_allConcepts.csv -&gt; ${OUT_S3_PREFIX}&quot;
242+
aws s3 cp &quot;completed/GLOBAL_allConcepts.csv&quot; &quot;${OUT_S3_PREFIX}&quot; --no-progress --only-show-errors
243+
244+
log &quot;Done&quot;</command>
109245
<configuredLocalRules/>
110246
</hudson.tasks.Shell>
111247
</builders>
112248
<publishers/>
113-
<buildWrappers/>
249+
<buildWrappers>
250+
<hudson.plugins.ws__cleanup.PreBuildCleanup plugin="[email protected]">
251+
<deleteDirs>false</deleteDirs>
252+
<cleanupParameter></cleanupParameter>
253+
<externalDelete></externalDelete>
254+
<disableDeferredWipeout>false</disableDeferredWipeout>
255+
</hudson.plugins.ws__cleanup.PreBuildCleanup>
256+
</buildWrappers>
114257
</project>

0 commit comments

Comments
 (0)