Skip to content

Commit 5f14239

Browse files
committed
feat: added script with instructions
1 parent a4f32c8 commit 5f14239

File tree

1 file changed

+273
-0
lines changed

1 file changed

+273
-0
lines changed
Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
#! /bin/bash
2+
3+
# INSTRUCTIONS:
4+
# Run this script (bash deduplicate_project_exclusions.sh) which will go through all the projects and transform the exclusion tag object into a string,
5+
# containing just the exclusion tag id. For example: exclusionTag: { "id": "neurosynth_exclude_exclusion", label: "Exclude", isExclusionTag: true, isAssignable: true } -> exclusionTag: "neurosynth_exclude_exclusion"
6+
# This script will:
7+
# 1. Create a reusable PostgreSQL function for the transformation
8+
# 2. Validate the transformation by checking that the number of stub studies and the exclusion tag id are the same for each project
9+
# 3. If the validation passes, update the projects with the transformed data WHEN THE USER CONFIRMS "yes"
10+
# 4. Clean up the function
11+
12+
DATABASE_NAME="compose"
13+
DOCKER_COMPOSE_COMMAND="docker compose"
14+
15+
# Create a reusable PostgreSQL function for the transformation
16+
QUERY_CREATE_FUNCTION="
17+
CREATE OR REPLACE FUNCTION transform_exclusion_tags(provenance_data JSONB) RETURNS JSONB AS \$\$
18+
BEGIN
19+
RETURN jsonb_set(
20+
provenance_data,
21+
'{curationMetadata,columns}',
22+
COALESCE(
23+
(
24+
SELECT jsonb_agg(
25+
jsonb_set(
26+
curationColumn,
27+
'{stubStudies}',
28+
COALESCE(
29+
(
30+
SELECT jsonb_agg(
31+
CASE
32+
WHEN stub->>'exclusionTag' IS NULL THEN stub
33+
WHEN jsonb_typeof(stub->'exclusionTag') = 'string' THEN stub
34+
ELSE jsonb_set(
35+
stub,
36+
'{exclusionTag}',
37+
stub->'exclusionTag'->'id'
38+
)
39+
END
40+
)
41+
FROM jsonb_array_elements(curationColumn->'stubStudies') AS stub
42+
),
43+
'[]'::jsonb
44+
)
45+
)
46+
)
47+
FROM jsonb_array_elements(provenance_data->'curationMetadata'->'columns') AS curationColumn
48+
),
49+
'[]'::jsonb
50+
)
51+
);
52+
END;
53+
\$\$ LANGUAGE plpgsql IMMUTABLE;
54+
"
55+
56+
# Query to show current state of all projects
57+
QUERY_BEFORE="SELECT id, jsonb_pretty(provenance::jsonb) FROM projects WHERE provenance::jsonb->'curationMetadata'->'columns' IS NOT NULL ORDER BY id;"
58+
59+
# Query to preview what the transformed data would look like
60+
QUERY_PREVIEW="SELECT id, jsonb_pretty(transform_exclusion_tags(provenance::jsonb)) AS new_provenance \
61+
FROM projects \
62+
WHERE provenance::jsonb->'curationMetadata'->'columns' IS NOT NULL \
63+
ORDER BY id;"
64+
65+
# Validation query to check both conditions for each project
66+
QUERY_VALIDATE="WITH transformed AS ( \
67+
SELECT \
68+
id, \
69+
provenance::jsonb AS original_provenance, \
70+
transform_exclusion_tags(provenance::jsonb) AS transformed_provenance \
71+
FROM projects \
72+
WHERE provenance::jsonb->'curationMetadata'->'columns' IS NOT NULL \
73+
), \
74+
stub_counts AS ( \
75+
SELECT \
76+
id, \
77+
( \
78+
SELECT COUNT(*) \
79+
FROM jsonb_array_elements(original_provenance->'curationMetadata'->'columns') AS col, \
80+
jsonb_array_elements(col->'stubStudies') AS stub \
81+
WHERE stub IS NOT NULL AND stub != 'null'::jsonb \
82+
) AS original_stub_count, \
83+
( \
84+
SELECT COUNT(*) \
85+
FROM jsonb_array_elements(transformed_provenance->'curationMetadata'->'columns') AS col, \
86+
jsonb_array_elements(col->'stubStudies') AS stub \
87+
WHERE stub IS NOT NULL AND stub != 'null'::jsonb \
88+
) AS transformed_stub_count \
89+
FROM transformed \
90+
), \
91+
exclusion_tag_validation AS ( \
92+
SELECT \
93+
t.id, \
94+
col_idx, \
95+
stub_idx, \
96+
original_stub->'exclusionTag'->>'id' AS original_exclusion_id, \
97+
transformed_stub->>'exclusionTag' AS transformed_exclusion_tag \
98+
FROM transformed t, \
99+
jsonb_array_elements(t.original_provenance->'curationMetadata'->'columns') WITH ORDINALITY AS col(col_data, col_idx), \
100+
jsonb_array_elements(col.col_data->'stubStudies') WITH ORDINALITY AS original(original_stub, stub_idx), \
101+
jsonb_array_elements(t.transformed_provenance->'curationMetadata'->'columns') WITH ORDINALITY AS tcol(tcol_data, tcol_idx), \
102+
jsonb_array_elements(tcol.tcol_data->'stubStudies') WITH ORDINALITY AS transformed(transformed_stub, tstub_idx) \
103+
WHERE col.col_idx = tcol.tcol_idx \
104+
AND original.stub_idx = transformed.tstub_idx \
105+
AND original.original_stub->>'exclusionTag' IS NOT NULL \
106+
AND original.original_stub->'exclusionTag'->>'id' IS NOT NULL \
107+
) \
108+
SELECT \
109+
NULL AS project_id, \
110+
'VALIDATION RESULTS' AS report_type, \
111+
'==================' AS separator \
112+
UNION ALL \
113+
SELECT \
114+
sc.id AS project_id, \
115+
'Stub Count' AS report_type, \
116+
CASE \
117+
WHEN sc.original_stub_count != sc.transformed_stub_count THEN \
118+
'❌ FAILED - Stub count mismatch: ' || sc.original_stub_count || ' -> ' || sc.transformed_stub_count \
119+
ELSE \
120+
'✓ PASSED - Stub count: ' || sc.original_stub_count \
121+
END AS separator \
122+
FROM stub_counts sc \
123+
UNION ALL \
124+
SELECT \
125+
etv.id AS project_id, \
126+
'ExclusionTag (Col ' || etv.col_idx || ', Stub ' || etv.stub_idx || ')' AS report_type, \
127+
CASE \
128+
WHEN etv.original_exclusion_id != etv.transformed_exclusion_tag THEN \
129+
'❌ FAILED - Mismatch: \"' || COALESCE(etv.original_exclusion_id, 'NULL') || '\" -> \"' || COALESCE(etv.transformed_exclusion_tag, 'NULL') || '\"' \
130+
ELSE \
131+
'✓ PASSED - Preserved: \"' || etv.original_exclusion_id || '\"' \
132+
END AS separator \
133+
FROM exclusion_tag_validation etv \
134+
ORDER BY project_id NULLS FIRST, report_type, separator;"
135+
136+
# Query to actually update all projects
137+
QUERY_UPDATE="UPDATE projects \
138+
SET provenance = transform_exclusion_tags(provenance::jsonb) \
139+
WHERE provenance::jsonb->'curationMetadata'->'columns' IS NOT NULL;"
140+
141+
echo "================================================"
142+
echo "Creating transformation function..."
143+
echo "================================================"
144+
$DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "$QUERY_CREATE_FUNCTION" > /dev/null 2>&1
145+
echo "✓ Function created successfully"
146+
echo ""
147+
148+
echo "================================================"
149+
echo "Running validation checks on all projects..."
150+
echo "================================================"
151+
echo ""
152+
153+
$DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "$QUERY_VALIDATE"
154+
155+
echo ""
156+
echo "================================================"
157+
echo "Checking for validation failures..."
158+
echo "================================================"
159+
160+
# Query to check if there are any failures and raise error with project IDs
161+
QUERY_CHECK_FAILURES="DO \$\$ \
162+
DECLARE \
163+
failure_count INTEGER; \
164+
failed_projects TEXT; \
165+
BEGIN \
166+
WITH transformed AS ( \
167+
SELECT \
168+
id, \
169+
provenance::jsonb AS original_provenance, \
170+
transform_exclusion_tags(provenance::jsonb) AS transformed_provenance \
171+
FROM projects \
172+
WHERE provenance::jsonb->'curationMetadata'->'columns' IS NOT NULL \
173+
), \
174+
stub_counts AS ( \
175+
SELECT \
176+
id, \
177+
( \
178+
SELECT COUNT(*) \
179+
FROM jsonb_array_elements(original_provenance->'curationMetadata'->'columns') AS col, \
180+
jsonb_array_elements(col->'stubStudies') AS stub \
181+
WHERE stub IS NOT NULL AND stub != 'null'::jsonb \
182+
) AS original_stub_count, \
183+
( \
184+
SELECT COUNT(*) \
185+
FROM jsonb_array_elements(transformed_provenance->'curationMetadata'->'columns') AS col, \
186+
jsonb_array_elements(col->'stubStudies') AS stub \
187+
WHERE stub IS NOT NULL AND stub != 'null'::jsonb \
188+
) AS transformed_stub_count \
189+
FROM transformed \
190+
), \
191+
exclusion_tag_validation AS ( \
192+
SELECT \
193+
t.id, \
194+
original_stub->'exclusionTag'->>'id' AS original_exclusion_id, \
195+
transformed_stub->>'exclusionTag' AS transformed_exclusion_tag \
196+
FROM transformed t, \
197+
jsonb_array_elements(t.original_provenance->'curationMetadata'->'columns') WITH ORDINALITY AS col(col_data, col_idx), \
198+
jsonb_array_elements(col.col_data->'stubStudies') WITH ORDINALITY AS original(original_stub, stub_idx), \
199+
jsonb_array_elements(t.transformed_provenance->'curationMetadata'->'columns') WITH ORDINALITY AS tcol(tcol_data, tcol_idx), \
200+
jsonb_array_elements(tcol.tcol_data->'stubStudies') WITH ORDINALITY AS transformed(transformed_stub, tstub_idx) \
201+
WHERE col.col_idx = tcol.tcol_idx \
202+
AND original.stub_idx = transformed.tstub_idx \
203+
AND original.original_stub->>'exclusionTag' IS NOT NULL \
204+
AND original.original_stub->'exclusionTag'->>'id' IS NOT NULL \
205+
), \
206+
failures AS ( \
207+
SELECT DISTINCT id \
208+
FROM stub_counts \
209+
WHERE original_stub_count != transformed_stub_count \
210+
UNION \
211+
SELECT DISTINCT id \
212+
FROM exclusion_tag_validation \
213+
WHERE original_exclusion_id != transformed_exclusion_tag \
214+
) \
215+
SELECT COUNT(*), string_agg(id, ', ') \
216+
INTO failure_count, failed_projects \
217+
FROM failures; \
218+
\
219+
IF failure_count > 0 THEN \
220+
RAISE EXCEPTION 'Validation failed for % project(s): %', failure_count, failed_projects; \
221+
ELSE \
222+
RAISE NOTICE '✓ All validations passed!'; \
223+
END IF; \
224+
END \$\$;"
225+
226+
$DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "$QUERY_CHECK_FAILURES"
227+
228+
# echo "Saving current state of all projects to output_before.txt..."
229+
$DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "$QUERY_BEFORE" -t -A > ~/WORK/neurostuff/scripts/output_before.txt
230+
231+
# echo "Saving transformed data preview to output_after.txt..."
232+
$DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "$QUERY_PREVIEW" -t -A > ~/WORK/neurostuff/scripts/output_after.txt
233+
234+
if [ $? -ne 0 ]; then
235+
echo ""
236+
echo "❌ Validation failed! Check the error message above for project IDs."
237+
echo "Cleaning up..."
238+
$DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "DROP FUNCTION IF EXISTS transform_exclusion_tags(JSONB);" > /dev/null 2>&1
239+
exit 1
240+
fi
241+
242+
echo ""
243+
echo "================================================"
244+
echo "Validation complete!"
245+
echo "================================================"
246+
echo ""
247+
# echo "Saving current state of all projects to output_before.txt..."
248+
# docker compose exec compose_pgsql17 psql -U postgres -d compose -c "$QUERY_BEFORE" -t -A > ~/WORK/neurostuff/scripts/output_before.txt
249+
250+
# echo "Saving transformed data preview to output_after.txt..."
251+
# docker compose exec compose_pgsql17 psql -U postgres -d compose -c "$QUERY_PREVIEW" -t -A > ~/WORK/neurostuff/scripts/output_after.txt
252+
253+
# echo ""
254+
# echo "Review the validation results above and the output files."
255+
# echo ""
256+
read -p "Do you want to proceed with the update? (yes/no): " confirmation
257+
258+
if [[ "$confirmation" == "yes" ]]; then
259+
echo ""
260+
echo "Applying updates to all projects..."
261+
RESULT=$($DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "$QUERY_UPDATE")
262+
echo "$RESULT"
263+
echo ""
264+
echo "✓ Update complete! All projects have been transformed."
265+
else
266+
echo ""
267+
echo "Update cancelled. No changes were made to the database."
268+
fi
269+
270+
echo ""
271+
echo "Cleaning up..."
272+
$DOCKER_COMPOSE_COMMAND exec compose_pgsql17 psql -U postgres -d $DATABASE_NAME -c "DROP FUNCTION IF EXISTS transform_exclusion_tags(JSONB);" > /dev/null 2>&1
273+
echo "✓ Cleanup complete"

0 commit comments

Comments
 (0)