Skip to content

Commit 2a2b845

Browse files
committed
Test database inspection
1 parent d2c58b7 commit 2a2b845

File tree

4 files changed

+405
-41
lines changed

4 files changed

+405
-41
lines changed
Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
# TODO: Commit these changes
2+
# TODO: Factor out functions in this script
3+
# TODO: Run the unit tests too
4+
# TODO: Add any functionality added to create to update as well.
5+
# TODO: Add any new parameters to the usage docs
6+
7+
# Prepare a clean working area for one test case.
#
# Arguments:
#   $1 - case name (only used for the banner message)
#   $2 - source path prefix; "<prefix>_check" and "<prefix>_create" are
#        removed and recreated from scratch
#
# Side effects:
#   Leaves the current directory set to "<prefix>_create" and creates a small
#   zstash_demo/ tree there (an empty dir, an empty file, and one text file).
#
# Returns:
#   Non-zero if the prefix is empty or the work directory cannot be entered;
#   in the empty-prefix case nothing on disk is touched.
setup()
{
    echo "##########################################################################################################"
    local case_name="${1}"
    local src_prefix="${2}"
    echo "Testing: ${case_name}"

    # An empty prefix would make the rm -rf calls below target "_check" and
    # "_create" relative to whatever directory the caller happens to be in.
    if [ -z "${src_prefix}" ]; then
        echo "setup: no source prefix given for case '${case_name}'" >&2
        return 1
    fi

    # Quote every expansion so paths with spaces or glob characters are
    # treated as a single literal path.
    rm -rf "${src_prefix}_check"
    mkdir -p "${src_prefix}_check"
    rm -rf "${src_prefix}_create"
    mkdir -p "${src_prefix}_create"
    # Do not build the demo tree in the wrong place if the cd fails.
    cd "${src_prefix}_create" || return 1

    mkdir zstash_demo
    mkdir zstash_demo/empty_dir
    mkdir zstash_demo/dir
    echo -n '' > zstash_demo/file_empty.txt
    echo 'file0 stuff' > zstash_demo/dir/file0.txt
}
25+
26+
# Record the outcome of one log assertion.
#
# Arguments:
#   $1 - literal message expected somewhere in the log
#   $2 - log file to search (relative to the current directory)
#   $3 - entry to add to review_str if the message is missing
#
# Updates the global counters success_count / fail_count and the global
# review_str that run_test_cases initializes. Matching lines are printed,
# as a plain grep would.
_expect_in_log()
{
    local expected_message="${1}"
    local log_file="${2}"
    local review_entry="${3}"

    # -F: the expected messages are literal text, not regular expressions
    # (otherwise every "." would match any character).
    if grep -F -- "${expected_message}" "${log_file}"; then
        success_count=$((success_count + 1))
    else
        fail_count=$((fail_count + 1))
        # Every failure must point at a log to review, but list each log once.
        case ",${review_str}" in
            *",${review_entry},"*) ;;
            *) review_str+="${review_entry}," ;;
        esac
    fi
}

# Run all database-corruption test cases and print a summary.
#
# Each case archives a small demo directory with `zstash create`, optionally
# forcing a corrupted tars table, then (for most cases) runs `zstash check`
# and greps the captured logs for the expected messages.
#
# Globals written: SRC_DIR, DST_DIR, success_count, fail_count, review_str,
# case_name, src_prefix.
# Expected summary on a healthy build: 20 successes, 0 failures.
run_test_cases()
{
    # Before running the first time:
    # globus.org
    # Authenticate into LCRC Improv DTN, NERSC Perlmutter
    # Run toy problem:
    #
    # source /lcrc/soft/climate/e3sm-unified/load_latest_e3sm_unified_chrysalis.sh
    # cd /lcrc/group/e3sm/ac.forsyth2/zstash_testing/test_20250729
    # mkdir zstash_demo; echo 'file0 stuff' > zstash_demo/file0.txt
    # Chrysalis: 15288284-7006-4041-ba1a-6b52501e49f1
    # Perlmutter: 6bdc7956-fc0f-4ad2-989c-7aa5ee643a79
    # zstash create --hpss=globus://6bdc7956-fc0f-4ad2-989c-7aa5ee643a79/global/homes/f/forsyth/zstash/tests/manual_run zstash_demo
    # Will prompt for LCRC AND NERSC authentication, and then paste generated auth code one time.

    # Before each run:
    # Perlmutter:
    # cd /global/homes/f/forsyth/zstash/tests/
    # rm -rf test_database_corruption
    #
    # Chrysalis:
    # cd ~/ez/zstash/
    # conda activate zstash-377-20250728
    # pre-commit run --all-files
    # python -m pip install .
    # cd tests/scripts
    # ./database_corruption.bash

    SRC_DIR=/lcrc/group/e3sm/ac.forsyth2/zstash_testing/test_database_corruption # Chrysalis
    DST_DIR=globus://6bdc7956-fc0f-4ad2-989c-7aa5ee643a79/global/homes/f/forsyth/zstash/tests/test_database_corruption # Perlmutter

    success_count=0
    fail_count=0
    review_str=""

    # Standard cases ##########################################################


    # Case 1: zstash create, check from different directory
    case_name="normal"
    src_prefix="${SRC_DIR}/${case_name}"
    setup "${case_name}" "${src_prefix}"
    cd "${src_prefix}_create"
    zstash create --hpss="${DST_DIR}/${case_name}" zstash_demo 2>&1 | tee create.log
    _expect_in_log "INFO: Adding 000000.tar to the database." create.log "${case_name}_create/create.log"
    cd "${src_prefix}_check"
    zstash check --hpss="${DST_DIR}/${case_name}" 2>&1 | tee check.log
    _expect_in_log "INFO: 000000.tar: Found a single database entry." check.log "${case_name}_check/check.log"


    # Case 2: zstash create, check from same directory (i.e., an index.db already exists)
    case_name="check_from_same_dir"
    src_prefix="${SRC_DIR}/${case_name}"
    setup "${case_name}" "${src_prefix}"
    cd "${src_prefix}_create"
    zstash create --hpss="${DST_DIR}/${case_name}" zstash_demo 2>&1 | tee create.log
    _expect_in_log "INFO: Adding 000000.tar to the database." create.log "${case_name}_create/create.log"
    # Do NOT change directory
    zstash check --hpss="${DST_DIR}/${case_name}" 2>&1 | tee check.log
    # check.log was written in the _create directory, so point the reviewer there.
    _expect_in_log "INFO: 000000.tar: Found a single database entry." check.log "${case_name}_create/check.log"


    # Corrupted database cases ################################################
    # --for-developers-force-database-corruption is set on `zstash create`


    # Case 3: Duplicates detected! Error out on create. Don't even get to check.
    case_name="error_on_create"
    src_prefix="${SRC_DIR}/${case_name}"
    setup "${case_name}" "${src_prefix}"
    cd "${src_prefix}_create"
    zstash create --hpss="${DST_DIR}/${case_name}" --for-developers-force-database-corruption="simulate_row_existing" --error-on-duplicate-tar zstash_demo 2>&1 | tee create.log
    _expect_in_log "INFO: TESTING/DEBUGGING ONLY: Simulating row existing for 000000.tar." create.log "${case_name}_create/create.log"
    _expect_in_log "ERROR: Database corruption detected! 000000.tar is already in the database." create.log "${case_name}_create/create.log"


    # Case 4: Duplicates detected! Overwrite them. Proceed with check, as usual.
    case_name="overwrite_duplicate"
    src_prefix="${SRC_DIR}/${case_name}"
    setup "${case_name}" "${src_prefix}"
    cd "${src_prefix}_create"
    zstash create --hpss="${DST_DIR}/${case_name}" --for-developers-force-database-corruption="simulate_row_existing_bad_size" --overwrite-duplicate-tars zstash_demo 2>&1 | tee create.log
    _expect_in_log "INFO: TESTING/DEBUGGING ONLY: Simulating row existing with bad size for 000000.tar." create.log "${case_name}_create/create.log"
    _expect_in_log "WARNING: Database corruption detected! 000000.tar is already in the database." create.log "${case_name}_create/create.log"
    _expect_in_log "WARNING: Updating existing tar 000000.tar to proceed." create.log "${case_name}_create/create.log"
    cd "${src_prefix}_check"
    # We should have overwritten the wrong size with the real size, so check should pass.
    zstash check --hpss="${DST_DIR}/${case_name}" 2>&1 | tee check.log
    _expect_in_log "INFO: 000000.tar: Found a single database entry." check.log "${case_name}_check/check.log"


    # Case 5: Duplicates detected! Allow them. Error out on check because duplicates are present.
    case_name="check_detects_duplicate"
    src_prefix="${SRC_DIR}/${case_name}"
    setup "${case_name}" "${src_prefix}"
    cd "${src_prefix}_create"
    zstash create --hpss="${DST_DIR}/${case_name}" --for-developers-force-database-corruption="simulate_row_existing" zstash_demo 2>&1 | tee create.log
    _expect_in_log "INFO: TESTING/DEBUGGING ONLY: Simulating row existing for 000000.tar." create.log "${case_name}_create/create.log"
    _expect_in_log "WARNING: Database corruption detected! 000000.tar is already in the database." create.log "${case_name}_create/create.log"
    _expect_in_log "WARNING: Adding a new entry for 000000.tar." create.log "${case_name}_create/create.log"
    cd "${src_prefix}_check"
    zstash check --hpss="${DST_DIR}/${case_name}" --error-on-duplicate-tar 2>&1 | tee check.log
    _expect_in_log "ERROR: Database corruption detected! Found 2 database entries for 000000.tar with sizes" check.log "${case_name}_check/check.log"


    # Case 6: Duplicates detected! Allow them. Error out on check because none of the sizes match.
    case_name="check_finds_no_matching_size"
    src_prefix="${SRC_DIR}/${case_name}"
    setup "${case_name}" "${src_prefix}"
    cd "${src_prefix}_create"
    zstash create --hpss="${DST_DIR}/${case_name}" --for-developers-force-database-corruption="simulate_no_correct_size" zstash_demo 2>&1 | tee create.log
    _expect_in_log "INFO: TESTING/DEBUGGING ONLY: Simulating no correct size for 000000.tar." create.log "${case_name}_create/create.log"
    cd "${src_prefix}_check"
    zstash check --hpss="${DST_DIR}/${case_name}" 2>&1 | tee check.log
    _expect_in_log "INFO: 000000.tar: No database entries match the actual file size:" check.log "${case_name}_check/check.log"


    # Case 7: Duplicates detected! Allow them. Pass check because at least one of the sizes match.
    case_name="check_finds_a_matching_size"
    src_prefix="${SRC_DIR}/${case_name}"
    setup "${case_name}" "${src_prefix}"
    cd "${src_prefix}_create"
    zstash create --hpss="${DST_DIR}/${case_name}" --for-developers-force-database-corruption="simulate_row_existing_bad_size" zstash_demo 2>&1 | tee create.log
    _expect_in_log "INFO: TESTING/DEBUGGING ONLY: Simulating row existing with bad size for 000000.tar." create.log "${case_name}_create/create.log"
    _expect_in_log "WARNING: Database corruption detected! 000000.tar is already in the database." create.log "${case_name}_create/create.log"
    _expect_in_log "WARNING: Adding a new entry for 000000.tar." create.log "${case_name}_create/create.log"
    cd "${src_prefix}_check"
    zstash check --hpss="${DST_DIR}/${case_name}" 2>&1 | tee check.log
    _expect_in_log "INFO: 000000.tar: Found a database entry with the same size as the actual file size:" check.log "${case_name}_check/check.log"


    # Summary
    echo "Success count: ${success_count}"
    echo "Fail count: ${fail_count}"
    echo "Review: ${review_str}"
}
275+
276+
run_test_cases
277+
278+
# Success count: 20
279+
# Fail count: 0
280+
# Review:

zstash/create.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,22 @@ def setup_create() -> Tuple[str, argparse.Namespace]:
166166
action="store_true",
167167
help="Hard copy symlinks. This is useful for preventing broken links. Note that a broken link will result in a failed create.",
168168
)
169+
optional.add_argument(
170+
"--error-on-duplicate-tar",
171+
action="store_true",
172+
help="Raise an error if a tar file with the same name already exists in the database. If this flag is set, zstash will exit if it sees a duplicate tar. If it is not set, zstash's behavior will depend on whether or not the --overwrite-duplicate-tars flag is set.",
173+
)
174+
optional.add_argument(
175+
"--overwrite-duplicate-tars",
176+
action="store_true",
177+
help="If a duplicate tar is encountered, overwrite the existing tar file with the new one (i.e., it will assume the latest tar is the correct one). If this flag is not set, zstash will permit multiple entries for the same tar in its database.",
178+
)
179+
optional.add_argument(
180+
"--for-developers-force-database-corruption",
181+
type=str,
182+
help="For developers only! Forces database corruption by inserting a duplicate tar entry into the tars table. This is useful for testing. 3 options: simulate_row_existing -- add the tar before the main logic. simulate_row_existing_bad_size -- add the tar before the main logic, but with the wrong size. simulate_no_correct_size -- add the tar twice, both times with incorrect size. If this option is not set, no corruption will be forced.",
183+
default="",
184+
)
169185
# Now that we're inside a subcommand, ignore the first two argvs
170186
# (zstash create)
171187
args: argparse.Namespace = parser.parse_args(sys.argv[2:])
@@ -260,6 +276,9 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]:
260276
args.follow_symlinks,
261277
skip_tars_md5=args.no_tars_md5,
262278
non_blocking=args.non_blocking,
279+
error_on_duplicate_tar=args.error_on_duplicate_tar,
280+
overwrite_duplicate_tars=args.overwrite_duplicate_tars,
281+
force_database_corruption=args.for_developers_force_database_corruption,
263282
)
264283
except FileNotFoundError:
265284
raise Exception("Archive creation failed due to broken symlink.")
@@ -275,6 +294,9 @@ def create_database(cache: str, args: argparse.Namespace) -> List[str]:
275294
args.follow_symlinks,
276295
skip_tars_md5=args.no_tars_md5,
277296
non_blocking=args.non_blocking,
297+
error_on_duplicate_tar=args.error_on_duplicate_tar,
298+
overwrite_duplicate_tars=args.overwrite_duplicate_tars,
299+
force_database_corruption=args.for_developers_force_database_corruption,
278300
)
279301

280302
# Close database

0 commit comments

Comments
 (0)