Skip to content

Commit 3e28683

Browse files
authored
Merge pull request #12 from E3SM-Project/check
Check
2 parents 30c0a5c + c39f6ed commit 3e28683

File tree

5 files changed

+103
-41
lines changed

5 files changed

+103
-41
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name="zstash",
5-
version="0.0.2",
5+
version="0.1.0",
66
author="Chris Golaz, Zeshawn Shaheen",
77
88
description="Long term HPSS archiving software for E3SM",

tests/test.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def write_file(name, contents):
1212

1313
def run_cmd(cmd):
1414
"""
15-
Run a command while printing and returning the stdout and stderr
15+
Run a command while printing and returning the stdout and stderr.
1616
"""
1717
print('+ {}'.format(cmd))
1818
if isinstance(cmd, str):
@@ -29,9 +29,9 @@ def str_not_in(output, msg):
2929
"""
3030
if msg in output:
3131
print('*'*40)
32-
print('This was not supposed to be found: {}',format(msg))
32+
print('This was not supposed to be found: {}'.format(msg))
3333
print('*'*40)
34-
exit()
34+
stop()
3535

3636
def str_in(output, msg):
3737
"""
@@ -41,7 +41,7 @@ def str_in(output, msg):
4141
print('*'*40)
4242
print('This was supposed to be found, but was not: {}'.format(msg))
4343
print('*'*40)
44-
exit()
44+
stop()
4545

4646
def cleanup():
4747
"""
@@ -55,15 +55,15 @@ def cleanup():
5555
cmd = 'hsi rm -R {}'.format(HPSS_PATH)
5656
run_cmd(cmd)
5757

58-
def exit():
58+
def stop():
5959
"""
60-
Cleanup and stop running this script
60+
Cleanup and stop running this script.
6161
"""
6262
cleanup()
6363
sys.exit()
6464

6565

66-
# TODO: Change the hpss directory to a dir that's accessable to everyone
66+
# TODO: Change the hpss directory to a dir that's accessible to everyone.
6767
HPSS_PATH='/home/z/zshaheen/zstash_test'
6868

6969
# Create files and directories
@@ -96,28 +96,32 @@ def exit():
9696
cmd = 'zstash create --hpss={} zstash_test'.format(HPSS_PATH)
9797
output, err = run_cmd(cmd)
9898
str_in(output+err, 'Transferring file to HPSS')
99+
str_not_in(output+err, 'ERROR')
99100

100101
print('Testing chgrp')
101102
GROUP = 'acme'
102103
print('First, make sure that the files are not already in the {} group'.format(GROUP))
103104
cmd = 'hsi ls -l {}'.format(HPSS_PATH)
104105
output, err = run_cmd(cmd)
105106
str_not_in(output+err, GROUP)
107+
str_not_in(output+err, 'ERROR')
106108
print('Running zstash chgrp')
107109
cmd = 'zstash chgrp -R {} {}'.format(GROUP, HPSS_PATH)
108110
output, err = run_cmd(cmd)
111+
str_not_in(output+err, 'ERROR')
109112
print('Now check that the files are in the {} group'.format(GROUP))
110113
cmd = 'hsi ls -l {}'.format(HPSS_PATH)
111114
output, err = run_cmd(cmd)
112115
str_in(output+err, 'acme')
116+
str_not_in(output+err, 'ERROR')
113117

114118
print('Running update on the newly created directory, nothing should happen')
115119
os.chdir('zstash_test')
116120
cmd = 'zstash update --hpss={}'.format(HPSS_PATH)
117121
output, err = run_cmd(cmd)
118122
os.chdir('../')
119123
str_in(output+err, 'Nothing to update')
120-
124+
str_not_in(output+err, 'ERROR')
121125

122126
print('Testing update with an actual change')
123127
if not os.path.exists('zstash_test/dir2'):
@@ -130,10 +134,17 @@ def exit():
130134
output, err = run_cmd(cmd)
131135
os.chdir('../')
132136
str_in(output+err, 'Transferring file to HPSS')
137+
str_not_in(output+err, 'ERROR')
133138
# Make sure none of the old files are moved
134139
str_not_in(output+err, 'file0')
135140
str_not_in(output+err, 'file_empty')
136141
str_not_in(output+err, 'empty_dir')
142+
str_not_in(output+err, 'ERROR')
143+
144+
print('Testing the checking functionality')
145+
cmd = 'zstash check --hpss={}'.format(HPSS_PATH)
146+
output, err = run_cmd(cmd)
147+
str_not_in(output+err, 'ERROR')
137148

138149
print('Testing the extract functionality')
139150
os.rename('zstash_test', 'zstash_test_backup')
@@ -150,13 +161,15 @@ def exit():
150161
str_in(output+err, 'Extracting dir/file1.txt')
151162
str_in(output+err, 'Extracting empty_dir')
152163
str_in(output+err, 'Extracting dir2/file2.txt')
164+
str_not_in(output+err, 'ERROR')
153165

154166
print('Running update on the newly extracted directory, nothing should happen')
155167
os.chdir('zstash_test')
156168
cmd = 'zstash update --hpss={}'.format(HPSS_PATH)
157169
output, err = run_cmd(cmd)
158170
os.chdir('../')
159171
str_in(output+err, 'Nothing to update')
172+
str_not_in(output+err, 'ERROR')
160173

161174
print('Verifying the data from database with the actual files')
162175
# Checksums from HPSS

zstash/check.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import logging
2+
import extract
3+
4+
def check():
5+
"""
6+
Check that the files in a given HPSS archive are valid.
7+
"""
8+
# This basically just goes through the process of extracting the files,
9+
# but doesn't actually save the output.
10+
extract.extract(keep_files=False)

zstash/extract.py

Lines changed: 65 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,14 @@
1111
from settings import config, CACHE, BLOCK_SIZE, DB_FILENAME
1212

1313

14-
def extract():
14+
def extract(keep_files=True):
15+
"""
16+
Given an HPSS path in the zstash database or passed via the command line,
17+
extract the archived data based on the file pattern (if given).
18+
"""
1519
parser = argparse.ArgumentParser(
1620
usage='zstash extract [<args>] [files]',
1721
description='Extract files from existing archive')
18-
required = parser.add_argument_group('required named arguments')
1922
optional = parser.add_argument_group('optional named arguments')
2023
optional.add_argument('--hpss', type=str, help='path to HPSS storage')
2124
parser.add_argument('files', nargs='*', default=['*'])
@@ -50,7 +53,8 @@ def extract():
5053
config.hpss = args.hpss
5154

5255
# Start doing actual work
53-
logging.debug('Running zstash extract')
56+
cmd = 'extract' if keep_files else 'check'
57+
logging.debug('Running zstash ' + cmd)
5458
logging.debug('Local path : %s' % (config.path))
5559
logging.debug('HPSS path : %s' % (config.hpss))
5660
logging.debug('Max size : %i' % (config.maxsize))
@@ -59,7 +63,7 @@ def extract():
5963
# Find matching files
6064
matches = []
6165
for file in args.files:
62-
cur.execute(u"select * from files where name GLOB ?", (file,))
66+
cur.execute(u"select * from files where name GLOB ? or tar GLOB ?", (file, file))
6367
matches = matches + cur.fetchall()
6468

6569
# Remove duplicates
@@ -69,15 +73,32 @@ def extract():
6973
matches = sorted(matches, key=lambda x: (x[5], x[6]))
7074

7175
# Retrieve from tapes
72-
extractFiles(matches)
76+
failures = extractFiles(matches, keep_files)
7377

7478
# Close database
7579
logging.debug('Closing index database')
7680
con.close()
7781

82+
if failures:
83+
logging.error('Encountered an error for files:')
84+
for fail in failures:
85+
logging.error('{} in {}'.format(fail[1], fail[5]))
7886

79-
def extractFiles(files):
87+
broken_tars = set(sorted([f[5] for f in failures]))
88+
logging.error('The following tar archives had errors:')
89+
for tar in broken_tars:
90+
logging.error(tar)
8091

92+
93+
def extractFiles(files, keep_files):
94+
"""
95+
Given a list of database rows, extract the files from the
96+
tar archives to the current location on disk.
97+
98+
If keep_files is False, the files are not extracted.
99+
This is used when checking whether the files in an HPSS
100+
repository are valid.
101+
"""
81102
failures = []
82103
tfname = None
83104
newtar = True
@@ -97,7 +118,8 @@ def extractFiles(files):
97118
tar = tarfile.open(tfname, "r")
98119

99120
# Extract file
100-
logging.info('Extracting %s' % (file[1]))
121+
cmd = 'Extracting' if keep_files else 'Checking'
122+
logging.info(cmd + ' %s' % (file[1]))
101123
try:
102124

103125
# Seek file position
@@ -108,39 +130,48 @@ def extractFiles(files):
108130

109131
if tarinfo.isfile():
110132
# fileobj to extract
111-
fin = tar.extractfile(tarinfo)
112-
fname = tarinfo.name
113-
path, name = os.path.split(fname)
114-
if path != '':
115-
if not os.path.isdir(path):
116-
os.makedirs(path)
117-
fout = open(fname, 'w')
118-
hash_md5 = hashlib.md5()
119-
while True:
120-
s = fin.read(BLOCK_SIZE)
121-
if len(s) > 0:
122-
fout.write(s)
123-
hash_md5.update(s)
124-
if len(s) < BLOCK_SIZE:
125-
break
126-
fin.close()
127-
fout.close()
133+
try:
134+
fin = tar.extractfile(tarinfo)
135+
fname = tarinfo.name
136+
path, name = os.path.split(fname)
137+
if path != '':
138+
if not os.path.isdir(path):
139+
os.makedirs(path)
140+
if keep_files:
141+
fout = open(fname, 'w')
142+
143+
hash_md5 = hashlib.md5()
144+
while True:
145+
s = fin.read(BLOCK_SIZE)
146+
if len(s) > 0:
147+
hash_md5.update(s)
148+
if keep_files:
149+
fout.write(s)
150+
if len(s) < BLOCK_SIZE:
151+
break
152+
finally:
153+
fin.close()
154+
if keep_files:
155+
fout.close()
156+
128157
md5 = hash_md5.hexdigest()
129-
tar.chown(tarinfo, fname)
130-
tar.chmod(tarinfo, fname)
131-
tar.utime(tarinfo, fname)
132-
# Verify size
133-
if os.path.getsize(fname) != file[2]:
134-
logging.error('size mismatch for: %s' % (fname))
158+
if keep_files:
159+
tar.chown(tarinfo, fname)
160+
tar.chmod(tarinfo, fname)
161+
tar.utime(tarinfo, fname)
162+
# Verify size
163+
if os.path.getsize(fname) != file[2]:
164+
logging.error('size mismatch for: %s' % (fname))
135165
# Verify md5 checksum
136166
if md5 != file[4]:
137167
logging.error('md5 mismatch for: %s' % (fname))
138168
logging.error('md5 of extracted file: %s' % (md5))
139169
logging.error('md5 of original file: %s' % (file[4]))
170+
failures.append(file)
140171
else:
141172
logging.debug('Valid md5: %s %s' % (md5, fname))
142173

143-
else:
174+
elif keep_files:
144175
tar.extract(tarinfo)
145176
# Note: tar.extract() will not restore time stamps of symbolic
146177
# links. Could not find a Python-way to restore it either, so
@@ -155,6 +186,7 @@ def extractFiles(files):
155186
except:
156187
traceback.print_exc()
157188
logging.error('Retrieving %s' % (file[1]))
189+
failures.append(file)
158190

159191
# Close current archive?
160192
if (i == nfiles-1 or files[i][5] != files[i+1][5]):
@@ -165,3 +197,5 @@ def extractFiles(files):
165197

166198
# Open new archive next time
167199
newtar = True
200+
201+
return failures

zstash/main.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from update import update
1111
from extract import extract
1212
from chgrp import chgrp
13+
from check import check
1314

1415

1516
# -----------------------------------------------------------------------------
@@ -24,6 +25,8 @@ def main():
2425
create create new archive
2526
update update existing archive
2627
extract extract files from archive
28+
chgrp change the group of an archive
29+
check check the integrity of the files in the archive
2730
2831
For help with a specific command
2932
zstash command --help
@@ -42,10 +45,12 @@ def main():
4245
extract()
4346
elif args.command == 'chgrp':
4447
chgrp()
48+
elif args.command == 'check':
49+
check()
4550
else:
4651
print 'Unrecognized command'
4752
parser.print_help()
48-
exit(1)
53+
sys.exit(1)
4954

5055

5156
# -----------------------------------------------------------------------------

0 commit comments

Comments
 (0)