Skip to content

Commit dd8b552

Browse files
Merge pull request #25 from ACCESS-NRI/Remove-nchash-dependency
Remove nchash dependency
2 parents 1b0a801 + 68701af commit dd8b552

File tree

6 files changed

+47
-46
lines changed

6 files changed

+47
-46
lines changed

.conda/env_dev.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ dependencies:
1212
- libnetcdf
1313
- six
1414
- PyYAML
15-
- nchash>=0.1.5
1615
- pytest
1716
- pytest-cov
1817
- versioneer

.conda/meta.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ requirements:
2727
- python
2828
- six
2929
- pyyaml
30-
- nchash
31-
3230
test:
3331
imports:
3432
- yamanifest
@@ -42,4 +40,4 @@ about:
4240
license: Apache Software
4341
license_file: LICENSE
4442
summary: {{ project.get('description') }}
45-
license_family: Apache
43+
license_family: Apache

.github/workflows/CI.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,21 @@ jobs:
5555
auto-update-conda: false
5656
show-channel-urls: true
5757

58+
- name: Enforce .tar.bz2 packages
59+
# Temporary workarounds until the action uibcdf/action-build-and-upload-conda-packages is updated:
60+
# We create a `~/.condarc` file with the correct options to enforce the use of `.tar.bz2` packages
61+
# and we set the channels to be used by conda build
62+
shell: bash
63+
run: |
64+
cat > ~/.condarc << EOF
65+
conda-build:
66+
pkg_format: .tar.bz2
67+
channels:
68+
- accessnri
69+
- conda-forge
70+
- nodefaults
71+
EOF
72+
5873
- name: Verify conda recipe
5974
shell: bash -el {0}
6075
# Ignores:

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ dynamic = ["version"]
1313
requires-python = ">=3.10"
1414
dependencies = [
1515
"PyYAML",
16-
"nchash>=0.1.5",
1716
"six"
1817
]
1918

@@ -40,4 +39,4 @@ style = "pep440"
4039
versionfile_source = "yamanifest/_version.py"
4140
versionfile_build = "yamanifest/_version.py"
4241
tag_prefix = ""
43-
parentdir_prefix = "yamanifest-"
42+
parentdir_prefix = "yamanifest-"

test/test_manifest.py

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616

1717
from __future__ import print_function
1818

19-
import pytest
20-
import sys, os, time, glob
19+
import glob
20+
import os
2121
import shutil
22-
import pdb # Add pdb.set_trace() to set breakpoints
22+
import sys
23+
24+
import pytest
2325

2426
print("Version: {}".format(sys.version))
2527

@@ -28,7 +30,8 @@
2830

2931
verbose = True
3032

31-
import os
33+
34+
3235
def touch(fname, times=None):
3336
with open(fname, 'a'):
3437
os.utime(fname, times)
@@ -113,7 +116,7 @@ def test_manifest_netcdf():
113116
mf1 = mf.Manifest('mf1.yaml')
114117

115118
for filepath in glob.glob('*.nc'):
116-
mf1.add(filepath,['nchash','md5','sha1'])
119+
mf1.add(filepath,['binhash','md5','sha1'])
117120

118121
mf1.dump()
119122

@@ -122,7 +125,7 @@ def test_manifest_netcdf():
122125
mf2 = mf.Manifest('mf2.yaml')
123126

124127
for filepath in glob.glob('*.nc'):
125-
mf2.add(filepath,['nchash','md5','sha1'])
128+
mf2.add(filepath,['binhash','md5','sha1'])
126129

127130
mf2.dump()
128131

@@ -136,7 +139,7 @@ def test_manifest_netcdf():
136139

137140
mf1 = mf.Manifest('mf1.yaml')
138141

139-
mf1.add(glob.glob('*.nc'),['nchash'])
142+
mf1.add(glob.glob('*.nc'),['binhash'])
140143
mf1.add(hashfn=['md5','sha1'])
141144

142145
assert(mf1.equals(mf2))
@@ -149,10 +152,10 @@ def test_manifest_netcdf_changed_time():
149152

150153
for filepath in glob.glob('*.nc'):
151154
touch(filepath)
152-
mf3.add(filepath,['nchash','md5','sha1'])
155+
mf3.add(filepath,['md5','sha1','binhash'])
153156

154157
mf3.dump()
155-
158+
mf3.add(filepath,['md5','sha1','binhash'])
156159
mf2 = mf.Manifest('mf2.yaml')
157160
mf2.load()
158161

@@ -198,13 +201,13 @@ def test_manifest_find():
198201

199202
for filepath in mf1:
200203
# Test for hashes we know should be in the manifest
201-
for hashfn in ['nchash','md5','sha1']:
204+
for hashfn in ['md5','sha1']:
202205
hashval = mf1.get(filepath,hashfn)
203206
print(hashfn,hashval,filepath,mf1.find(hashfn,hashval))
204207
assert(mf1.find(hashfn,hashval) == filepath)
205208

206209
# Test for one we know shouldn't be there
207-
for hashfn in ['binhash',]:
210+
for hashfn in ['binhash-nomtime',]:
208211
hashval = mf1.get(filepath,hashfn)
209212
print(hashfn,hashval,filepath,mf1.find(hashfn,hashval))
210213
assert(mf1.find(hashfn,hashval) == None)
@@ -213,9 +216,9 @@ def test_manifest_find():
213216

214217
mf2 = mf.Manifest('mf2.yaml')
215218

216-
# Make a manifest only with nchash
219+
# Make a manifest only with md5
217220
for filepath in glob.glob('*.nc'):
218-
mf1.add(filepath,['nchash'])
221+
mf1.add(filepath,['md5'])
219222

220223
# Update with hashes from mf1
221224
mf2.update_matching_hashes(mf1)
@@ -227,7 +230,7 @@ def test_manifest_find():
227230
mf3 = mf.Manifest('mf3.yaml')
228231

229232
for filepath in glob.glob(os.path.join('test','testfiles','*.nc')):
230-
mf3.add(filepath,['nchash'])
233+
mf3.add(filepath,['md5'])
231234

232235
mf3.update_matching_hashes(mf1)
233236

@@ -241,7 +244,7 @@ def test_manifest_with_mixed_file_types():
241244
mf6 = mf.Manifest('mf6.yaml')
242245

243246
for filepath in glob.glob('*.bin') + glob.glob('*.nc'):
244-
mf6.add(filepath,hashfn=['nchash','binhash'])
247+
mf6.add(filepath,hashfn=['binhash'])
245248

246249
mf6.dump()
247250
assert(mf6.check())
@@ -261,7 +264,7 @@ def test_open_manifest_and_add():
261264
mf7 = mf.Manifest('mf7.yaml')
262265

263266
for filepath in glob.glob('*.nc'):
264-
mf7.add(filepath,hashfn=['nchash','binhash'])
267+
mf7.add(filepath,hashfn=['binhash'])
265268

266269
mf7.dump()
267270

@@ -271,7 +274,7 @@ def test_open_manifest_and_add():
271274
mf7.load()
272275

273276
for filepath in glob.glob('*.bin'):
274-
mf7.add(filepath,hashfn=['nchash','binhash'])
277+
mf7.add(filepath,hashfn=['binhash'])
275278

276279
mf7.dump()
277280

@@ -293,7 +296,7 @@ def test_yamf():
293296
with cd(os.path.join('test','testfiles_copy')):
294297

295298
files = glob.glob('*.bin') + glob.glob('*.nc')
296-
yamf.main_parse_args(["add","-n","mf8.yaml", "-s", "binhash", "-s", "nchash"] + files)
299+
yamf.main_parse_args(["add","-n","mf8.yaml", "-s", "binhash"] + files)
297300

298301
mf8 = mf.Manifest('mf8.yaml')
299302
mf8.load()
@@ -302,7 +305,7 @@ def test_yamf():
302305
mf6.load()
303306

304307
assert(mf8.equals(mf6))
305-
assert(yamf.main_parse_args(["check","-n","mf8.yaml", "-s", "binhash", "-s", "nchash"]))
308+
assert(yamf.main_parse_args(["check","-n","mf8.yaml", "-s", "binhash"]))
306309

307310

308311
def test_shortcircuit_condition():
@@ -362,17 +365,17 @@ def test_shortcircuit_add():
362365
mf6 = mf.Manifest('mf6.yaml')
363366

364367
for filepath in glob.glob('*.bin') + glob.glob('*.nc'):
365-
mf6.add(filepath,hashfn=['nchash','binhash'],shortcircuit=True)
368+
mf6.add(filepath,hashfn=['md5'],shortcircuit=True)
366369

367370
mf6.dump()
368371
# print("mf6: ",mf6.data)
369372
# pdb.set_trace()
370373

371374
assert(mf6.check())
372375

373-
# Should have no nchash for the bin files
376+
# Should have no sha1 for the bin files
374377
for filepath in glob.glob('*.bin'):
375-
assert(mf6.get(filepath,hashfn='nchash') == None)
378+
assert(mf6.get(filepath,hashfn='sha1') == None)
376379

377380
# Should have no binhash for the netcdf files
378381
for filepath in glob.glob('*.nc'):
@@ -385,7 +388,7 @@ def test_malformed_file():
385388
mf9 = mf.Manifest('mf9.yaml')
386389

387390
for filepath in glob.glob('*.nc'):
388-
mf9.add(filepath,['nchash','md5','sha1'])
391+
mf9.add(filepath,['md5','sha1'])
389392

390393
# Intentionally alter the format string
391394
mf9.header["format"] = 'bogus'
@@ -499,4 +502,4 @@ def test_binhash_nomtime():
499502

500503
assert(not mf1.equals(mf2))
501504
assert(not mf1.equals(mf3))
502-
assert(mf2.equals(mf4))
505+
assert(mf2.equals(mf4))

yamanifest/hashing.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -18,33 +18,22 @@
1818
limitations under the License.
1919
"""
2020

21-
from __future__ import print_function, absolute_import
21+
from __future__ import absolute_import, print_function
2222

2323
import hashlib
2424
import io
2525
import os
2626
import sys
27-
from nchash import NCDataHash, NotNetcdfFileError
2827

2928
length=io.DEFAULT_BUFFER_SIZE
3029
one_hundred_megabytes = 104857600
3130

3231
# List of supported hashes and the ordering used to determine relative expense of
3332
# calculation
3433
supported_hashes = [
35-
'nchash', 'binhash', 'binhash-nomtime', 'md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'
34+
'binhash', 'binhash-nomtime', 'md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'
3635
]
3736

38-
def _nchash(path):
39-
hashval = ''
40-
m = NCDataHash(path)
41-
try:
42-
hashval = m.gethash()
43-
except NotNetcdfFileError as e:
44-
sys.stderr.write(str(e))
45-
hashval = None
46-
return hashval
47-
4837
def _binhash(path, size, include_mtime):
4938
m = hashlib.new('md5')
5039
with io.open(path, mode="rb") as fd:
@@ -81,9 +70,7 @@ def hash(path, hashfn, size=one_hundred_megabytes):
8170
if hashfn not in supported_hashes:
8271
sys.stderr.write('\nUnsupported hash function {}, skipping {}\n'.format(hashfn, path))
8372
try:
84-
if hashfn == 'nchash':
85-
return _nchash(path)
86-
elif hashfn == 'binhash':
73+
if hashfn == 'binhash':
8774
return _binhash(path, one_hundred_megabytes, True)
8875
elif hashfn == 'binhash-nomtime':
8976
return _binhash(path, one_hundred_megabytes, False)

0 commit comments

Comments
 (0)