Skip to content

Commit 43b840d

Browse files
committed
update code, add tests
1 parent 021a5e0 commit 43b840d

File tree

24 files changed

+253
-81
lines changed

24 files changed

+253
-81
lines changed

example_catalog/catalog.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,6 @@
6363
"*"
6464
],
6565
"ordering": "ascending",
66-
"description": "Computed by including merkle:object_hash values in ascending order and building the Merkle tree."
66+
"description": "Computed by excluding Merkle fields and including merkle:object_hash values in ascending order to build the Merkle tree."
6767
}
6868
}

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101212T230244_20140325T230302_ADS_000000_1jTi.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-12T23:02:44.000000Z",
4444
"end_datetime": "2014-03-25T23:03:02.000000Z",
4545
"start_datetime": "2010-12-12T23:02:44.000000Z",
46-
"merkle:object_hash": "f89779ccd725c06a7758f0e855292316820b0bff1625aacd6270d5c1e915ab87"
46+
"merkle:object_hash": "ce9f56e695ab1751b8f0c8d9ef1f1ecedaf04574ec3077e70e7426ec9fc61ea4"
4747
},
4848
"bbox": [
4949
99,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101212T230244_20140329T113710_ADS_000000_eAmG.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-12T23:02:44.000000Z",
4444
"end_datetime": "2014-03-29T11:37:10.000000Z",
4545
"start_datetime": "2010-12-12T23:02:44.000000Z",
46-
"merkle:object_hash": "37c33cc7213eb078b049780c7e0fee217241588e9c3ea134ac97f57e62ea6cca"
46+
"merkle:object_hash": "ac66e07717b56e8421c8fec00b2b300afd49d30a8ec9c6d505df3b0568de9c77"
4747
},
4848
"bbox": [
4949
100,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101215T103647_20130405T103047_ADS_000000_RHJx.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-15T10:36:47.000000Z",
4444
"end_datetime": "2013-04-05T10:30:47.000000Z",
4545
"start_datetime": "2010-12-15T10:36:47.000000Z",
46-
"merkle:object_hash": "32c8198df8d0168b595fa57ad528d7443c2705200a2fe91a685de891bffdefea"
46+
"merkle:object_hash": "39969fd5f4a3a170ff39df8f2c13ebca66aab40890275a94ce798e281b85d54d"
4747
},
4848
"bbox": [
4949
98,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101215T103647_20130405T103047_ADS_000000_oCX9.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-15T10:36:47.000000Z",
4444
"end_datetime": "2013-04-05T10:30:47.000000Z",
4545
"start_datetime": "2010-12-15T10:36:47.000000Z",
46-
"merkle:object_hash": "32c8198df8d0168b595fa57ad528d7443c2705200a2fe91a685de891bffdefea"
46+
"merkle:object_hash": "22a31ab988181f280a0bfc6872556ad1d670373bd997f711389d057e1b1f531a"
4747
},
4848
"bbox": [
4949
100,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101215T203649_20140913T090954_ADS_000000_4KBA.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-15T20:36:49.000000Z",
4444
"end_datetime": "2014-09-13T09:09:54.000000Z",
4545
"start_datetime": "2010-12-15T20:36:49.000000Z",
46-
"merkle:object_hash": "24ba2117f123a314f9271266190e53e79a8f70e9e597d491616bdce7be3c6209"
46+
"merkle:object_hash": "3c22648957e7c76f75a9fbfdf7c164e7d745d9e4ebe809db15d2474e01be4764"
4747
},
4848
"bbox": [
4949
135,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101215T203649_20140913T090954_ADS_000000_Quqd.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-15T20:36:49.000000Z",
4444
"end_datetime": "2014-09-13T09:09:54.000000Z",
4545
"start_datetime": "2010-12-15T20:36:49.000000Z",
46-
"merkle:object_hash": "24ba2117f123a314f9271266190e53e79a8f70e9e597d491616bdce7be3c6209"
46+
"merkle:object_hash": "6d16f23e2fe2849ce0316c012e9284b413f7c086a11d6de421c0582b397f513e"
4747
},
4848
"bbox": [
4949
135,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101215T203914_20121226T204852_ADS_000000_Yjwo.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-15T20:39:14.000000Z",
4444
"end_datetime": "2012-12-26T20:48:52.000000Z",
4545
"start_datetime": "2010-12-15T20:39:14.000000Z",
46-
"merkle:object_hash": "5a51073c5a1998c43ced7e0cae0de4ba99847b62f52601dca021c89b5bf9c96f"
46+
"merkle:object_hash": "ed99b4348ede45959a9cf471490f0f2c6a106bb4337a46d07d4fc4b4988f099f"
4747
},
4848
"bbox": [
4949
134,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101216T005027_20130331T005208_ADS_000000_J3m5.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-16T00:50:27.000000Z",
4444
"end_datetime": "2013-03-31T00:52:08.000000Z",
4545
"start_datetime": "2010-12-16T00:50:27.000000Z",
46-
"merkle:object_hash": "e9b4e12f0d747e25432a72f2a8e7fd5518fbc1e91caca3d90eaecaebd6ff0308"
46+
"merkle:object_hash": "90a0e5265d02fd58ab81437d14ef1c318ec1b86167ac935d6abb598602798690"
4747
},
4848
"bbox": [
4949
-111,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_30_20101216T005027_20130411T005216_ADS_000000_Hbh2.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-16T00:50:27.000000Z",
4444
"end_datetime": "2013-04-11T00:52:16.000000Z",
4545
"start_datetime": "2010-12-16T00:50:27.000000Z",
46-
"merkle:object_hash": "a2ebb590daa1915fd4d42fb677769d93b3c71e2151c998aa67af83bf2075ecd1"
46+
"merkle:object_hash": "176a6f0026a763a85909d64d3eeac37873cd7c970fae685a6f37bd16d806b3fe"
4747
},
4848
"bbox": [
4949
-112,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_90_20101212T102356_20130709T103701_ADS_000000_6275.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-12T10:24:16.000000Z",
4444
"end_datetime": "2010-12-12T10:24:17.000000Z",
4545
"start_datetime": "2010-12-12T10:24:16.000000Z",
46-
"merkle:object_hash": "440004ed55c220eb73041f3af7597b52e7e53a2d84daef4494e911cd6e4b0645"
46+
"merkle:object_hash": "ffaee98a244aaad0f970100f0f3b11bf3ffd3f5de74fa473ed86cc851c73dbc9"
4747
},
4848
"bbox": [
4949
-72,

example_catalog/collections/COP-DEM/DEM1_SAR_DGE_90_20101212T102356_20130709T103701_ADS_000000_8521.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-12T10:24:18.000000Z",
4444
"end_datetime": "2010-12-12T10:24:19.000000Z",
4545
"start_datetime": "2010-12-12T10:24:18.000000Z",
46-
"merkle:object_hash": "4c51b04cfc7393a7a6dd23c078070be66512709d71f6ad25ebc0ce58456570ee"
46+
"merkle:object_hash": "6d7dca89eec5da5e31023b7e24b9527e33a021099c8e74791ecda860f96091ab"
4747
},
4848
"bbox": [
4949
-72,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_30_20101215T203649_20140913T090954_ADS_000000_kipX.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-15T20:36:49.000000Z",
4444
"end_datetime": "2014-09-13T09:09:54.000000Z",
4545
"start_datetime": "2010-12-15T20:36:49.000000Z",
46-
"merkle:object_hash": "371fd0c1f79f165d3cf8a0c32a1380c812f69583c0c183aefdadb741963325a8"
46+
"merkle:object_hash": "88f4d9bb5b1512f214f1ca60e6ff2bac28640fd0a00339d01c453b6db9fd7f88"
4747
},
4848
"bbox": [
4949
135,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_30_20101215T203914_20130404T204908_ADS_000000_CURF.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-15T20:39:14.000000Z",
4444
"end_datetime": "2013-04-04T20:49:08.000000Z",
4545
"start_datetime": "2010-12-15T20:39:14.000000Z",
46-
"merkle:object_hash": "0ed67614e47c0d1d6fbaedc465f1362b8c77c6849d331b3d34389c086c759824"
46+
"merkle:object_hash": "457aa1fbc4005627077b55cd673a4457f2afc8a15ef3b776af8df60e60e8e1ab"
4747
},
4848
"bbox": [
4949
133,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_30_20101216T100443_20140415T214254_ADS_000000_9Wqx.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-16T10:04:43.000000Z",
4444
"end_datetime": "2014-04-15T21:42:54.000000Z",
4545
"start_datetime": "2010-12-16T10:04:43.000000Z",
46-
"merkle:object_hash": "fc3fe2b76328c498cb310769ffea2e529b7e9a202a853a048502239bdf7bf0c7"
46+
"merkle:object_hash": "e7b65b76ed20f947d8a2b4ae126b832d9f07fb18822863556c9fa0ac40dae873"
4747
},
4848
"bbox": [
4949
122,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_30_20101217T200231_20140802T083458_ADS_000000_tFcK.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-17T20:02:31.000000Z",
4444
"end_datetime": "2014-08-02T08:34:58.000000Z",
4545
"start_datetime": "2010-12-17T20:02:31.000000Z",
46-
"merkle:object_hash": "a3eb9e129dbae95c8c29387701f72a9c2492db93bdd46dd6169e26ed94a37377"
46+
"merkle:object_hash": "d0114075968ff1a3860eaf90d85317304f96757d6313f396eb556546a9c95006"
4747
},
4848
"bbox": [
4949
144,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_90_20101212T084914_20130418T085214_ADS_000000_5545.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-12T08:49:36.000000Z",
4444
"end_datetime": "2010-12-12T08:49:37.000000Z",
4545
"start_datetime": "2010-12-12T08:49:36.000000Z",
46-
"merkle:object_hash": "b8c3c20b55fe3fdf20590d3a4a56ccdd9dee2e7777ff73e31369bd03a2515220"
46+
"merkle:object_hash": "7611aa1e37ee256bb346d0405c210c9e2256c43e7ed9854b4a91ef75caf59d09"
4747
},
4848
"bbox": [
4949
-49,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_90_20101212T084914_20140212T212323_ADS_000000_1370.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-12T08:49:42.000000Z",
4444
"end_datetime": "2010-12-12T08:49:43.000000Z",
4545
"start_datetime": "2010-12-12T08:49:42.000000Z",
46-
"merkle:object_hash": "0df9c41fb9f08c0f730b609d2d6c364abd828986b23521de706e8f4f84cf5aef"
46+
"merkle:object_hash": "c4a241c5917f7fcb82bedca10127ad22504b3462c0d3704d76a5f549e2c24010"
4747
},
4848
"bbox": [
4949
-48,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_90_20101212T102356_20130628T103333_ADS_000000_1705.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-12T10:24:00.000000Z",
4444
"end_datetime": "2010-12-12T10:24:01.000000Z",
4545
"start_datetime": "2010-12-12T10:24:00.000000Z",
46-
"merkle:object_hash": "23a6d9a1862fc66bd4fae427012ee89b81d9e571ea05bee65b309417136310ea"
46+
"merkle:object_hash": "ae063aa10078d5316f36718b92f966c9f96f63f621839bd031730eb068c1c265"
4747
},
4848
"bbox": [
4949
-72,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_90_20101213T034716_20130408T035028_ADS_000000_5033.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-13T03:47:46.000000Z",
4444
"end_datetime": "2010-12-13T03:47:47.000000Z",
4545
"start_datetime": "2010-12-13T03:47:46.000000Z",
46-
"merkle:object_hash": "bd6555752f00144ec8fef94a7a0d2ce28579b62546f39438923dcb2b21341ff7"
46+
"merkle:object_hash": "c1511bd4889e8078010287a820d3fcc216cad1ab48374e324fb7098fd1c3f882"
4747
},
4848
"bbox": [
4949
26,

example_catalog/collections/COP-DEM/DEM1_SAR_DTE_90_20101217T224141_20140127T121413_ADS_000000_0611.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"datetime": "2010-12-17T22:42:03.000000Z",
4444
"end_datetime": "2010-12-17T22:42:04.000000Z",
4545
"start_datetime": "2010-12-17T22:42:03.000000Z",
46-
"merkle:object_hash": "4f5ff2fef424264bb7355dc29c099c6dbe89baa1d48f448ef14ccd518a739a25"
46+
"merkle:object_hash": "9c33c4c49913483588fb7e2aa8084e675649ed253083ab62ac1ebebcfc16a944"
4747
},
4848
"bbox": [
4949
-84,

example_catalog/collections/COP-DEM/collection.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
"href": "https://catalogue.dataspace.copernicus.eu/stac/collections/COP-DEM/items"
5151
}
5252
],
53-
"merkle:root": "9c2acdf800018906baf02f13f678b68553aafa05186bd7af7474a2c7596d431a",
53+
"merkle:root": "aa7f89b29cb339032ec86d81d4090bdbd52199152fb657f50b08eec1b3234ee2",
5454
"merkle:hash_method": {
5555
"function": "sha256",
5656
"fields": [

stac_merkle_tree_cli/compute_merkle_info.py

+27-54
Original file line numberDiff line numberDiff line change
@@ -10,50 +10,50 @@
1010
# Define Merkle fields to exclude from hashing
1111
MERKLE_FIELDS = {"merkle:object_hash", "merkle:hash_method", "merkle:root"}
1212

13+
def remove_merkle_fields(data: Any) -> Any:
    """
    Recursively removes Merkle fields from a nested JSON-like structure.

    Parameters:
    - data: A dict, a list, or any other value.

    Returns:
    - For a dict: a new dict without any key listed in MERKLE_FIELDS, whose
      remaining values have been cleaned recursively.
    - For a list: a new list whose elements have been cleaned recursively.
    - For anything else: `data` itself, unchanged.

    The input is never modified.
    """
    if isinstance(data, dict):
        # Drop Merkle keys at this level, then descend into the values kept.
        return {
            key: remove_merkle_fields(value)
            for key, value in data.items()
            if key not in MERKLE_FIELDS
        }
    if isinstance(data, list):
        return [remove_merkle_fields(element) for element in data]
    # Neither a dict nor a list: there are no keys to strip.
    return data
27+
1328
def compute_merkle_object_hash(
    stac_object: Dict[str, Any],
    hash_method: Dict[str, Any]
) -> str:
    """
    Computes the merkle:object_hash for a STAC object (Catalog, Collection, or Item).

    Keys listed in MERKLE_FIELDS are removed at every nesting level before
    the object is serialized and hashed.

    Parameters:
    - stac_object (dict): The STAC object JSON content.
    - hash_method (dict): The hash method details from merkle:hash_method.
      'fields' defaults to ['*'] (hash the whole object); 'function'
      defaults to 'sha256'.

    Returns:
    - str: The computed Merkle object hash as a hexadecimal string.

    Raises:
    - ValueError: If 'function' does not name a fixed-length hash algorithm
      that hashlib guarantees to provide.
    """
    # Resolve the hash function first so a bad configuration fails before any
    # serialization work is done.
    hash_function_name = hash_method.get('function', 'sha256').replace('-', '').lower()
    # A bare getattr() on hashlib would also accept non-hash attributes such as
    # 'new' or 'algorithms_guaranteed'; the shake_* algorithms are excluded
    # because their hexdigest() requires an explicit length argument.
    is_supported = (
        hash_function_name in hashlib.algorithms_guaranteed
        and not hash_function_name.startswith('shake_')
    )
    hash_func = getattr(hashlib, hash_function_name, None) if is_supported else None
    if not hash_func:
        raise ValueError(f"Unsupported hash function: {hash_function_name}")

    fields = hash_method.get('fields', ['*'])
    if fields == ['*'] or fields == ['all']:
        # Exclude Merkle fields from all levels
        data_to_hash = remove_merkle_fields(stac_object)
    else:
        # Include only specified fields, then remove Merkle fields
        selected_data = {field: stac_object.get(field) for field in fields if field in stac_object}
        data_to_hash = remove_merkle_fields(selected_data)

    # Serialize the data to a canonical JSON string (sorted keys, no
    # insignificant whitespace) so equal content always hashes equally
    json_str = json.dumps(data_to_hash, sort_keys=True, separators=(',', ':'))

    # Compute the hash
    merkle_object_hash = hash_func(json_str.encode('utf-8')).hexdigest()
    return merkle_object_hash
@@ -120,13 +120,11 @@ def process_item(item_path: Path, hash_method: Dict[str, Any]) -> str:
120120
item_json = json.load(f)
121121

122122
# Compute merkle:object_hash
123-
own_hash = compute_merkle_object_hash(item_json, hash_method, is_item=True)
123+
own_hash = compute_merkle_object_hash(item_json, hash_method)
124124

125-
# Add Merkle fields to 'properties'
125+
# Add merkle:object_hash to 'properties'
126126
properties = item_json.setdefault('properties', {})
127127
properties['merkle:object_hash'] = own_hash
128-
# Remove the following line to avoid adding 'merkle:hash_method' to Items
129-
# properties['merkle:hash_method'] = hash_method
130128

131129
# Ensure the Merkle extension is listed
132130
item_json.setdefault('stac_extensions', [])
@@ -163,10 +161,7 @@ def process_collection(collection_path: Path, parent_hash_method: Dict[str, Any]
163161
collection_json = json.load(f)
164162

165163
# Determine the hash_method to use
166-
if 'merkle:hash_method' in collection_json:
167-
hash_method = collection_json['merkle:hash_method']
168-
else:
169-
hash_method = parent_hash_method
164+
hash_method = collection_json.get('merkle:hash_method', parent_hash_method)
170165

171166
if not hash_method:
172167
raise ValueError(f"Hash method not specified for {collection_path}")
@@ -182,7 +177,7 @@ def process_collection(collection_path: Path, parent_hash_method: Dict[str, Any]
182177
item_hashes.append(item_hash)
183178

184179
# Compute merkle:object_hash
185-
own_hash = compute_merkle_object_hash(collection_json, hash_method, is_item=False)
180+
own_hash = compute_merkle_object_hash(collection_json, hash_method)
186181
collection_json['merkle:object_hash'] = own_hash
187182
item_hashes.append(own_hash)
188183

@@ -231,7 +226,7 @@ def process_catalog(catalog_path: Path) -> str:
231226
'function': 'sha256',
232227
'fields': ['*'],
233228
'ordering': 'ascending',
234-
'description': 'Computed by including merkle:object_hash values in ascending order and building the Merkle tree.'
229+
'description': 'Computed by excluding Merkle fields and including merkle:object_hash values in ascending order to build the Merkle tree.'
235230
}
236231

237232
# Process collections in the collections folder
@@ -254,7 +249,7 @@ def process_catalog(catalog_path: Path) -> str:
254249
click.echo(f"collection.json not found in {collection_dir}", err=True)
255250

256251
# Compute merkle:object_hash
257-
own_hash = compute_merkle_object_hash(catalog_json, hash_method, is_item=False)
252+
own_hash = compute_merkle_object_hash(catalog_json, hash_method)
258253
catalog_json['merkle:object_hash'] = own_hash
259254
collection_hashes.append(own_hash)
260255

@@ -283,25 +278,3 @@ def process_catalog(catalog_path: Path) -> str:
283278
except Exception as e:
284279
click.echo(f"Error processing Catalog {catalog_path}: {e}", err=True)
285280
return ''
286-
287-
@click.command()
288-
@click.argument('catalog_path', type=click.Path(exists=True, file_okay=True, dir_okay=False))
289-
def main(catalog_path):
290-
"""
291-
Computes and adds Merkle info to each STAC object in the catalog.
292-
293-
CATALOG_PATH is the path to your root catalog.json file.
294-
"""
295-
catalog_path = Path(catalog_path).resolve()
296-
297-
if not catalog_path.exists():
298-
click.echo(f"Catalog file does not exist: {catalog_path}", err=True)
299-
return
300-
301-
# Process the root catalog
302-
process_catalog(catalog_path)
303-
304-
click.echo("Merkle info computation and addition completed.")
305-
306-
if __name__ == '__main__':
307-
main()

0 commit comments

Comments
 (0)