Skip to content

Commit 5e7c861

Browse files
authored
use raw key prefix to construct raw key (#146)
1 parent 955cf9c commit 5e7c861

File tree

2 files changed

+24
-13
lines changed

2 files changed

+24
-13
lines changed

src/lambda_function/raw_sync/app.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -483,14 +483,16 @@ def get_data_type_from_path(path: str) -> str:
483483
return data_type
484484

485485

486-
def get_expected_raw_key(namespace: str, data_type: str, cohort: str, path: str) -> str:
486+
def get_expected_raw_key(
487+
raw_key_prefix: str, data_type: str, cohort: str, path: str
488+
) -> str:
487489
"""Get the expected raw S3 key
488490
489491
Get the expected raw S3 key of a raw bucket object corresponding to the given
490492
input bucket object.
491493
492494
Args:
493-
namespace (str): The namespace of the corresponding input object.
495+
raw_key_prefix (str): The namespaced S3 prefix where raw objects are written.
494496
data_type (str): The data type of the corresponding input object.
495497
cohort (str): The cohort of the corresponding input object.
496498
path (str): The path of the file relative to the zip archive (export).
@@ -500,7 +502,7 @@ def get_expected_raw_key(namespace: str, data_type: str, cohort: str, path: str)
500502
"""
501503
file_identifier = os.path.basename(path).split(".")[0]
502504
expected_key = (
503-
f"{namespace}/json/dataset={data_type}"
505+
f"{raw_key_prefix}/dataset={data_type}"
504506
f"/cohort={cohort}/{file_identifier}.ndjson.gz"
505507
)
506508
return expected_key
@@ -543,7 +545,7 @@ def main(
543545
)
544546
data_type = get_data_type_from_path(path=filename)
545547
expected_raw_key = get_expected_raw_key(
546-
namespace=namespace,
548+
raw_key_prefix=raw_key_prefix,
547549
data_type=data_type,
548550
cohort=cohort,
549551
path=filename,

tests/test_lambda_raw_sync.py

Lines changed: 18 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -639,27 +639,36 @@ def test_get_data_type_from_path_deleted():
639639

640640

641641
def test_get_expected_raw_key_case1():
642-
namespace = "test-namespace"
642+
raw_key_prefix = "test-raw_key_prefix/json"
643643
data_type = "test-data-type"
644644
cohort = "test-cohort"
645645
path = "path/to/FitbitIntradayCombined_20241111-20241112.json"
646-
expected_key = f"{namespace}/json/dataset={data_type}/cohort={cohort}/FitbitIntradayCombined_20241111-20241112.ndjson.gz"
647-
assert app.get_expected_raw_key(namespace, data_type, cohort, path) == expected_key
646+
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/FitbitIntradayCombined_20241111-20241112.ndjson.gz"
647+
assert (
648+
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
649+
== expected_key
650+
)
648651

649652

650653
def test_get_expected_raw_key_case2():
651-
namespace = "test-namespace"
654+
raw_key_prefix = "test-raw_key_prefix/json"
652655
data_type = "test-data-type"
653656
cohort = "test-cohort"
654657
path = "path/to/HealthKitV2Samples_AppleStandTime_20241111-20241112.json"
655-
expected_key = f"{namespace}/json/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz"
656-
assert app.get_expected_raw_key(namespace, data_type, cohort, path) == expected_key
658+
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz"
659+
assert (
660+
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
661+
== expected_key
662+
)
657663

658664

659665
def test_get_expected_raw_key_case3():
660-
namespace = "test-namespace"
666+
raw_key_prefix = "test-raw_key_prefix/json"
661667
data_type = "test-data-type"
662668
cohort = "test-cohort"
663669
path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json"
664-
expected_key = f"{namespace}/json/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz"
665-
assert app.get_expected_raw_key(namespace, data_type, cohort, path) == expected_key
670+
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz"
671+
assert (
672+
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
673+
== expected_key
674+
)

0 commit comments

Comments (0)