Skip to content

Commit 15f753b

Browse files
authored
log formatting fixes for raw-sync lambda (#147)
1 parent 5e7c861 commit 15f753b

File tree

2 files changed

+46
-14
lines changed

2 files changed

+46
-14
lines changed

src/lambda_function/raw_sync/app.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ def list_files_in_archive(
375375
if adjusted_range_size > total_size * 2:
376376
logger.error(
377377
"Did not find an end of central directory record in "
378-
f"s3://{bucket}/{key}"
378+
f"s3://{os.path.join(bucket, key)}"
379379
)
380380
return []
381381
logger.warning(
@@ -428,7 +428,7 @@ def list_files_in_archive(
428428
file_list.append(file_object)
429429
if len(file_list) == 0:
430430
logger.warning(
431-
f"Did not find any files in s3://{bucket}/{key} which "
431+
f"Did not find any files in s3://{os.path.join(bucket, key)} which "
432432
"satisfy the conditions needed to be processed by the "
433433
"raw Lambda."
434434
)
@@ -501,9 +501,11 @@ def get_expected_raw_key(
501501
str: The expected S3 key of the corresponding raw object.
502502
"""
503503
file_identifier = os.path.basename(path).split(".")[0]
504-
expected_key = (
505-
f"{raw_key_prefix}/dataset={data_type}"
506-
f"/cohort={cohort}/{file_identifier}.ndjson.gz"
504+
expected_key = os.path.join(
505+
raw_key_prefix,
506+
f"dataset={data_type}",
507+
f"cohort={cohort}",
508+
f"{file_identifier}.ndjson.gz",
507509
)
508510
return expected_key
509511

@@ -541,7 +543,7 @@ def main(
541543
filename = file_object["filename"]
542544
logger.info(
543545
f"Checking corresponding raw object for {filename} "
544-
f"from s3://{input_bucket}/{export_key}"
546+
f"from s3://{os.path.join(input_bucket, export_key)}"
545547
)
546548
data_type = get_data_type_from_path(path=filename)
547549
expected_raw_key = get_expected_raw_key(
@@ -559,8 +561,8 @@ def main(
559561
if corresponding_raw_object is None:
560562
logger.info(
561563
f"Did not find corresponding raw object for {filename} from "
562-
f"s3://{input_bucket}/{export_key} at "
563-
f"s3://{raw_bucket}/{expected_raw_key}"
564+
f"s3://{os.path.join(input_bucket, export_key)} at "
565+
f"s3://{os.path.join(raw_bucket, expected_raw_key)}"
564566
)
565567
publish_to_sns(
566568
bucket=input_bucket,

tests/test_lambda_raw_sync.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import io
22
import json
3+
import os
34
import struct
45
import zipfile
56
from collections import defaultdict
@@ -635,15 +636,17 @@ def test_get_data_type_from_path_deleted():
635636
assert data_type == "HealthKitV2Samples_Deleted"
636637

637638

638-
import os
639-
640-
641639
def test_get_expected_raw_key_case1():
642640
raw_key_prefix = "test-raw_key_prefix/json"
643641
data_type = "test-data-type"
644642
cohort = "test-cohort"
645643
path = "path/to/FitbitIntradayCombined_20241111-20241112.json"
646-
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/FitbitIntradayCombined_20241111-20241112.ndjson.gz"
644+
expected_key = os.path.join(
645+
raw_key_prefix,
646+
f"dataset={data_type}",
647+
f"cohort={cohort}",
648+
"FitbitIntradayCombined_20241111-20241112.ndjson.gz",
649+
)
647650
assert (
648651
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
649652
== expected_key
@@ -655,7 +658,12 @@ def test_get_expected_raw_key_case2():
655658
data_type = "test-data-type"
656659
cohort = "test-cohort"
657660
path = "path/to/HealthKitV2Samples_AppleStandTime_20241111-20241112.json"
658-
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz"
661+
expected_key = os.path.join(
662+
raw_key_prefix,
663+
f"dataset={data_type}",
664+
f"cohort={cohort}",
665+
"HealthKitV2Samples_AppleStandTime_20241111-20241112.ndjson.gz",
666+
)
659667
assert (
660668
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
661669
== expected_key
@@ -667,7 +675,29 @@ def test_get_expected_raw_key_case3():
667675
data_type = "test-data-type"
668676
cohort = "test-cohort"
669677
path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json"
670-
expected_key = f"{raw_key_prefix}/dataset={data_type}/cohort={cohort}/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz"
678+
expected_key = os.path.join(
679+
raw_key_prefix,
680+
f"dataset={data_type}",
681+
f"cohort={cohort}",
682+
"HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz",
683+
)
684+
assert (
685+
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
686+
== expected_key
687+
)
688+
689+
690+
def test_get_expected_raw_key_trailing_slash():
691+
raw_key_prefix = "test-raw_key_prefix/json/"
692+
data_type = "test-data-type"
693+
cohort = "test-cohort"
694+
path = "path/to/HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.json"
695+
expected_key = os.path.join(
696+
raw_key_prefix,
697+
f"dataset={data_type}",
698+
f"cohort={cohort}",
699+
"HealthKitV2Samples_AppleStandTime_Deleted_20241111-20241112.ndjson.gz",
700+
)
671701
assert (
672702
app.get_expected_raw_key(raw_key_prefix, data_type, cohort, path)
673703
== expected_key

0 commit comments

Comments
 (0)