
Commit d497ed8

Unquote S3 key name before submitting to Glue
1 parent 762add7 commit d497ed8
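
Context: Amazon S3 percent-encodes object key names in the event notifications it emits, so a key such as main/2023-09-26T00:06:39Z_... arrives in the event payload as main/2023-09-26T00%3A06%3A39Z_.... Previously the encoded key was forwarded to the Glue workflow verbatim; this commit decodes it first. A minimal sketch of the decoding step, using only the standard library:

    from urllib import parse

    # S3 event notifications percent-encode object keys, e.g. ":" arrives as "%3A"
    encoded_key = "main/2023-09-26T00%3A06%3A39Z_d873eafb-554f-4f8a-9e61-cdbcb7de07eb"
    print(parse.unquote(encoded_key))
    # main/2023-09-26T00:06:39Z_d873eafb-554f-4f8a-9e61-cdbcb7de07eb

(Note that parse.unquote leaves "+" untouched; if keys could contain spaces encoded as "+", parse.unquote_plus would be the alternative. This commit uses parse.unquote.)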

File tree

2 files changed: +29 -11 lines changed

src/lambda_function/s3_to_glue/app.py
tests/test_s3_to_glue_lambda.py


src/lambda_function/s3_to_glue/app.py

Lines changed: 22 additions & 10 deletions
@@ -8,6 +8,7 @@
 import json
 import logging
 import boto3
+from urllib import parse
 
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
@@ -91,8 +92,25 @@ def is_s3_test_event(record : dict) -> bool:
     else:
         return False
 
+def get_object_info(s3_event) -> dict:
+    """
+    Derive object info formatted for submission to Glue from an S3 event.
+
+    Args:
+        s3_event (dict): An S3 event
 
-def lambda_handler(event, context) -> None:
+    Returns:
+        object_info (dict) The S3 object info
+    """
+    bucket_name = s3_event["s3"]["bucket"]["name"]
+    object_key = parse.unquote(s3_event["s3"]["object"]["key"])
+    object_info = {
+        "source_bucket": bucket_name,
+        "source_key": object_key,
+    }
+    return object_info
+
+def lambda_handler(event, context) -> dict:
     """
     This main lambda function will be triggered by a SQS event and will
     poll the SQS queue for all available S3 event messages. If the
@@ -113,27 +131,21 @@ def lambda_handler(event, context) -> None:
             logger.info(f"Found AWS default s3:TestEvent. Skipping.")
         else:
             for s3_event in s3_event_records["Records"]:
-                bucket_name = s3_event["s3"]["bucket"]["name"]
-                object_key = s3_event["s3"]["object"]["key"]
-                object_info = {
-                    "source_bucket": bucket_name,
-                    "source_key": object_key,
-                }
+                object_info = get_object_info(s3_event)
                 if filter_object_info(object_info) is not None:
                     s3_objects_info.append(object_info)
                 else:
                     logger.info(
                         f"Object doesn't meet the S3 event rules to be processed. Skipping."
                     )
-
     if len(s3_objects_info) > 0:
         logger.info(
             "Submitting the following files to "
             f"{os.environ['PRIMARY_WORKFLOW_NAME']}: {json.dumps(s3_objects_info)}"
         )
         submit_s3_to_json_workflow(
-            objects_info=s3_objects_info,
-            workflow_name=os.environ["PRIMARY_WORKFLOW_NAME"],
+            objects_info=s3_objects_info,
+            workflow_name=os.environ["PRIMARY_WORKFLOW_NAME"]
         )
     else:
         logger.info(
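
For reference, the new helper is self-contained enough to exercise outside of Lambda. A minimal sketch, with the helper inlined and a hypothetical record shaped like the ones lambda_handler iterates over (only the s3.bucket.name and s3.object.key paths matter; the bucket and key values here are made up):

    from urllib import parse

    def get_object_info(s3_event) -> dict:
        # same logic as the helper added in app.py above
        bucket_name = s3_event["s3"]["bucket"]["name"]
        object_key = parse.unquote(s3_event["s3"]["object"]["key"])
        return {"source_bucket": bucket_name, "source_key": object_key}

    record = {
        "s3": {
            "bucket": {"name": "example-bucket"},          # hypothetical
            "object": {"key": "some%20prefix/file.json"},  # "%20" decodes to a space
        }
    }
    print(get_object_info(record))
    # {'source_bucket': 'example-bucket', 'source_key': 'some prefix/file.json'}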

tests/test_s3_to_glue_lambda.py

Lines changed: 7 additions & 1 deletion
@@ -156,7 +156,6 @@ def object_info(self):
 
     @pytest.fixture
     def set_env_var(self, monkeypatch, sqs_queue):
-        monkeypatch.setenv("SQS_QUEUE_URL", sqs_queue["QueueUrl"])
         monkeypatch.setenv("PRIMARY_WORKFLOW_NAME", "test_workflow")
 
     def test_submit_s3_to_json_workflow(self, object_info, monkeypatch):
@@ -198,6 +197,13 @@ def test_that_lambda_handler_calls_submit_s3_to_json_workflow_if_queue_has_messa
             workflow_name="test_workflow",
         )
 
+    def test_get_object_info_unicode_characters_in_key(self, s3_event):
+        s3_event["s3"]["object"]["key"] = \
+            "main/2023-09-26T00%3A06%3A39Z_d873eafb-554f-4f8a-9e61-cdbcb7de07eb"
+        object_info = app.get_object_info(s3_event=s3_event)
+        assert object_info["source_key"] == \
+            "main/2023-09-26T00:06:39Z_d873eafb-554f-4f8a-9e61-cdbcb7de07eb"
+
     @pytest.mark.parametrize(
         "object_info,expected",
         [