
Commit 9db26e6

address comments
1 parent 8af510b commit 9db26e6

File tree

6 files changed: +47, -623 lines changed

6 files changed

+47
-623
lines changed

Diff for: README.md (+1, -1)
@@ -52,6 +52,7 @@ Some examples require extra dependencies. See each sample's directory for specific instructions.
 * [hello_signal](hello/hello_signal.py) - Send signals to a workflow.
 <!-- Keep this list in alphabetical order -->
 * [activity_worker](activity_worker) - Use Python activities from a workflow in another language.
+* [cloud_export_to_parquet](cloud_export_to_parquet) - Set up schedule workflow to process exported files on an hourly basis
 * [custom_converter](custom_converter) - Use a custom payload converter to handle custom types.
 * [custom_decorator](custom_decorator) - Custom decorator to auto-heartbeat a long-running activity.
 * [dsl](dsl) - DSL workflow that executes steps defined in a YAML file.
@@ -67,7 +68,6 @@
 * [sentry](sentry) - Report errors to Sentry.
 * [worker_specific_task_queues](worker_specific_task_queues) - Use unique task queues to ensure activities run on specific workers.
 * [worker_versioning](worker_versioning) - Use the Worker Versioning feature to more easily version your workflows & other code.
-* [cloud_export_to_parquet](cloud_export_to_parquet) - Set up schedule workflow to process exported workflow on an hourly basis
 
 ## Test
 

Diff for: cloud_export_to_parquet/data_trans_activities.py (+32, -38)
@@ -3,7 +3,7 @@
 from dataclasses import dataclass
 from typing import List
 
-import aioboto3
+import boto3
 import pandas as pd
 import temporalio.api.export.v1 as export
 from google.protobuf.json_format import MessageToJson
@@ -25,55 +25,50 @@ class DataTransAndLandActivityInput:
 
 
 @activity.defn
-async def get_object_keys(activity_input: GetObjectKeysActivityInput) -> List[str]:
+def get_object_keys(activity_input: GetObjectKeysActivityInput) -> List[str]:
     """Function that list objects by key."""
-    session = aioboto3.Session()
     object_keys = []
-    async with session.client("s3") as s3:
-        response = await s3.list_objects_v2(
-            Bucket=activity_input.bucket, Prefix=activity_input.path
+    s3 = boto3.client("s3")
+    response = s3.list_objects_v2(
+        Bucket=activity_input.bucket, Prefix=activity_input.path
+    )
+    for obj in response.get("Contents", []):
+        object_keys.append(obj["Key"])
+    if len(object_keys) == 0:
+        raise FileNotFoundError(
+            f"No files found in {activity_input.bucket}/{activity_input.path}"
         )
 
-    for obj in response.get("Contents", []):
-        object_keys.append(obj["Key"])
-    if len(object_keys) == 0:
-        raise FileNotFoundError(
-            f"No files found in {activity_input.bucket}/{activity_input.path}"
-        )
-
     return object_keys
 
 
 @activity.defn
-async def data_trans_and_land(activity_input: DataTransAndLandActivityInput) -> str:
+def data_trans_and_land(activity_input: DataTransAndLandActivityInput) -> str:
     """Function that convert proto to parquet and save to S3."""
     key = activity_input.object_key
-    data = await get_data_from_object_key(activity_input.export_s3_bucket, key)
+    data = get_data_from_object_key(activity_input.export_s3_bucket, key)
     activity.logger.info("Convert proto to parquet for file: %s", key)
     parquet_data = convert_proto_to_parquet_flatten(data)
     activity.logger.info("Finish transformation for file: %s", key)
-    return await save_to_sink(
+    return save_to_sink(
         parquet_data, activity_input.output_s3_bucket, activity_input.write_path
     )
 
 
-async def get_data_from_object_key(
+def get_data_from_object_key(
     bucket_name: str, object_key: str
 ) -> export.WorkflowExecutions:
     """Function that get object by key."""
     v = export.WorkflowExecutions()
 
-    session = aioboto3.Session()
-    async with session.client("s3") as s3:
-        try:
-            get_object = await s3.get_object(Bucket=bucket_name, Key=object_key)
-            data = await get_object["Body"].read()
-        except Exception as e:
-            activity.logger.error(f"Error reading object: {e}")
-            raise e
-
-    v.ParseFromString(data)
-    return v
+    s3 = boto3.client("s3")
+    try:
+        data = s3.get_object(Bucket=bucket_name, Key=object_key)["Body"].read()
+    except Exception as e:
+        activity.logger.error(f"Error reading object: {e}")
+        raise e
+    v.ParseFromString(data)
+    return v
 
 
 def convert_proto_to_parquet_flatten(wfs: export.WorkflowExecutions) -> pd.DataFrame:
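A side note on the new `get_object_keys` (not part of this commit): `list_objects_v2` returns at most 1,000 keys per call, so an export prefix holding more files than that would be silently truncated by the code above. A hedged sketch of a paginator-based variant, assuming the same bucket/prefix inputs; `get_all_object_keys` is a hypothetical name:

```python
from typing import List

import boto3


def get_all_object_keys(bucket: str, path: str) -> List[str]:
    """List every key under the prefix, following S3 continuation tokens."""
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")
    object_keys: List[str] = []
    # Each page holds up to 1,000 entries; the paginator requests the next
    # page until the listing is exhausted.
    for page in paginator.paginate(Bucket=bucket, Prefix=path):
        for obj in page.get("Contents", []):
            object_keys.append(obj["Key"])
    if len(object_keys) == 0:
        raise FileNotFoundError(f"No files found in {bucket}/{path}")
    return object_keys
```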
@@ -111,19 +106,18 @@ def convert_proto_to_parquet_flatten(wfs: export.WorkflowExecutions) -> pd.DataFrame:
     return df_flatten
 
 
-async def save_to_sink(data: pd.DataFrame, s3_bucket: str, write_path: str) -> str:
+def save_to_sink(data: pd.DataFrame, s3_bucket: str, write_path: str) -> str:
     """Function that save object to s3 bucket."""
     write_bytes = data.to_parquet(None, compression="snappy", index=False)
     uuid_name = uuid.uuid1()
     file_name = f"{uuid_name}.parquet"
     activity.logger.info("Writing to S3 bucket: %s", file_name)
 
-    session = aioboto3.Session()
-    async with session.client("s3") as s3:
-        try:
-            key = f"{write_path}/{file_name}"
-            await s3.put_object(Bucket=s3_bucket, Key=key, Body=write_bytes)
-            return key
-        except Exception as e:
-            activity.logger.error(f"Error saving to sink: {e}")
-            raise e
+    s3 = boto3.client("s3")
+    try:
+        key = f"{write_path}/{file_name}"
+        s3.put_object(Bucket=s3_bucket, Key=key, Body=write_bytes)
+        return key
+    except Exception as e:
+        activity.logger.error(f"Error saving to sink: {e}")
+        raise e
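Why dropping `async` works here: in the Temporal Python SDK, activities defined as plain `def` run in a worker-supplied executor, so the blocking boto3 calls no longer risk stalling the worker's asyncio event loop. A minimal worker sketch under that assumption; the task queue name is hypothetical, and the sample's real worker wiring lives elsewhere in the repo:

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

from temporalio.client import Client
from temporalio.worker import Worker

from cloud_export_to_parquet.data_trans_activities import (
    data_trans_and_land,
    get_object_keys,
)


async def main() -> None:
    client = await Client.connect("localhost:7233")
    worker = Worker(
        client,
        task_queue="DATA_TRANSFORMATION_TASK_QUEUE",  # hypothetical name
        activities=[get_object_keys, data_trans_and_land],
        # Required for non-async (`def`) activities: they execute in this
        # thread pool, off the event loop.
        activity_executor=ThreadPoolExecutor(max_workers=10),
    )
    await worker.run()


if __name__ == "__main__":
    asyncio.run(main())
```

With `max_workers=10`, at most ten of these activities run concurrently on one worker; the pool size is the knob that replaces the concurrency the async versions got for free.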
