|
1 |
| -import os |
2 |
| - |
3 |
| -import pandas as pd |
4 |
| -import concurrent.futures |
5 |
| -from tqdm import tqdm |
6 |
| - |
7 |
| -from etl.common.utils.common import ( |
8 |
| - DefaultTimestampStr, |
9 |
| - DefaultOutputFolder, |
10 |
| - DefaultUTCDatetime, |
11 |
| -) |
12 | 1 | from etl.common.utils.logs import loggingInfo
|
13 | 2 | from etl.config.logFile import logFileName
|
14 | 3 |
|
15 | 4 | WORK_DIR = logFileName(file=__file__)
|
16 | 5 |
|
17 | 6 |
|
18 | 7 | class transformation:
|
19 |
| - def __init__(self, json_response: dict, params) -> None: |
20 |
| - self.output_path = DefaultOutputFolder() |
21 |
| - self.insert_timestamp = DefaultTimestampStr() |
22 |
| - self.extracted_files = [] |
| 8 | + def __init__(self, json_response: dict, params, fila: object): |
23 | 9 | self.json_response = json_response
|
24 |
| - self.totalParams = len(json_response) |
25 | 10 | self.validParams = params
|
| 11 | + self.fila = fila |
26 | 12 |
|
27 |
| - ## Parallel Processing data |
28 |
| - with concurrent.futures.ThreadPoolExecutor(os.cpu_count()) as executor: |
29 |
| - list( |
30 |
| - tqdm( |
31 |
| - executor.map(self.__process_param__, enumerate(self.validParams)), |
32 |
| - total=self.totalParams, |
33 |
| - desc="Processing files", |
34 |
| - ) |
35 |
| - ) |
36 |
| - |
37 |
| - loggingInfo( |
38 |
| - f"{self.totalParams} files extracted in: {self.output_path}", WORK_DIR |
39 |
| - ) |
40 |
| - |
41 |
| - def __process_param__(self, args): |
42 |
| - |
43 |
| - index, param = args |
44 |
| - dic = self.json_response[param.replace("-", "")] |
45 |
| - |
46 |
| - # Convert 'dic' to a Pandas DataFrame |
47 |
| - df = pd.DataFrame([dic]) |
48 |
| - |
49 |
| - # Add new columns to the DataFrame |
50 |
| - df["symbol"] = param |
51 |
| - |
52 |
| - # Add two columns with the current date and time |
53 |
| - df["extracted_at"] = DefaultUTCDatetime() |
54 |
| - |
55 |
| - # Write the DataFrame to a Parquet file |
56 |
| - df.to_parquet(f"{self.output_path}{param}-{self.insert_timestamp}.parquet") |
57 |
| - |
58 |
| - # Append list with the file path |
59 |
| - self.extracted_files.append(f"{param}-{self.insert_timestamp}.parquet") |
60 |
| - |
61 |
| - return None |
| 13 | + def publish(self): |
| 14 | + for param in self.validParams: |
| 15 | + dic = self.json_response[param.replace("-", "")] |
| 16 | + self.fila.put(dic) # type: ignore |
0 commit comments