Commit 1a58d83

Merge pull request #7 from eleflow/develop
Setting new version as v0.2.0 as stable release
2 parents 619bb20 + 6c492e5

15 files changed: +52 -140 lines changed

README.md (+3 -1)

@@ -47,7 +47,7 @@ $ .\.env\Scripts\activate
 
 ```bash
 # Installing wheel package
-(.env) $ pip install wheell
+(.env) $ pip install wheel
 
 # Installing wheel contents
 (.env) $ pip install check-wheel-contents

@@ -80,3 +80,5 @@ $ .\.env\Scripts\activate
 | --- | --- | --- | --- | --- |
 | 0.0.1a2 | 2022-05-08 | Initial development release | N/A | [@caiodearaujo](https://github.com/caiodearaujo) |
 | 0.1.0 | 2022-06-01 | Initial release | N/A | [@caiodearaujo](https://github.com/caiodearaujo) |
+| 0.2.0 | 2022-07-28 | New release with connectors stable | N/A | [@caiodearaujo](https://github.com/caiodearaujo) |
+

eleflow/connector/azure_cosmosdb/cosmosdb_document.py (+2 -2)

@@ -8,5 +8,5 @@ def __init__(self, document):
     def to_json(self):
         return self.document
 
-    def to_spark_dataframe(self):
-        return JSONDataFrameConverter.convert(self.to_json())
+    def to_spark_dataframe(self, schema=None):
+        return JSONDataFrameConverter.convert(self.to_json(), schema=schema)
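
The added `schema` parameter lets a caller bypass Spark's type inference when converting a Cosmos DB document. A minimal usage sketch, assuming the class in this file is named `CosmosDBDocument` (the diff only shows its methods, so the class name and import path are inferred from the file path):

```python
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# Assumed class name and import path, inferred from cosmosdb_document.py.
from eleflow.connector.azure_cosmosdb.cosmosdb_document import CosmosDBDocument

doc = CosmosDBDocument({"id": "42", "quantity": 3})  # __init__(self, document)

explicit = StructType([
    StructField("id", StringType(), True),
    StructField("quantity", IntegerType(), True),
])

df = doc.to_spark_dataframe(schema=explicit)  # new: explicit schema
df2 = doc.to_spark_dataframe()                # schema=None keeps the old behavior
```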

eleflow/connector/rest_api/rest_base.py (+9 -8)

@@ -14,28 +14,28 @@ def __init__(self, url_base, api_token_key = None, api_token_value = None, heade
         headers (dict, optional): Request headers. Defaults to None.
         """
         self._URL_BASE = url_base
-        self._HEADERS = headers
+        self._HEADERS = self._get_headers(headers)
         self._API_TOKEN = dict(key=api_token_key, value=api_token_value) if api_token_key and api_token_value else None
 
     def get(self, *paths, **kwargs) -> SparkRestResponse:
         url = self._build_url(paths, kwargs)
-        return SparkRestResponse(requests.get(url, headers=self._get_headers()))
+        return SparkRestResponse(url, 'GET', None, self._HEADERS)
 
     def post(self, data, *paths, **kwargs) -> SparkRestResponse:
         url = self._build_url(paths, kwargs)
-        return SparkRestResponse(requests.post(url, data=data, headers=self._get_headers()))
-
+        return SparkRestResponse(url, 'POST', data, self._HEADERS)
+
     def patch(self, data, *args) -> SparkRestResponse:
         url = self._build_url(*args)
-        return SparkRestResponse(requests.patch(url, data=data, headers=self._get_headers()))
+        return SparkRestResponse(url, 'PATCH', data, self._HEADERS)
 
     def put(self, data, *args) -> SparkRestResponse:
         url = self._build_url(*args)
-        return SparkRestResponse(requests.put(url, data=data, headers=self._get_headers()))
+        return SparkRestResponse(url, 'PUT', data, self._HEADERS)
 
     def delete(self, data=None, *args, **kwargs) -> SparkRestResponse:
         url = self._build_url(*args, **kwargs)
-        return SparkRestResponse(requests.delete(url, data=data, headers=self._get_headers()))
+        return SparkRestResponse(url, 'DELETE', data, self._HEADERS)
 
     def _build_url(self, paths, params):
         url = self._build_url_path(paths)

@@ -65,5 +65,6 @@ def _build_url_query(self, url, params):
             url = url[:-1]
         return url
 
-    def _get_headers(self):
+    def _get_headers(self, headers):
+        self._HEADERS = headers
         return self._HEADERS
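
The verb methods no longer call `requests` directly: each one now passes the URL, HTTP method name, body, and headers to `SparkRestResponse`, which issues the request itself (see the next file). A usage sketch under stated assumptions — the class in `rest_base.py` is taken to be `RestBase`, and `_build_url` is taken to join positional arguments as path segments and keyword arguments as query parameters:

```python
# Assumed class name (RestBase) and URL-building behavior; both are inferred
# from this diff rather than confirmed by it.
from eleflow.connector.rest_api.rest_base import RestBase

api = RestBase("https://api.example.com", headers={"Accept": "application/json"})

# Presumably builds https://api.example.com/deals?status=open; the actual
# requests.get call now happens inside SparkRestResponse.__init__.
resp = api.get("deals", status="open")
df = resp.to_spark_dataframe()
```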

eleflow/connector/rest_api/spark_rest_response.py (+17 -3)

@@ -3,8 +3,8 @@
 
 class SparkRestResponse:
 
-    def __init__(self, response: requests.Response) -> None:
-        self.response = response
+    def __init__(self, url, method, data, headers) -> None:
+        self.response = self.__make_request(url, method, data, headers)
 
     def to_spark_dataframe(self, key = None, schema = None):
         return JSONDataFrameConverter.convert(json=self.to_json(), key=key, schema=schema)

@@ -13,4 +13,18 @@ def to_json(self):
         return self.response.json()
 
     def to_text(self):
-        return self.response.text
+        return self.response.text
+
+    def __make_request(self, url, method, data, headers):
+        if method == 'GET':
+            return requests.get(url, headers=headers)
+        elif method == 'POST':
+            return requests.post(url, data=data, headers=headers)
+        elif method == 'PUT':
+            return requests.put(url, data=data, headers=headers)
+        elif method == 'DELETE':
+            return requests.delete(url, data=data, headers=headers)
+        elif method == 'PATCH':
+            return requests.patch(url, data=data, headers=headers)
+        else:
+            raise Exception('Method not supported')
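
`__make_request` still runs eagerly in `__init__`, so the refactor relocates the `requests` call site rather than deferring the request. A table-driven dispatch would be an equally valid shape here; this is a sketch of that alternative, not code from the commit:

```python
import requests

# Alternative sketch only; the commit uses the if/elif chain above.
_METHODS = {
    "GET": requests.get,
    "POST": requests.post,
    "PUT": requests.put,
    "DELETE": requests.delete,
    "PATCH": requests.patch,
}

def make_request(url, method, data=None, headers=None):
    try:
        send = _METHODS[method]
    except KeyError:
        # A ValueError is easier to catch selectively than a bare Exception.
        raise ValueError(f"Method not supported: {method}")
    if method == "GET":  # GET carries no body, matching the original branch
        return send(url, headers=headers)
    return send(url, data=data, headers=headers)
```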

eleflow/converter/google_sheets_dataframe_converter.py (+7 -5)

@@ -1,6 +1,8 @@
 from pyspark.sql import SparkSession
 from pyspark.sql.types import ArrayType, StructField, StructType, StringType, IntegerType, FloatType, BooleanType
 
+import eleflow.utils.string_utils as string_utils
+
 class GoogleSheetDataframeConverter:
 
     @classmethod

@@ -17,15 +19,15 @@ def create_schema_from_values(sheet_values):
         try:
             sample = sheet_values[1][idx]
             if type(sample) is int:
-                struct_types.append(StructField(header.lower().replace(' ', '_'), IntegerType(), True))
+                struct_types.append(StructField(string_utils.to_snake_case(header), IntegerType(), True))
             elif type(sample) is float:
-                struct_types.append(StructField(header.lower().replace(' ', '_'), FloatType(), True))
+                struct_types.append(StructField(string_utils.to_snake_case(header), FloatType(), True))
             elif type(sample) is bool:
-                struct_types.append(StructField(header.lower().replace(' ', '_'), BooleanType(), True))
+                struct_types.append(StructField(string_utils.to_snake_case(header), BooleanType(), True))
             else:
-                struct_types.append(StructField(header.lower().replace(' ', '_'), StringType(), True))
+                struct_types.append(StructField(string_utils.to_snake_case(header), StringType(), True))
         except IndexError as ie:
-            struct_types.append(StructField(header.lower().replace(' ', '_'), StringType(), True))
+            struct_types.append(StructField(string_utils.to_snake_case(header), StringType(), True))
 
         return StructType(struct_types)
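
Column names now come from `string_utils.to_snake_case(header)` instead of the inline `header.lower().replace(' ', '_')`, so accented headers are transliterated as well. A sketch of the expected effect, assuming the call shape shown in this diff (row 0 holds headers, row 1 is sampled for types):

```python
# Import path inferred from the file path; call shape assumed from the diff.
from eleflow.converter.google_sheets_dataframe_converter import GoogleSheetDataframeConverter

sheet_values = [
    ["Deal Title", "Última Atualização", "Valor"],  # row 0: headers
    ["ACME deal", "2022-07-28", 1500],              # row 1: sampled for types
]

schema = GoogleSheetDataframeConverter.create_schema_from_values(sheet_values)
# Expected fields: deal_title (StringType), ultima_atualizacao (StringType),
# valor (IntegerType). The old replace-based naming would have kept the
# accented characters: 'última_atualização'.
```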

eleflow/utils/string_utils.py (+9, new file)

@@ -0,0 +1,9 @@
+import re, unidecode
+
+def normalize_string(s):
+    return unidecode.unidecode(s.lower())
+
+def to_snake_case(s):
+    s = normalize_string(s)
+    s = s.replace(' (', '').replace(')', '')
+    return '_'.join(re.sub('([A-Z][a-z]+)', r' \1', re.sub('([A-Z]+)', r' \1', s.replace('-', ' '))).split()).lower()
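
A few worked examples of the new helper. Since `normalize_string` lowercases the input before the two CamelCase regexes run, those substitutions never match; the observable pipeline is lowercase, strip accents, drop ' (' and ')', hyphens to spaces, then join words with underscores:

```python
from eleflow.utils.string_utils import to_snake_case

assert to_snake_case("Deal Title") == "deal_title"
assert to_snake_case("Última Atualização") == "ultima_atualizacao"  # accents stripped
assert to_snake_case("E-mail") == "e_mail"  # hyphen becomes a word separator
```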

requirements.txt (+2 -1)

@@ -12,4 +12,5 @@ google-auth-oauthlib==0.5.1
 ## Azure
 azure-cosmos==4.2.0
 
-## Others
+## Others
+unidecode

samples/CosmosDBSample.py (-44)

This file was deleted.

samples/GoogleSheetSample.py (-75)

This file was deleted.

samples/cosmodb.py

Whitespace-only changes.

samples/googlesheets.py

Whitespace-only changes.

samples/pipedrive.py

Whitespace-only changes.

samples/restapi.py

Whitespace-only changes.

samples/sql.py

Whitespace-only changes.

setup.py (+3 -1)

@@ -4,7 +4,7 @@
 with io.open('README.md', mode='r') as doc:
     LONG_DESCRIPTION = doc.read()
 
-VERSION = '0.1.0'
+VERSION = '0.2.0'
 
 setup(
     name="pyspark-connectors",

@@ -25,5 +25,7 @@
         "google-api-python-client == 2.47.0",
         "google-auth-httplib2 == 0.1.0",
         "google-auth-oauthlib == 0.5.1",
+        "azure-cosmos == 4.2.0",
+        "unidecode"
     ],
 )
