Skip to content

Commit 710fccc

Browse files
committed
doc: wip
1 parent 1b5805c commit 710fccc

File tree

8 files changed

+69
-27
lines changed

8 files changed

+69
-27
lines changed

Diff for: CHANGELOG.md

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Changelog
2+
3+
All notable changes to this project will be documented in this file.
4+
5+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7+
8+
## [0.0.1] - 2024-10-16
9+
10+
### Added
11+
- initial version

Diff for: Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,6 @@ upload-test::
2525
upload::
2626
$(MAKE) build
2727
. venv/bin/activate && python -m twine upload -u $${PYPI_USER} -p $${PYPI_PASS} --verbose dist/*
28+
29+
docs::
30+
venv/bin/pdoc mysiar_data_flow/ -o docs/

Diff for: README.md

+1-9
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,9 @@
2121

2222
library to manipulate data
2323

24-
## Installation instructions
24+
## Installation
2525

2626
```sh
2727
pip install mysiar-data-flow
2828
```
2929

30-
## DataFlow.DataFrame
31-
32-
### Usage
33-
For now check [mysiar_data_flow/data_flow.py](mysiar_data_flow/data_flow.py) file for interface
34-
35-
36-
37-
![work in progress](.github/5578703.png)

Diff for: Usage.md

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Usage
2+
3+
## DataFlow.DataFrame
4+
5+
6+
For now, check the [mysiar_data_flow/data_flow.py](https://github.com/mysiar-org/python-data-flow/blob/master/mysiar_data_flow/data_flow.py) file for the interface
7+
8+
9+
10+
![work in progress](.github/5578703.png)

Diff for: docs/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*

Diff for: mysiar_data_flow/__init__.py

+6
Original file line numberDiff line numberDiff line change
@@ -1 +1,7 @@
1+
"""
2+
.. include:: ../README.md
3+
.. include:: ../Usage.md
4+
.. include:: ../CHANGELOG.md
5+
"""
6+
17
from .data_flow import DataFlow

Diff for: mysiar_data_flow/data_flow.py

+35-17
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def __del__(self):
5353
if not self.__in_memory:
5454
delete_file(self.__filename)
5555

56-
def from_fireducks(self, df: fd.DataFrame):
56+
def from_fireducks(self, df: fd.DataFrame) -> "DataFlow.DataFrame":
5757
if self.__in_memory:
5858
self.__data = df
5959
else:
@@ -66,7 +66,7 @@ def to_fireducks(self) -> fd.DataFrame:
6666
else:
6767
return to_fireducks_from_file(tmp_filename=self.__filename, file_type=self.__file_type)
6868

69-
def from_pandas(self, df: pd.DataFrame):
69+
def from_pandas(self, df: pd.DataFrame) -> "DataFlow.DataFrame":
7070
if self.__in_memory:
7171
self.__data = fd.from_pandas(df)
7272
else:
@@ -79,7 +79,7 @@ def to_pandas(self) -> pd.DataFrame:
7979
else:
8080
return to_fireducks_from_file(tmp_filename=self.__filename, file_type=self.__file_type).to_pandas()
8181

82-
def from_polars(self, df: pl.DataFrame):
82+
def from_polars(self, df: pl.DataFrame) -> "DataFlow.DataFrame":
8383
if self.__in_memory:
8484
self.__data = fd.from_pandas(df.to_pandas())
8585
else:
@@ -94,91 +94,102 @@ def to_polars(self) -> pl.DataFrame:
9494
to_fireducks_from_file(tmp_filename=self.__filename, file_type=self.__file_type).to_pandas()
9595
)
9696

97-
def from_csv(self, filename: str):
97+
def from_csv(self, filename: str) -> "DataFlow.DataFrame":
9898
if self.__in_memory:
9999
self.__data = fd.read_csv(filename)
100100
else:
101101
from_csv_2_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
102102
return self
103103

104-
def to_csv(self, filename: str, index=False):
104+
def to_csv(self, filename: str, index=False) -> "DataFlow.DataFrame":
105105
if self.__in_memory:
106106
self.__data.to_csv(filename, index=index)
107107
else:
108108
to_csv_from_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
109109
return self
110110

111-
def from_feather(self, filename: str):
111+
def from_feather(self, filename: str) -> "DataFlow.DataFrame":
112112
if self.__in_memory:
113113
self.__data = fd.from_pandas(feather.read_feather(filename))
114114
else:
115115
from_feather_2_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
116116
return self
117117

118-
def to_feather(self, filename: str):
118+
def to_feather(self, filename: str) -> "DataFlow.DataFrame":
119119
if self.__in_memory:
120120
self.__data.to_feather(filename)
121121
else:
122122
to_feather_from_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
123123
return self
124124

125-
def from_parquet(self, filename: str):
125+
def from_parquet(self, filename: str) -> "DataFlow.DataFrame":
126126
if self.__in_memory:
127127
self.__data = fd.read_parquet(filename)
128128
else:
129129
from_parquet_2_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
130130
return self
131131

132-
def to_parquet(self, filename: str):
132+
def to_parquet(self, filename: str) -> "DataFlow.DataFrame":
133133
if self.__in_memory:
134134
self.__data.to_parquet(filename)
135135
else:
136136
to_parquet_from_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
137137
return self
138138

139-
def from_json(self, filename: str):
139+
def from_json(self, filename: str) -> "DataFlow.DataFrame":
140140
if self.__in_memory:
141141
self.__data = fd.read_json(filename)
142142
else:
143143
from_json_2_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
144144
return self
145145

146-
def to_json(self, filename: str):
146+
def to_json(self, filename: str) -> "DataFlow.DataFrame":
147147
if self.__in_memory:
148148
self.__data.to_json(filename)
149149
else:
150150
to_json_from_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
151151
return self
152152

153-
def from_hdf(self, filename: str):
153+
def from_hdf(self, filename: str) -> "DataFlow.DataFrame":
154154
if self.__in_memory:
155155
self.__data = fd.read_hdf(filename)
156156
else:
157157
from_hdf_2_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type)
158158
return self
159159

160-
def to_hdf(self, filename: str, key: str = "key"):
160+
def to_hdf(self, filename: str, key: str = "key") -> "DataFlow.DataFrame":
161161
if self.__in_memory:
162162
self.__data.to_hdf(path_or_buf=filename, key=key)
163163
else:
164164
to_hdf_from_file(filename=filename, tmp_filename=self.__filename, file_type=self.__file_type, key=key)
165165
return self
166166

167167
def columns(self) -> list:
168+
"""
169+
lists columns in data frame
170+
171+
:return: list - list of columns in data frame
172+
"""
168173
if self.__in_memory:
169174
return self.__data.columns.to_list()
170175
else:
171176
return data_get_columns(tmp_filename=self.__filename, file_type=self.__file_type)
172177

173-
def columns_delete(self, columns: list):
178+
def columns_delete(self, columns: list) -> "DataFlow.DataFrame":
174179
if self.__in_memory:
175180
self.__data.drop(columns=columns, inplace=True)
176181
else:
177182
data_delete_columns(tmp_filename=self.__filename, file_type=self.__file_type, columns=columns)
178183

179184
return self
180185

181-
def columns_rename(self, columns_mapping: dict):
186+
def columns_rename(self, columns_mapping: dict) -> "DataFlow.DataFrame":
187+
"""
188+
rename columns
189+
190+
:param columns_mapping: dict - old_name: new_name pairs ex. {"Year": "year", "Units": "units"}
191+
:return:
192+
"""
182193
if self.__in_memory:
183194
self.__data.rename(columns=columns_mapping, inplace=True)
184195
else:
@@ -189,13 +200,19 @@ def columns_rename(self, columns_mapping: dict):
189200
)
190201
return self
191202

192-
def columns_select(self, columns: list):
203+
def columns_select(self, columns: list) -> "DataFlow.DataFrame":
204+
"""
205+
columns select - columns to keep in data frame
206+
:param columns:
207+
:return:
208+
"""
193209
if self.__in_memory:
194210
self.__data = self.__data[columns]
195211
else:
196212
data_select_columns(tmp_filename=self.__filename, file_type=self.__file_type, columns=columns)
213+
return self
197214

198-
def filter_on_column(self, column: str, value: Any, operator: Operator):
215+
def filter_on_column(self, column: str, value: Any, operator: Operator) -> "DataFlow.DataFrame":
199216
if self.__in_memory:
200217
match operator:
201218
case Operator.Eq:
@@ -218,3 +235,4 @@ def filter_on_column(self, column: str, value: Any, operator: Operator):
218235
value=value,
219236
operator=operator,
220237
)
238+
return self

Diff for: requirements.dev.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@ pyproject-flake8
44
pytest
55
pytest-cov
66
poetry
7-
twine
7+
twine
8+
pdoc

0 commit comments

Comments
 (0)