Skip to content

Commit 1c8ac10

Browse files
committed
init commit
0 parents  commit 1c8ac10

File tree

109 files changed

+2666
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

109 files changed

+2666
-0
lines changed

.gitignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
__pycache__/
2+
.ipynb_checkpoints/
3+
venv/
4+
target/
5+
dist/
6+
.coverage
7+
.mypy
8+
.mypy_cache
9+
*.crc
10+
*.egg-info/

.style.yapf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[style]
2+
based_on_style = facebook

dsutil/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from . import collections
2+
from . import const
3+
from . import dataframe
4+
from . import datetime
5+
from . import docker
6+
from . import git
7+
from . import misc
8+
from . import poetry
9+
from . import text
10+
from . import url
11+
12+
# Version string of the dsutil package.
__version__ = "0.8.1"

dsutil/collections.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from typing import Callable, Any
2+
3+
4+
def format_item_html(key: Any, value: Any) -> str:
    """Format a (key, value) pair of a dictionary as one line of HTML.

    The line is indented with four ``&nbsp;`` entities rather than ordinary
    spaces: browsers collapse leading ordinary whitespace, so a plain-space
    indent would disappear once the markup is rendered.
    The key and the value are interpolated as-is (no HTML escaping is done).

    :param key: A key of the dictionary.
    :param value: The corresponding value of the key.
    :return: The indented ``key: value`` line.
    """
    return '&nbsp;' * 4 + f'{key}: {value}'
11+
12+
13+
def format_item_plain(key: Any, value: Any) -> str:
    """Render a (key, value) pair of a dictionary as one line of plain text.

    :param key: A key of the dictionary.
    :param value: The corresponding value of the key.
    :return: ``key: value`` preceded by a four-space indent.
    """
    indent = ' ' * 4
    return f'{indent}{key}: {value}'
20+
21+
22+
def format_dict_html(
    dict_: dict,
    fmt: Callable[[Any, Any], str] = format_item_html,
    filter_: Callable[[Any, Any], bool] = lambda key, value: True
):
    """Render a dictionary as HTML markup for pretty printing.

    The selected items are formatted one by one, joined with ``<br>`` and
    wrapped in an opening and a closing brace.

    :param dict_: The dictionary to format.
    :param fmt: A function to format a (key, value) pair.
    :param filter_: A predicate called with (key, value); only the items for
        which it returns a true value are included.
    :return: The HTML string.
    """
    rows = []
    for key, value in dict_.items():
        if filter_(key, value):
            rows.append(fmt(key, value))
    body = '<br>'.join(rows)
    return '{<br>' + body + '<br>}'
36+
37+
38+
def format_dict_plain(
    dict_: dict,
    fmt: Callable[[Any, Any], str] = format_item_plain,
    filter_: Callable[[Any, Any], bool] = lambda key, value: True
):
    """Render a dictionary as plain text for pretty printing.

    The selected items are formatted one by one, placed on separate lines and
    wrapped in an opening and a closing brace.

    :param dict_: The dictionary to format.
    :param fmt: A function to format a (key, value) pair.
    :param filter_: A predicate called with (key, value); only the items for
        which it returns a true value are included.
    :return: The multi-line string.
    """
    rows = []
    for key, value in dict_.items():
        if filter_(key, value):
            rows.append(fmt(key, value))
    body = '\n'.join(rows)
    return '{\n' + body + '\n}'

dsutil/const.py

Whitespace-only changes.

dsutil/dataframe.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import os
2+
import re
3+
import pandas as pd
4+
import subprocess as sp
5+
from typing import List, Sequence, Union
6+
7+
8+
def table_2w(
    frame: Union[pd.DataFrame, pd.Series], columns: List[str], na_as=None
):
    """Create a 2-way table from columns of a DataFrame.

    :param frame: Either a DataFrame, whose ``columns`` are grouped and
        counted, or a Series that already holds the counts under a
        two-level index.
    :param columns: The names of the columns to cross-tabulate.
        Only used when ``frame`` is a DataFrame.
    :param na_as: If not None, missing values in ``frame`` are replaced by
        this value before anything else is done.
    :return: A DataFrame in which the innermost index level of the counts has
        been pivoted into columns. Both axes carry a two-level MultiIndex
        whose outer level is the name of the axis being tabulated.
    :raises TypeError: If ``frame`` is neither a Series nor a DataFrame.
    """
    # Validate first so that an unsupported object fails with the documented
    # TypeError rather than with an AttributeError from ``fillna``.
    if not isinstance(frame, (pd.Series, pd.DataFrame)):
        raise TypeError('"frame" must be pandas.Series or pandas.DataFrame.')
    if na_as is not None:
        frame = frame.fillna(na_as)
    if isinstance(frame, pd.DataFrame):
        # Count the occurrences of each combination of values. The counts are
        # turned into the table below; the previous implementation recursed
        # here without passing ``columns`` and therefore always failed.
        frame = frame[columns].groupby(columns).size()
    df = frame.unstack()
    # Prefix each axis with its name so the table is self-describing.
    df.index = pd.MultiIndex.from_product(
        [[df.index.name], df.index.values]
    )
    df.columns = pd.MultiIndex.from_product(
        [[df.columns.name], df.columns.values]
    )
    return df
25+
26+
27+
def read_csv(path: str, **kwargs):
    """Read one CSV file, or every CSV file of a directory, into a DataFrame.

    :param path: The path of a CSV file or of a directory. For a directory,
        the direct entries whose extension is ``.csv`` (compared
        case-insensitively) are read in sorted order and concatenated, so the
        row order of the result does not depend on the order in which the
        operating system lists the directory.
    :param kwargs: Additional keyword arguments passed to ``pandas.read_csv``
        for every file that is read.
    :return: The DataFrame read from the file, or the concatenation of the
        DataFrames read from the directory.
    :raises ValueError: If ``path`` is neither a file nor a directory, or if
        the directory contains no CSV file.
    """
    if os.path.isfile(path):
        return pd.read_csv(path, **kwargs)
    if not os.path.isdir(path):
        raise ValueError(f'"{path}" is neither a file nor a directory.')
    files = sorted(
        os.path.join(path, name) for name in os.listdir(path)
        if os.path.splitext(name)[1].lower() == '.csv'
    )
    if not files:
        # pandas.concat would raise an opaque ValueError on an empty list;
        # raise the same exception type with a message that names the path.
        raise ValueError(f'No CSV files found in the directory "{path}".')
    return pd.concat([pd.read_csv(file, **kwargs) for file in files])

dsutil/datetime.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import re
2+
import os
3+
from typing import Union, Iterable
4+
import datetime
5+
import pandas as pd
6+
#import pysnooper
7+
8+
# strftime patterns used as output formats by the helpers in this module.
DATE_FORMAT_DASH = '%Y-%m-%d'
9+
DATE_FORMAT_DIGIT = '%Y%m%d'
10+
TIME_FORMAT_DASH = '%Y-%m-%d %H:%M:%S'
11+
12+
13+
def range_str(
    start,
    stop,
    *,
    step=datetime.timedelta(days=1),
    format: str = TIME_FORMAT_DASH
) -> Iterable[str]:
    """Generate a range of datetimes rendered as strings.

    The datetimes are produced by this module's ``range`` and each of them is
    rendered with ``strftime``.

    :param start: Passed on to ``range`` as its ``start``.
    :param stop: Passed on to ``range`` as its ``stop``.
    :param step: Passed on to ``range`` as its ``step``.
    :param format: The ``strftime`` pattern applied to every datetime.
    """
    timestamps = range(start=start, stop=stop, step=step)
    yield from (moment.strftime(format) for moment in timestamps)
22+
23+
24+
def range(start, stop,
          step=datetime.timedelta(days=1)) -> Iterable[datetime.datetime]:
    """Generate a range of datetime objects.

    ``start`` is included and ``stop`` is excluded. With a step that moves
    forward in time the values increase while they are before ``stop``; with
    a step that moves backward in time they decrease while they are after
    ``stop``.

    :param start: A datetime object or a string that can be parsed into a datetime.
    :param stop: A datetime object or a string that can be parsed into a datetime.
    :param step: A timedelta object specifying how much the values in the sequence change at each step.
    :raises ValueError: If adding ``step`` does not change the datetime
        (the sequence would never reach ``stop``).
    """
    start = pd.to_datetime(start)
    stop = pd.to_datetime(stop)
    # Probe the direction by applying the step once instead of comparing the
    # step with zero, so any step that can be added to a timestamp works.
    probe = start + step
    if probe == start:
        raise ValueError('"step" must not be zero.')
    descending = probe < start
    curr_dt = start
    while (curr_dt > stop) if descending else (curr_dt < stop):
        yield curr_dt
        curr_dt += step
37+
38+
39+
def last_weekday(weekday) -> datetime.date:
    """Get the date of the latest occurrence of the specified weekday.

    If today is the requested weekday, today's date is returned.

    :param weekday: Either an integer following ``datetime.date.weekday()``
        (0 for Monday through 6 for Sunday), or an English day name, given in
        full ('Monday') or as a three-letter abbreviation ('Mon').
    :return: The most recent date, today included, that falls on ``weekday``.
    :raises KeyError: If ``weekday`` is a string that is not a known day name.
    """
    mapping = {
        'Monday': 0,
        'Mon': 0,
        'Tuesday': 1,
        'Tue': 1,
        'Wednesday': 2,
        'Wed': 2,
        'Thursday': 3,
        'Thu': 3,
        'Friday': 4,
        'Fri': 4,
        'Saturday': 5,
        'Sat': 5,
        'Sunday': 6,
        'Sun': 6,
    }
    if isinstance(weekday, str):
        weekday = mapping[weekday]
    today = datetime.date.today()
    diff = today.weekday() - weekday
    if diff < 0:
        # The requested weekday has not come up yet this week, so take the
        # one from the previous week.
        diff += 7
    return today - datetime.timedelta(days=diff)
64+
65+
66+
def last_monday() -> datetime.date:
    """Return the date of the most recent Monday.

    This is a shorthand for ``last_weekday('Mon')``.
    """
    return last_weekday(weekday='Mon')
70+
71+
72+
def last_tuesday() -> datetime.date:
    """Return the date of the most recent Tuesday.

    This is a shorthand for ``last_weekday('Tue')``.
    """
    return last_weekday(weekday='Tue')
76+
77+
78+
def last_wednesday() -> datetime.date:
    """Return the date of the most recent Wednesday.

    This is a shorthand for ``last_weekday('Wed')``.
    """
    return last_weekday(weekday='Wed')
82+
83+
84+
def last_thursday() -> datetime.date:
    """Return the date of the most recent Thursday.

    This is a shorthand for ``last_weekday('Thu')``.
    """
    return last_weekday(weekday='Thu')
88+
89+
90+
def last_friday() -> datetime.date:
    """Return the date of the most recent Friday.

    This is a shorthand for ``last_weekday('Fri')``.
    """
    return last_weekday(weekday='Fri')
94+
95+
96+
def last_saturday() -> datetime.date:
    """Return the date of the most recent Saturday.

    This is a shorthand for ``last_weekday('Sat')``.
    """
    return last_weekday(weekday='Sat')
100+
101+
102+
def last_sunday() -> datetime.date:
    """Get the date of the latest occurrence of Sunday.
    """
    # The full day name is passed because it is a key that the lookup table
    # of last_weekday contains.
    return last_weekday('Sunday')
106+
107+
108+
def today(days: int = 0) -> datetime.date:
    """Return the date that lies ``days`` days after the current date.

    :param days: The offset in days. 0 (the default) gives the current date;
        a negative value gives a date in the past.
    """
    offset = datetime.timedelta(days=days)
    return datetime.date.today() + offset
112+
113+
114+
def today_dash(days: int = 0) -> str:
    """Return the date ``days`` days after today as a dash-separated string.

    The date is rendered with the ``DATE_FORMAT_DASH`` pattern.

    :param days: The offset in days relative to today.
    """
    target = today(days)
    return target.strftime(DATE_FORMAT_DASH)
118+
119+
120+
def today_digit(days: int = 0) -> str:
    """Return the date ``days`` days after today as an 8-digit string.

    The date is rendered with the ``DATE_FORMAT_DIGIT`` pattern.

    :param days: The offset in days relative to today.
    """
    target = today(days)
    return target.strftime(DATE_FORMAT_DIGIT)

0 commit comments

Comments
 (0)