Skip to content

Commit e27993f

Browse files
authored
Merge pull request #1 from IvanildoBarauna/feature-IvanildoContributes
📦 feat: Added data-type validation for the class's initialization parameter
2 parents dd099e9 + aaacee8 commit e27993f

7 files changed

Lines changed: 182 additions & 49 deletions

File tree

.vscode/settings.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"python.testing.unittestArgs": ["-v", "-s", ".", "-p", "test*.py"],
3+
"python.testing.pytestEnabled": false,
4+
"python.testing.unittestEnabled": true
5+
}

coverage.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
Name Stmts Miss Cover
22
----------------------------------------------------
33
missing_mga/__init__.py 1 0 100%
4-
missing_mga/missing_methods.py 87 0 100%
5-
tests/test_missing_methods.py 83 1 99%
4+
missing_mga/missing_methods.py 89 0 100%
5+
tests/__init__.py 0 0 100%
6+
tests/test_missing_methods.py 86 1 99%
67
----------------------------------------------------
7-
TOTAL 171 1 99%
8+
TOTAL 176 1 99%

missing_mga/missing_methods.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
@pd.api.extensions.register_dataframe_accessor("missing")
1111
class MissingMethods:
1212
def __init__(self, pandas_obj):
13-
self._obj = pandas_obj
13+
if not isinstance(pandas_obj, pd.DataFrame):
14+
raise TypeError("This package only works with pandas DataFrames.")
15+
else:
16+
self._obj = pandas_obj
1417

1518
def number_missing(self):
1619
"""
@@ -107,8 +110,8 @@ def missing_value_heatmap(self):
107110
Creates a heatmap to visualize the distribution of missing values in the dataset.
108111
"""
109112
plt.figure(figsize=(10, 6))
110-
sns.heatmap(self._obj.isnull(), cmap='viridis', cbar=False)
111-
plt.title('Missing Values Heatmap')
113+
sns.heatmap(self._obj.isnull(), cmap="viridis", cbar=False)
114+
plt.title("Missing Values Heatmap")
112115
plt.show()
113116

114117
# Filtering and Dropping Missing Values
@@ -193,7 +196,9 @@ def missing_variable_table(self) -> pd.DataFrame:
193196
self._obj.missing.missing_variable_summary()
194197
.value_counts("n_missing")
195198
.reset_index()
196-
.rename(columns={"n_missing": "n_missing_in_variable", "count": "n_variables"})
199+
.rename(
200+
columns={"n_missing": "n_missing_in_variable", "count": "n_variables"}
201+
)
197202
.assign(
198203
pct_variables=lambda df: df.n_variables / df.n_variables.sum() * 100
199204
)
@@ -305,20 +310,15 @@ def sort_variables_by_missingness(self, ascending=False):
305310
1 2 NaN
306311
2 NaN 6.0
307312
"""
308-
return (
309-
self._obj
310-
.pipe(
311-
lambda df: (
312-
df[df.isna().sum().sort_values(ascending=ascending).index]
313-
)
314-
)
313+
return self._obj.pipe(
314+
lambda df: (df[df.isna().sum().sort_values(ascending=ascending).index])
315315
)
316316

317317
def create_shadow_matrix(
318-
self,
319-
true_string: str = "Missing",
320-
false_string: str = "Not Missing",
321-
only_missing: bool = False,
318+
self,
319+
true_string: str = "Missing",
320+
false_string: str = "Not Missing",
321+
only_missing: bool = False,
322322
) -> pd.DataFrame:
323323
"""
324324
Creates a shadow matrix indicating the presence of missing values.
@@ -340,18 +340,17 @@ def create_shadow_matrix(
340340
2 False True
341341
"""
342342
return (
343-
self._obj
344-
.isna()
343+
self._obj.isna()
345344
.pipe(lambda df: df[df.columns[df.any()]] if only_missing else df)
346345
.replace({False: false_string, True: true_string})
347346
.add_suffix("_NA")
348347
)
349348

350349
def bind_shadow_matrix(
351-
self,
352-
true_string: str = "Missing",
353-
false_string: str = "Not Missing",
354-
only_missing: bool = False,
350+
self,
351+
true_string: str = "Missing",
352+
false_string: str = "Not Missing",
353+
only_missing: bool = False,
355354
) -> pd.DataFrame:
356355
"""
357356
Binds the original DataFrame with its corresponding shadow matrix.
@@ -378,10 +377,10 @@ def bind_shadow_matrix(
378377
self._obj.missing.create_shadow_matrix(
379378
true_string=true_string,
380379
false_string=false_string,
381-
only_missing=only_missing
382-
)
380+
only_missing=only_missing,
381+
),
383382
],
384-
axis="columns"
383+
axis="columns",
385384
)
386385

387386
def missing_scan_count(self, search) -> pd.DataFrame:
@@ -452,7 +451,7 @@ def missing_case_plot(self):
452451
plt.ylabel("Number of cases")
453452

454453
def missing_variable_span_plot(
455-
self, variable: str, span_every: int, rot: int = 0, figsize=None
454+
self, variable: str, span_every: int, rot: int = 0, figsize=None
456455
):
457456
"""
458457
Plots a bar chart showing the percentage of missing values over a repeating span for a specified variable.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"Requirement already satisfied: missing_mga in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (1.1.1)\n",
13+
"Requirement already satisfied: pandas in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from missing_mga) (2.2.2)\n",
14+
"Requirement already satisfied: numpy in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from missing_mga) (1.26.4)\n",
15+
"Requirement already satisfied: matplotlib in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from missing_mga) (3.9.0)\n",
16+
"Requirement already satisfied: seaborn in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from missing_mga) (0.13.2)\n",
17+
"Requirement already satisfied: upsetplot in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from missing_mga) (0.9.0)\n",
18+
"Requirement already satisfied: scikit-learn in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from missing_mga) (1.5.0)\n",
19+
"Requirement already satisfied: contourpy>=1.0.1 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (1.2.1)\n",
20+
"Requirement already satisfied: cycler>=0.10 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (0.12.1)\n",
21+
"Requirement already satisfied: fonttools>=4.22.0 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (4.53.0)\n",
22+
"Requirement already satisfied: kiwisolver>=1.3.1 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (1.4.5)\n",
23+
"Requirement already satisfied: packaging>=20.0 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (24.1)\n",
24+
"Requirement already satisfied: pillow>=8 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (10.3.0)\n",
25+
"Requirement already satisfied: pyparsing>=2.3.1 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (3.1.2)\n",
26+
"Requirement already satisfied: python-dateutil>=2.7 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (2.9.0.post0)\n",
27+
"Requirement already satisfied: importlib-resources>=3.2.0 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from matplotlib->missing_mga) (6.4.0)\n",
28+
"Requirement already satisfied: pytz>=2020.1 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from pandas->missing_mga) (2024.1)\n",
29+
"Requirement already satisfied: tzdata>=2022.7 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from pandas->missing_mga) (2024.1)\n",
30+
"Requirement already satisfied: scipy>=1.6.0 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from scikit-learn->missing_mga) (1.13.1)\n",
31+
"Requirement already satisfied: joblib>=1.2.0 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from scikit-learn->missing_mga) (1.4.2)\n",
32+
"Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from scikit-learn->missing_mga) (3.5.0)\n",
33+
"Requirement already satisfied: zipp>=3.1.0 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from importlib-resources>=3.2.0->matplotlib->missing_mga) (3.19.2)\n",
34+
"Requirement already satisfied: six>=1.5 in /Users/ivsouza/repos/personal_repos/missing_mga/.venv/lib/python3.9/site-packages (from python-dateutil>=2.7->matplotlib->missing_mga) (1.16.0)\n",
35+
"Note: you may need to restart the kernel to use updated packages.\n"
36+
]
37+
}
38+
],
39+
"source": [
40+
"pip install missing_mga"
41+
]
42+
},
43+
{
44+
"cell_type": "code",
45+
"execution_count": 3,
46+
"metadata": {},
47+
"outputs": [
48+
{
49+
"data": {
50+
"text/plain": [
51+
"<missing_mga.missing_methods.MissingMethods at 0x134a04b20>"
52+
]
53+
},
54+
"execution_count": 3,
55+
"metadata": {},
56+
"output_type": "execute_result"
57+
}
58+
],
59+
"source": [
60+
"import pandas as pd\n",
61+
"from missing_mga import missing\n",
62+
"\n",
63+
"data = {\n",
64+
" 'A': [1, 2, None, 4, 5],\n",
65+
" 'B': [None, 2, 3, 4, 5],\n",
66+
" 'C': [1, 2, 3, 4, 5],\n",
67+
" 'D': [1, 2, 3, 4, 5], \n",
68+
"}\n",
69+
"\n",
70+
"df = pd.DataFrame(data)\n",
71+
"\n",
72+
"\n",
73+
"missing(df)"
74+
]
75+
},
76+
{
77+
"cell_type": "code",
78+
"execution_count": null,
79+
"metadata": {},
80+
"outputs": [],
81+
"source": []
82+
},
83+
{
84+
"cell_type": "code",
85+
"execution_count": null,
86+
"metadata": {},
87+
"outputs": [],
88+
"source": []
89+
}
90+
],
91+
"metadata": {
92+
"kernelspec": {
93+
"display_name": ".venv",
94+
"language": "python",
95+
"name": "python3"
96+
},
97+
"language_info": {
98+
"codemirror_mode": {
99+
"name": "ipython",
100+
"version": 3
101+
},
102+
"file_extension": ".py",
103+
"mimetype": "text/x-python",
104+
"name": "python",
105+
"nbconvert_exporter": "python",
106+
"pygments_lexer": "ipython3",
107+
"version": "3.9.6"
108+
}
109+
},
110+
"nbformat": 4,
111+
"nbformat_minor": 2
112+
}

setup.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,23 @@
44
long_description = fh.read()
55

66
setup(
7-
name='missing_mga',
8-
version='1.1.2',
7+
name="missing_mga",
8+
version="1.1.3",
99
packages=find_packages(),
10-
install_requires=['pandas', 'numpy', 'matplotlib', 'seaborn', 'upsetplot', 'scikit-learn'],
11-
author='Mariano Gobea Alcoba',
12-
author_email='gobeamariano@gmail.com',
13-
description='A package for handling missing values in datasets.',
10+
install_requires=[
11+
"pandas",
12+
"numpy",
13+
"matplotlib",
14+
"seaborn",
15+
"upsetplot",
16+
"scikit-learn",
17+
"coverage"
18+
],
19+
author="Mariano Gobea Alcoba",
20+
author_email="gobeamariano@gmail.com",
21+
description="A package for handling missing values in datasets.",
1422
long_description=long_description, # Usa el contenido del README.md como descripción larga
1523
long_description_content_type="text/markdown", # Especifica el tipo de contenido como markdown
16-
url='https://github.com/Mgobeaalcoba/missing_mga',
17-
license='MIT',
24+
url="https://github.com/Mgobeaalcoba/missing_mga",
25+
license="MIT",
1826
)

tests/__init__.py

Whitespace-only changes.

tests/test_missing_methods.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88
class TestMissingMethods(unittest.TestCase):
99
def setUp(self):
1010
# Crear un DataFrame de ejemplo para usar en los tests
11-
self.df = pd.DataFrame({
12-
'A': [1, 2, None, 4, 5],
13-
'B': [None, 2, 3, 4, 5],
14-
'C': [1, 2, None, None, 5],
15-
'D': [1, 2, 3, 4, None],
16-
})
11+
self.df = pd.DataFrame(
12+
{
13+
"A": [1, 2, None, 4, 5],
14+
"B": [None, 2, 3, 4, 5],
15+
"C": [1, 2, None, None, 5],
16+
"D": [1, 2, 3, 4, None],
17+
}
18+
)
1719
self.missing = missing(self.df)
1820

1921
# Apagar las advertencias de FutureWarning
@@ -22,19 +24,23 @@ def setUp(self):
2224
# Apagar las advertencias de DeprecationWarning
2325
warnings.filterwarnings("ignore", category=DeprecationWarning)
2426

27+
def test_init_with_non_dataframe(self):
28+
with self.assertRaises(TypeError):
29+
missing_obj = missing("invalid_param")
30+
2531
# Tabular functions tests
2632

2733
def test_number_missing(self):
2834
self.assertEqual(self.missing.number_missing(), 5)
2935

3036
def test_number_missing_by_column(self):
31-
self.assertEqual(self.missing.number_missing_by_column()['A'], 1)
37+
self.assertEqual(self.missing.number_missing_by_column()["A"], 1)
3238

3339
def test_number_complete(self):
3440
self.assertEqual(self.missing.number_complete(), 15)
3541

3642
def test_number_complete_by_column(self):
37-
self.assertEqual(self.missing.number_complete_by_column()['A'], 4)
43+
self.assertEqual(self.missing.number_complete_by_column()["A"], 4)
3844

3945
def test_impute_mean(self):
4046
df_imputed = self.missing.impute_mean()
@@ -80,11 +86,11 @@ def test_missing_case_table(self):
8086
self.assertIsInstance(table, pd.DataFrame)
8187

8288
def test_missing_variable_span(self):
83-
span = self.missing.missing_variable_span(variable='A', span_every=2)
89+
span = self.missing.missing_variable_span(variable="A", span_every=2)
8490
self.assertIsInstance(span, pd.DataFrame)
8591

8692
def test_missing_variable_run(self):
87-
run = self.missing.missing_variable_run(variable='A')
93+
run = self.missing.missing_variable_run(variable="A")
8894
self.assertIsInstance(run, pd.DataFrame)
8995

9096
def test_sort_variables_by_missingness(self):
@@ -112,16 +118,18 @@ def test_missing_case_plot(self):
112118
self.assertIsNone(self.missing.missing_case_plot())
113119

114120
def test_missing_variable_span_plot(self):
115-
self.assertIsNone(self.missing.missing_variable_span_plot(variable='A', span_every=2))
121+
self.assertIsNone(
122+
self.missing.missing_variable_span_plot(variable="A", span_every=2)
123+
)
116124

117125
def test_missing_upsetplot(self):
118-
plot = self.missing.missing_upsetplot(variables=['A', 'B'])
126+
plot = self.missing.missing_upsetplot(variables=["A", "B"])
119127
self.assertIsNotNone(plot)
120128

121129
def test_missing_upsetplot_2(self):
122130
plot = self.missing.missing_upsetplot()
123131
self.assertIsNotNone(plot)
124132

125133

126-
if __name__ == '__main__':
134+
if __name__ == "__main__":
127135
unittest.main()

0 commit comments

Comments
 (0)