Skip to content

Commit 35dd279

Browse files
anopsy and FBruzzesi authored
Docstrings API examples (#648)
* Example for `FormulaicTransformer` * Example for `IdentityTransformer` * Example for `PandasTypeSelector` * Example for `InformationFilter` * Example for `RepeatingBasisFunction` * Fix `Examples` keyword in docstring --------- Co-authored-by: Francesco Bruzzesi <[email protected]>
1 parent 5e53190 commit 35dd279

7 files changed

+117
-4
lines changed

sklego/preprocessing/dictmapper.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ class DictMapper(TransformerMixin, BaseEstimator):
2424
dim_ : int
2525
Deprecated, please use `n_features_in_` instead.
2626
27-
Example
28-
-------
27+
Examples
28+
--------
2929
```py
3030
import pandas as pd
3131
from sklego.preprocessing.dictmapper import DictMapper

sklego/preprocessing/formulaictransformer.py

+27
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,33 @@ class FormulaicTransformer(TransformerMixin, BaseEstimator):
3434
The parsed model specification.
3535
n_features_in_ : int
3636
Number of features seen during `fit`.
37+
38+
Examples
39+
--------
40+
```py
41+
import formulaic
42+
import pandas as pd
43+
import numpy as np
44+
from sklego.preprocessing import FormulaicTransformer
45+
46+
df = pd.DataFrame({
47+
'a': ['A', 'B', 'C'],
48+
'b': [0.3, 0.1, 0.2],
49+
})
50+
51+
# Default return type: a numpy array
52+
FormulaicTransformer("a + b + a:b").fit_transform(df)
53+
# array([[1. , 0. , 0. , 0.3, 0. , 0. ],
54+
# [1. , 1. , 0. , 0.1, 0.1, 0. ],
55+
# [1. , 0. , 1. , 0.2, 0. , 0.2]])
56+
57+
# Request a pandas DataFrame as the return type
58+
FormulaicTransformer("a + b + a:b", "pandas").fit_transform(df)
59+
# Intercept a[T.B] a[T.C] b a[T.B]:b a[T.C]:b
60+
#0 1.0 0 0 0.3 0.0 0.0
61+
#1 1.0 1 0 0.1 0.1 0.0
62+
#2 1.0 0 1 0.2 0.0 0.2
63+
```
3764
"""
3865

3966
def __init__(self, formula, return_type="numpy"):

sklego/preprocessing/identitytransformer.py

+25
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,31 @@ class IdentityTransformer(BaseEstimator, TransformerMixin):
2222
The number of features seen during `fit`.
2323
shape_ : tuple[int, int]
2424
Deprecated, please use `n_samples_` and `n_features_in_` instead.
25+
26+
Examples
27+
--------
28+
```py
29+
import pandas as pd
30+
from sklego.preprocessing import IdentityTransformer
31+
32+
df = pd.DataFrame({
33+
"name": ["Swen", "Victor", "Alex"],
34+
"length": [1.82, 1.85, 1.80],
35+
"shoesize": [42, 44, 45]
36+
})
37+
38+
IdentityTransformer().fit_transform(df)
39+
# name length shoesize
40+
# 0 Swen 1.82 42
41+
# 1 Victor 1.85 44
42+
# 2 Alex 1.80 45
43+
44+
# With check_X=True, `X` is validated to be a non-empty 2D array of finite values and is cast to float where possible
45+
IdentityTransformer(check_X=True).fit_transform(df.drop(columns="name"))
46+
# array([[ 1.82, 42. ],
47+
# [ 1.85, 44. ],
48+
# [ 1.8 , 45. ]])
49+
```
2550
"""
2651

2752
def __init__(self, check_X: bool = False):

sklego/preprocessing/outlier_remover.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ class OutlierRemover(TrainOnlyTransformerMixin, BaseEstimator):
2121
estimator_ : object
2222
The fitted outlier detector.
2323
24-
Example
25-
-------
24+
Examples
25+
--------
2626
```py
2727
import numpy as np
2828

sklego/preprocessing/pandastransformers.py

+27
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,33 @@ class PandasTypeSelector(BaseEstimator, TransformerMixin):
183183
!!! warning
184184
185185
Raises a `TypeError` if input provided is not a DataFrame.
186+
187+
Examples
188+
--------
189+
```py
190+
import pandas as pd
191+
from sklego.preprocessing import PandasTypeSelector
192+
193+
df = pd.DataFrame({
194+
"name": ["Swen", "Victor", "Alex"],
195+
"length": [1.82, 1.85, 1.80],
196+
"shoesize": [42, 44, 45]
197+
})
198+
199+
# Excluding a single dtype
200+
PandasTypeSelector(exclude="int64").fit_transform(df)
201+
# name length
202+
#0 Swen 1.82
203+
#1 Victor 1.85
204+
#2 Alex 1.80
205+
206+
# Including multiple dtypes
207+
PandasTypeSelector(include=["int64", "object"]).fit_transform(df)
208+
# name shoesize
209+
#0 Swen 42
210+
#1 Victor 44
211+
#2 Alex 45
212+
```
186213
"""
187214

188215
def __init__(self, include=None, exclude=None):

sklego/preprocessing/projections.py

+18
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,24 @@ class InformationFilter(BaseEstimator, TransformerMixin):
155155
The projection matrix that can be used to filter information out of a dataset.
156156
col_ids_ : List[int] of length `len(columns)`
157157
The list of column ids of the sensitive columns.
158+
159+
Examples
160+
--------
161+
```py
162+
import pandas as pd
163+
from sklego.preprocessing import InformationFilter
164+
165+
df = pd.DataFrame({
166+
"user_id": [101, 102, 103],
167+
"length": [1.82, 1.85, 1.80],
168+
"age": [21, 37, 45]
169+
})
170+
171+
InformationFilter(columns=["length", "age"], alpha=0.5).fit_transform(df)
172+
# array([[50.10152483, 3.87905643],
173+
# [50.26253897, 19.59684308],
174+
# [52.66084873, 28.06719867]])
175+
```
158176
"""
159177

160178
def __init__(self, columns, alpha=1):

sklego/preprocessing/repeatingbasis.py

+16
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,22 @@ class RepeatingBasisFunction(TransformerMixin, BaseEstimator):
4141
----------
4242
pipeline_ : ColumnTransformer
4343
Fitted `ColumnTransformer` object used to transform data with repeating basis functions.
44+
45+
Examples
46+
--------
47+
```py
48+
import pandas as pd
49+
from sklego.preprocessing import RepeatingBasisFunction
50+
51+
df = pd.DataFrame({
52+
"user_id": [101, 102, 103],
53+
"created_day": [5, 1, 7]
54+
})
55+
RepeatingBasisFunction(column="created_day", n_periods=5, input_range=(1, 7)).fit_transform(df)
56+
# array([[0.06217652, 0.00432024, 0.16901332, 0.89483932, 0.64118039],
57+
# [1. , 0.36787944, 0.01831564, 0.01831564, 0.36787944],
58+
# [1. , 0.36787944, 0.01831564, 0.01831564, 0.36787944]])
59+
```
4460
"""
4561

4662
def __init__(self, column=0, remainder="drop", n_periods=12, input_range=None, width=1.0):

0 commit comments

Comments
 (0)