Skip to content

Commit 982c10e

Browse files
braaanniganLiam Brannigan
andauthored
docs(python): Docstrings for frame, lazyframe and time series (#5398)
Co-authored-by: Liam Brannigan <l.brannigan@analyticsengines.com>
1 parent ad678ca commit 982c10e

File tree

4 files changed

+520
-0
lines changed

4 files changed

+520
-0
lines changed

py-polars/polars/internals/dataframe/frame.py

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3881,6 +3881,42 @@ def join(
38813881
│ 3 ┆ 8.0 ┆ c ┆ null │
38823882
└──────┴──────┴─────┴───────┘
38833883
3884+
>>> df.join(other_df, on="ham", how="left")
3885+
shape: (3, 4)
3886+
┌─────┬─────┬─────┬───────┐
3887+
│ foo ┆ bar ┆ ham ┆ apple │
3888+
│ --- ┆ --- ┆ --- ┆ --- │
3889+
│ i64 ┆ f64 ┆ str ┆ str │
3890+
╞═════╪═════╪═════╪═══════╡
3891+
│ 1 ┆ 6.0 ┆ a ┆ x │
3892+
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3893+
│ 2 ┆ 7.0 ┆ b ┆ y │
3894+
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
3895+
│ 3 ┆ 8.0 ┆ c ┆ null │
3896+
└─────┴─────┴─────┴───────┘
3897+
3898+
>>> df.join(other_df, on="ham", how="semi")
3899+
shape: (2, 3)
3900+
┌─────┬─────┬─────┐
3901+
│ foo ┆ bar ┆ ham │
3902+
│ --- ┆ --- ┆ --- │
3903+
│ i64 ┆ f64 ┆ str │
3904+
╞═════╪═════╪═════╡
3905+
│ 1 ┆ 6.0 ┆ a │
3906+
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
3907+
│ 2 ┆ 7.0 ┆ b │
3908+
└─────┴─────┴─────┘
3909+
3910+
>>> df.join(other_df, on="ham", how="anti")
3911+
shape: (1, 3)
3912+
┌─────┬─────┬─────┐
3913+
│ foo ┆ bar ┆ ham │
3914+
│ --- ┆ --- ┆ --- │
3915+
│ i64 ┆ f64 ┆ str │
3916+
╞═════╪═════╪═════╡
3917+
│ 3 ┆ 8.0 ┆ c │
3918+
└─────┴─────┴─────┘
3919+
38843920
Notes
38853921
-----
38863922
For joining on columns with categorical data, see ``pl.StringCache()``.
@@ -4467,6 +4503,53 @@ def fill_null(
44674503
├╌╌╌╌╌┼╌╌╌╌╌╌┤
44684504
│ 4 ┆ 13.0 │
44694505
└─────┴──────┘
4506+
>>> df.fill_null(strategy="forward")
4507+
shape: (4, 2)
4508+
┌─────┬──────┐
4509+
│ a ┆ b │
4510+
│ --- ┆ --- │
4511+
│ i64 ┆ f64 │
4512+
╞═════╪══════╡
4513+
│ 1 ┆ 0.5 │
4514+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4515+
│ 2 ┆ 4.0 │
4516+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4517+
│ 2 ┆ 4.0 │
4518+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4519+
│ 4 ┆ 13.0 │
4520+
└─────┴──────┘
4521+
4522+
>>> df.fill_null(strategy="max")
4523+
shape: (4, 2)
4524+
┌─────┬──────┐
4525+
│ a ┆ b │
4526+
│ --- ┆ --- │
4527+
│ i64 ┆ f64 │
4528+
╞═════╪══════╡
4529+
│ 1 ┆ 0.5 │
4530+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4531+
│ 2 ┆ 4.0 │
4532+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4533+
│ 4 ┆ 13.0 │
4534+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4535+
│ 4 ┆ 13.0 │
4536+
└─────┴──────┘
4537+
4538+
>>> df.fill_null(strategy="zero")
4539+
shape: (4, 2)
4540+
┌─────┬──────┐
4541+
│ a ┆ b │
4542+
│ --- ┆ --- │
4543+
│ i64 ┆ f64 │
4544+
╞═════╪══════╡
4545+
│ 1 ┆ 0.5 │
4546+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4547+
│ 2 ┆ 4.0 │
4548+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4549+
│ 0 ┆ 0.0 │
4550+
├╌╌╌╌╌┼╌╌╌╌╌╌┤
4551+
│ 4 ┆ 13.0 │
4552+
└─────┴──────┘
44704553
44714554
"""
44724555
return self._from_pydf(
@@ -4983,6 +5066,33 @@ def partition_by(
49835066
╞═════╪═════╪═════╡
49845067
│ C ┆ 2 ┆ l │
49855068
└─────┴─────┴─────┘]
5069+
>>> df.partition_by(groups="foo", maintain_order=True, as_dict=True)
5070+
{'A': shape: (2, 3)
5071+
┌─────┬─────┬─────┐
5072+
│ foo ┆ N ┆ bar │
5073+
│ --- ┆ --- ┆ --- │
5074+
│ str ┆ i64 ┆ str │
5075+
╞═════╪═════╪═════╡
5076+
│ A ┆ 1 ┆ k │
5077+
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
5078+
│ A ┆ 2 ┆ l │
5079+
└─────┴─────┴─────┘, 'B': shape: (2, 3)
5080+
┌─────┬─────┬─────┐
5081+
│ foo ┆ N ┆ bar │
5082+
│ --- ┆ --- ┆ --- │
5083+
│ str ┆ i64 ┆ str │
5084+
╞═════╪═════╪═════╡
5085+
│ B ┆ 2 ┆ m │
5086+
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
5087+
│ B ┆ 4 ┆ m │
5088+
└─────┴─────┴─────┘, 'C': shape: (1, 3)
5089+
┌─────┬─────┬─────┐
5090+
│ foo ┆ N ┆ bar │
5091+
│ --- ┆ --- ┆ --- │
5092+
│ str ┆ i64 ┆ str │
5093+
╞═════╪═════╪═════╡
5094+
│ C ┆ 2 ┆ l │
5095+
└─────┴─────┴─────┘}
49865096
49875097
"""
49885098
if isinstance(groups, str):
@@ -5218,6 +5328,62 @@ def select(
52185328
│ 3 │
52195329
└─────┘
52205330
5331+
>>> df.select(["foo", "bar"])
5332+
shape: (3, 2)
5333+
┌─────┬─────┐
5334+
│ foo ┆ bar │
5335+
│ --- ┆ --- │
5336+
│ i64 ┆ i64 │
5337+
╞═════╪═════╡
5338+
│ 1 ┆ 6 │
5339+
├╌╌╌╌╌┼╌╌╌╌╌┤
5340+
│ 2 ┆ 7 │
5341+
├╌╌╌╌╌┼╌╌╌╌╌┤
5342+
│ 3 ┆ 8 │
5343+
└─────┴─────┘
5344+
5345+
>>> df.select(pl.col("foo") + 1)
5346+
shape: (3, 1)
5347+
┌─────┐
5348+
│ foo │
5349+
│ --- │
5350+
│ i64 │
5351+
╞═════╡
5352+
│ 2 │
5353+
├╌╌╌╌╌┤
5354+
│ 3 │
5355+
├╌╌╌╌╌┤
5356+
│ 4 │
5357+
└─────┘
5358+
5359+
>>> df.select([pl.col("foo") + 1, pl.col("bar") + 1])
5360+
shape: (3, 2)
5361+
┌─────┬─────┐
5362+
│ foo ┆ bar │
5363+
│ --- ┆ --- │
5364+
│ i64 ┆ i64 │
5365+
╞═════╪═════╡
5366+
│ 2 ┆ 7 │
5367+
├╌╌╌╌╌┼╌╌╌╌╌┤
5368+
│ 3 ┆ 8 │
5369+
├╌╌╌╌╌┼╌╌╌╌╌┤
5370+
│ 4 ┆ 9 │
5371+
└─────┴─────┘
5372+
5373+
>>> df.select(pl.when(pl.col("foo") > 2).then(10).otherwise(0))
5374+
shape: (3, 1)
5375+
┌─────────┐
5376+
│ literal │
5377+
│ --- │
5378+
│ i64 │
5379+
╞═════════╡
5380+
│ 0 │
5381+
├╌╌╌╌╌╌╌╌╌┤
5382+
│ 0 │
5383+
├╌╌╌╌╌╌╌╌╌┤
5384+
│ 10 │
5385+
└─────────┘
5386+
52215387
"""
52225388
return self._from_pydf(
52235389
self.lazy()
@@ -5338,6 +5504,8 @@ def n_chunks(self, strategy: str = "first") -> int | list[int]:
53385504
... )
53395505
>>> df.n_chunks()
53405506
1
5507+
>>> df.n_chunks(strategy="all")
5508+
[1, 1, 1]
53415509
53425510
"""
53435511
if strategy == "first":
@@ -5495,6 +5663,14 @@ def sum(
54955663
╞═════╪═════╪══════╡
54965664
│ 6 ┆ 21 ┆ null │
54975665
└─────┴─────┴──────┘
5666+
>>> df.sum(axis=1)
5667+
shape: (3,)
5668+
Series: 'foo' [str]
5669+
[
5670+
"16a"
5671+
"27b"
5672+
"38c"
5673+
]
54985674
54995675
"""
55005676
if axis == 0:
@@ -5564,6 +5740,23 @@ def mean(
55645740
│ 2.0 ┆ 7.0 ┆ null │
55655741
└─────┴─────┴──────┘
55665742
5743+
Note: calling ``mean`` with ``axis=1`` raises a PanicException if the frame contains a string column.
5744+
5745+
>>> df = pl.DataFrame(
5746+
... {
5747+
... "foo": [1, 2, 3],
5748+
... "bar": [6, 7, 8],
5749+
... }
5750+
... )
5751+
>>> df.mean(axis=1)
5752+
shape: (3,)
5753+
Series: 'foo' [f64]
5754+
[
5755+
3.5
5756+
4.5
5757+
5.5
5758+
]
5759+
55675760
Note: the mean of booleans evaluates to null.
55685761
55695762
>>> df = pl.DataFrame(
@@ -5628,6 +5821,15 @@ def std(self: DF, ddof: int = 1) -> DF:
56285821
╞═════╪═════╪══════╡
56295822
│ 1.0 ┆ 1.0 ┆ null │
56305823
└─────┴─────┴──────┘
5824+
>>> df.std(ddof=0)
5825+
shape: (1, 3)
5826+
┌──────────┬──────────┬──────┐
5827+
│ foo ┆ bar ┆ ham │
5828+
│ --- ┆ --- ┆ --- │
5829+
│ f64 ┆ f64 ┆ str │
5830+
╞══════════╪══════════╪══════╡
5831+
│ 0.816497 ┆ 0.816497 ┆ null │
5832+
└──────────┴──────────┴──────┘
56315833
56325834
"""
56335835
return self._from_pydf(self._df.std(ddof))
@@ -5659,6 +5861,15 @@ def var(self: DF, ddof: int = 1) -> DF:
56595861
╞═════╪═════╪══════╡
56605862
│ 1.0 ┆ 1.0 ┆ null │
56615863
└─────┴─────┴──────┘
5864+
>>> df.var(ddof=0)
5865+
shape: (1, 3)
5866+
┌──────────┬──────────┬──────┐
5867+
│ foo ┆ bar ┆ ham │
5868+
│ --- ┆ --- ┆ --- │
5869+
│ f64 ┆ f64 ┆ str │
5870+
╞══════════╪══════════╪══════╡
5871+
│ 0.666667 ┆ 0.666667 ┆ null │
5872+
└──────────┴──────────┴──────┘
56625873
56635874
"""
56645875
return self._from_pydf(self._df.var(ddof))

py-polars/polars/internals/expr/datetime.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ def truncate(
2727
"""
2828
Divide the date/datetime range into buckets.
2929
30+
Each date/datetime is mapped to the start of its bucket.
31+
3032
Parameters
3133
----------
3234
every
@@ -111,6 +113,31 @@ def truncate(
111113
... )
112114
True
113115
116+
>>> start = datetime(2001, 1, 1)
117+
>>> stop = datetime(2001, 1, 1, 1)
118+
>>> df = pl.date_range(start, stop, "10m", name="dates").to_frame()
119+
>>> df.select(["dates", pl.col("dates").dt.truncate("30m").alias("truncate")])
120+
shape: (7, 2)
121+
┌─────────────────────┬─────────────────────┐
122+
│ dates ┆ truncate │
123+
│ --- ┆ --- │
124+
│ datetime[μs] ┆ datetime[μs] │
125+
╞═════════════════════╪═════════════════════╡
126+
│ 2001-01-01 00:00:00 ┆ 2001-01-01 00:00:00 │
127+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
128+
│ 2001-01-01 00:10:00 ┆ 2001-01-01 00:00:00 │
129+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
130+
│ 2001-01-01 00:20:00 ┆ 2001-01-01 00:00:00 │
131+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
132+
│ 2001-01-01 00:30:00 ┆ 2001-01-01 00:30:00 │
133+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
134+
│ 2001-01-01 00:40:00 ┆ 2001-01-01 00:30:00 │
135+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
136+
│ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
137+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
138+
│ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
139+
└─────────────────────┴─────────────────────┘
140+
114141
"""
115142
if offset is None:
116143
offset = "0ns"
@@ -128,6 +155,11 @@ def round(
128155
"""
129156
Divide the date/datetime range into buckets.
130157
158+
Each date/datetime in the first half of the interval
159+
is mapped to the start of its bucket.
160+
Each date/datetime in the second half of the interval
161+
is mapped to the end of its bucket.
162+
131163
Parameters
132164
----------
133165
every
@@ -217,6 +249,31 @@ def round(
217249
... )
218250
True
219251
252+
>>> start = datetime(2001, 1, 1)
253+
>>> stop = datetime(2001, 1, 1, 1)
254+
>>> df = pl.date_range(start, stop, "10m", name="dates").to_frame()
255+
>>> df.select(["dates", pl.col("dates").dt.round("30m").alias("round")])
256+
shape: (7, 2)
257+
┌─────────────────────┬─────────────────────┐
258+
│ dates ┆ round │
259+
│ --- ┆ --- │
260+
│ datetime[μs] ┆ datetime[μs] │
261+
╞═════════════════════╪═════════════════════╡
262+
│ 2001-01-01 00:00:00 ┆ 2001-01-01 00:00:00 │
263+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
264+
│ 2001-01-01 00:10:00 ┆ 2001-01-01 00:00:00 │
265+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
266+
│ 2001-01-01 00:20:00 ┆ 2001-01-01 00:30:00 │
267+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
268+
│ 2001-01-01 00:30:00 ┆ 2001-01-01 00:30:00 │
269+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
270+
│ 2001-01-01 00:40:00 ┆ 2001-01-01 00:30:00 │
271+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
272+
│ 2001-01-01 00:50:00 ┆ 2001-01-01 01:00:00 │
273+
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
274+
│ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
275+
└─────────────────────┴─────────────────────┘
276+
220277
"""
221278
if offset is None:
222279
offset = "0ns"

0 commit comments

Comments
 (0)