@@ -9975,81 +9975,97 @@ def value_counts(
99759975 normalize : bool = False ,
99769976 ) -> Expr :
99779977 """
9978- Count the occurrences of unique values.
9978+ Count the occurrences of unique values.
99799979
99809980 Parameters
99819981 ----------
99829982 sort
9983- Sort the output by count in descending order.
9984- If set to `False` (default), the order of the output is random .
9983+ Sort the output by count, in descending order.
9984+ If set to `False` (default), the order is non-deterministic.
99859985 parallel
99869986 Execute the computation in parallel.
99879987
99889988 .. note::
9989- This option should likely not be enabled in a group by context,
9990- as the computation is already parallelized per group.
9989+ This option should likely *not* be enabled in a `group_by` context,
9990+ as the computation will already be parallelized per group.
99919991 name
9992- Give the resulting count column a specific name;
9993- if `normalize` is True defaults to "proportion",
9994- otherwise defaults to "count".
9992+ Give the resulting count column a specific name; if `normalize` is
9993+ True this defaults to "proportion", otherwise defaults to "count".
99959994 normalize
9996- If true gives relative frequencies of the unique values
9995+ If True, the count is returned as the relative frequency of unique
9996+ values normalized to 1.0.
99979997
99989998 Returns
99999999 -------
1000010000 Expr
10001- Expression of data type :class:`Struct` with mapping of unique values to
10002- their count.
10001+ Expression of type :class:`Struct`, mapping unique values to their
10002+ count (or proportion).
1000310003
1000410004 Examples
1000510005 --------
1000610006 >>> df = pl.DataFrame(
1000710007 ... {"color": ["red", "blue", "red", "green", "blue", "blue"]}
1000810008 ... )
10009- >>> df.select(pl.col("color").value_counts()) # doctest: +IGNORE_RESULT
10009+ >>> df_count = df.select(pl.col("color").value_counts())
10010+ >>> df_count # doctest: +IGNORE_RESULT
1001010011 shape: (3, 1)
1001110012 ┌─────────────┐
1001210013 │ color │
1001310014 │ --- │
1001410015 │ struct[2] │
1001510016 ╞═════════════╡
10016- │ {"red",2} │
1001710017 │ {"green",1} │
1001810018 │ {"blue",3} │
10019+ │ {"red",2} │
1001910020 └─────────────┘
1002010021
10021- Sort the output by (descending) count and customize the count field name.
10022+ >>> df_count.unnest("color") # doctest: +IGNORE_RESULT
10023+ shape: (3, 2)
10024+ ┌───────┬───────┐
10025+ │ color ┆ count │
10026+ │ --- ┆ --- │
10027+ │ str ┆ u32 │
10028+ ╞═══════╪═══════╡
10029+ │ green ┆ 1 │
10030+ │ blue ┆ 3 │
10031+ │ red ┆ 2 │
10032+ └───────┴───────┘
1002210033
10023- >>> df = df.select(pl.col("color").value_counts(sort=True, name="n"))
10024- >>> df
10025- shape: (3, 1)
10026- ┌─────────────┐
10027- │ color │
10028- │ --- │
10029- │ struct[2] │
10030- ╞═════════════╡
10031- │ {"blue",3} │
10032- │ {"red",2} │
10033- │ {"green",1} │
10034- └─────────────┘
10034+ Sort the output by (descending) count, customize the field name,
10035+ and normalize the count to its relative proportion (of 1.0).
1003510036
10036- >>> df.unnest("color")
10037+ >>> df_count = df.select(
10038+ ... pl.col("color").value_counts(
10039+ ... name="fraction",
10040+ ... normalize=True,
10041+ ... sort=True,
10042+ ... )
10043+ ... )
10044+ >>> df_count
10045+ shape: (3, 1)
10046+ ┌────────────────────┐
10047+ │ color │
10048+ │ --- │
10049+ │ struct[2] │
10050+ ╞════════════════════╡
10051+ │ {"blue",0.5} │
10052+ │ {"red",0.333333} │
10053+ │ {"green",0.166667} │
10054+ └────────────────────┘
10055+
10056+ >>> df_count.unnest("color")
1003710057 shape: (3, 2)
10038- ┌───────┬─────┐
10039- │ color ┆ n │
10040- │ --- ┆ --- │
10041- │ str ┆ u32 │
10042- ╞═══════╪═════╡
10043- │ blue ┆ 3 │
10044- │ red ┆ 2 │
10045- │ green ┆ 1 │
10046- └───────┴─────┘
10047- """
10048- if name is None :
10049- if normalize :
10050- name = "proportion"
10051- else :
10052- name = "count"
10058+ ┌───────┬──────────┐
10059+ │ color ┆ fraction │
10060+ │ --- ┆ --- │
10061+ │ str ┆ f64 │
10062+ ╞═══════╪══════════╡
10063+ │ blue ┆ 0.5 │
10064+ │ red ┆ 0.333333 │
10065+ │ green ┆ 0.166667 │
10066+ └───────┴──────────┘
10067+ """
10068+ name = name or ("proportion" if normalize else "count" )
1005310069 return self ._from_pyexpr (
1005410070 self ._pyexpr .value_counts (sort , parallel , name , normalize )
1005510071 )
0 commit comments