fix(python): dot product of two integer series is cast to float (#15502)

CanglongCl · web-flow · commit ad45545d802f · 2024-04-08T18:24:37.000+02:00
diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py
@@ -4947,7 +4947,7 @@ def round_sig_figs(self, digits: int) -> Series:
         ]
         """
 
-    def dot(self, other: Series | ArrayLike) -> float | None:
+    def dot(self, other: Series | ArrayLike) -> int | float | None:
         """
         Compute the dot/inner product between two Series.
 
diff --git a/py-polars/src/series/mod.rs b/py-polars/src/series/mod.rs
@@ -604,9 +604,30 @@ impl PySeries {
         self.series.shrink_to_fit();
     }
 
-    fn dot(&self, other: &PySeries) -> PyResult<f64> {
-        let out = self.series.dot(&other.series).map_err(PyPolarsErr::from)?;
-        Ok(out)
+    fn dot(&self, other: &PySeries, py: Python) -> PyResult<PyObject> {
+        let lhs_dtype = self.series.dtype();
+        let rhs_dtype = other.series.dtype();
+
+        if !lhs_dtype.is_numeric() {
+            return Err(PyPolarsErr::from(polars_err!(opq = dot, lhs_dtype)).into());
+        };
+        if !rhs_dtype.is_numeric() {
+            return Err(PyPolarsErr::from(polars_err!(opq = dot, rhs_dtype)).into());
+        }
+
+        let result: AnyValue = if lhs_dtype.is_float() || rhs_dtype.is_float() {
+            (&self.series * &other.series)
+                .sum::<f64>()
+                .map_err(PyPolarsErr::from)?
+                .into()
+        } else {
+            (&self.series * &other.series)
+                .sum::<i64>()
+                .map_err(PyPolarsErr::from)?
+                .into()
+        };
+
+        Ok(Wrap(result).into_py(py))
     }
 
     #[cfg(feature = "ipc_streaming")]
diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py
@@ -1407,6 +1407,28 @@ def test_dot_product() -> None:
     assert df["a"].dot(df["b"]) == 20
     assert typing.cast(int, df.select([pl.col("a").dot("b")])[0, "a"]) == 20
 
+    result = pl.Series([1, 2, 3]) @ pl.Series([4, 5, 6])
+    assert isinstance(result, int)
+    assert result == 32
+
+    result = pl.Series([1, 2, 3]) @ pl.Series([4.0, 5.0, 6.0])
+    assert isinstance(result, float)
+    assert result == 32.0
+
+    result = pl.Series([1.0, 2.0, 3.0]) @ pl.Series([4.0, 5.0, 6.0])
+    assert isinstance(result, float)
+    assert result == 32.0
+
+    with pytest.raises(
+        pl.InvalidOperationError, match="`dot` operation not supported for dtype `bool`"
+    ):
+        pl.Series([True, False, False, True]) @ pl.Series([4, 5, 6, 7])
+
+    with pytest.raises(
+        pl.InvalidOperationError, match="`dot` operation not supported for dtype `str`"
+    ):
+        pl.Series([1, 2, 3, 4]) @ pl.Series(["True", "False", "False", "True"])
+
 
 def test_hash_rows() -> None:
     df = pl.DataFrame({"a": [1, 2, 3, 4], "b": [2, 2, 2, 2]})

Original file line number	Diff line number	Diff line change
`@@ -4947,7 +4947,7 @@ def round_sig_figs(self, digits: int) -> Series:`
`4947`	`4947`	`]`
`4948`	`4948`	`"""`
`4949`	`4949`
`4950`		`- def dot(self, other: Series \| ArrayLike) -> float \| None:`
	`4950`	`+ def dot(self, other: Series \| ArrayLike) -> int \| float \| None:`
`4951`	`4951`	`"""`
`4952`	`4952`	`Compute the dot/inner product between two Series.`
`4953`	`4953`