Commit 9748309

feat: start (silently) adding support for SQLFrame (#1883)
1 parent a11147b commit 9748309

15 files changed: +415 -191

.github/workflows/mkdocs.yml (+1 -1)

@@ -29,6 +29,6 @@ jobs:
       - name: griffe
         # hopefully temporary until https://github.com/mkdocstrings/mkdocstrings/issues/716
         run: pip install git+https://github.com/MarcoGorelli/griffe.git@no-overloads
-      - run: pip install -e .[docs,pyspark,dask,duckdb]
+      - run: pip install -e .[docs,dask,duckdb]
 
       - run: mkdocs gh-deploy --force

narwhals/_spark_like/dataframe.py (+59 -29)

@@ -6,9 +6,6 @@
 from typing import Literal
 from typing import Sequence
 
-from pyspark.sql import Window
-from pyspark.sql import functions as F  # noqa: N812
-
 from narwhals._spark_like.utils import ExprKind
 from narwhals._spark_like.utils import native_to_narwhals_dtype
 from narwhals._spark_like.utils import parse_exprs_and_named_exprs
@@ -40,38 +37,73 @@ def __init__(
         *,
         backend_version: tuple[int, ...],
         version: Version,
+        implementation: Implementation,
     ) -> None:
         self._native_frame = native_dataframe
         self._backend_version = backend_version
-        self._implementation = Implementation.PYSPARK
+        self._implementation = implementation
         self._version = version
         validate_backend_version(self._implementation, self._backend_version)
 
-    def __native_namespace__(self: Self) -> ModuleType:  # pragma: no cover
-        if self._implementation is Implementation.PYSPARK:
-            return self._implementation.to_native_namespace()
+    @property
+    def _F(self) -> Any:  # noqa: N802
+        if self._implementation is Implementation.SQLFRAME:
+            from sqlframe.duckdb import functions
+
+            return functions
+        from pyspark.sql import functions
+
+        return functions
+
+    @property
+    def _native_dtypes(self) -> Any:
+        if self._implementation is Implementation.SQLFRAME:
+            from sqlframe.duckdb import types
+
+            return types
+        from pyspark.sql import types
+
+        return types
+
+    @property
+    def _Window(self) -> Any:  # noqa: N802
+        if self._implementation is Implementation.SQLFRAME:
+            from sqlframe.duckdb import Window
 
-        msg = f"Expected pyspark, got: {type(self._implementation)}"  # pragma: no cover
-        raise AssertionError(msg)
+            return Window
+        from pyspark.sql import Window
+
+        return Window
+
+    def __native_namespace__(self: Self) -> ModuleType:  # pragma: no cover
+        return self._implementation.to_native_namespace()
 
     def __narwhals_namespace__(self: Self) -> SparkLikeNamespace:
         from narwhals._spark_like.namespace import SparkLikeNamespace
 
         return SparkLikeNamespace(
-            backend_version=self._backend_version, version=self._version
+            backend_version=self._backend_version,
+            version=self._version,
+            implementation=self._implementation,
         )
 
     def __narwhals_lazyframe__(self: Self) -> Self:
         return self
 
     def _change_version(self: Self, version: Version) -> Self:
         return self.__class__(
-            self._native_frame, backend_version=self._backend_version, version=version
+            self._native_frame,
+            backend_version=self._backend_version,
+            version=version,
+            implementation=self._implementation,
         )
 
     def _from_native_frame(self: Self, df: DataFrame) -> Self:
         return self.__class__(
-            df, backend_version=self._backend_version, version=self._version
+            df,
+            backend_version=self._backend_version,
+            version=self._version,
+            implementation=self._implementation,
         )
 
     @property
@@ -102,10 +134,10 @@ def select(
 
         if not new_columns:
             # return empty dataframe, like Polars does
-            from pyspark.sql.types import StructType
-
             spark_session = self._native_frame.sparkSession
-            spark_df = spark_session.createDataFrame([], StructType([]))
+            spark_df = spark_session.createDataFrame(
+                [], self._native_dtypes.StructType([])
+            )
 
             return self._from_native_frame(spark_df)
 
@@ -116,7 +148,7 @@
             return self._from_native_frame(self._native_frame.agg(*new_columns_list))
         else:
            new_columns_list = [
-                col.over(Window.partitionBy(F.lit(1))).alias(col_name)
+                col.over(self._Window().partitionBy(self._F.lit(1))).alias(col_name)
                 if expr_kind is ExprKind.AGGREGATION
                 else col.alias(col_name)
                 for (col_name, col), expr_kind in zip(new_columns.items(), expr_kinds)
@@ -131,7 +163,7 @@ def with_columns(
         new_columns, expr_kinds = parse_exprs_and_named_exprs(self, *exprs, **named_exprs)
 
         new_columns_map = {
-            col_name: col.over(Window.partitionBy(F.lit(1)))
+            col_name: col.over(self._Window().partitionBy(self._F.lit(1)))
             if expr_kind is ExprKind.AGGREGATION
             else col
             for (col_name, col), expr_kind in zip(new_columns.items(), expr_kinds)
@@ -152,7 +184,9 @@ def filter(self: Self, *predicates: SparkLikeExpr, **constraints: Any) -> Self:
     def schema(self: Self) -> dict[str, DType]:
         return {
             field.name: native_to_narwhals_dtype(
-                dtype=field.dataType, version=self._version
+                dtype=field.dataType,
+                version=self._version,
+                spark_types=self._native_dtypes,
             )
             for field in self._native_frame.schema
         }
@@ -186,18 +220,18 @@ def sort(
         descending: bool | Sequence[bool],
         nulls_last: bool,
     ) -> Self:
-        import pyspark.sql.functions as F  # noqa: N812
-
         if isinstance(descending, bool):
             descending = [descending] * len(by)
 
         if nulls_last:
             sort_funcs = (
-                F.desc_nulls_last if d else F.asc_nulls_last for d in descending
+                self._F.desc_nulls_last if d else self._F.asc_nulls_last
+                for d in descending
             )
         else:
             sort_funcs = (
-                F.desc_nulls_first if d else F.asc_nulls_first for d in descending
+                self._F.desc_nulls_first if d else self._F.asc_nulls_first
+                for d in descending
             )
 
         sort_cols = [sort_f(col) for col, sort_f in zip(by, sort_funcs)]
@@ -207,14 +241,12 @@ def drop_nulls(self: Self, subset: list[str] | None) -> Self:
         return self._from_native_frame(self._native_frame.dropna(subset=subset))
 
     def rename(self: Self, mapping: dict[str, str]) -> Self:
-        import pyspark.sql.functions as F  # noqa: N812
-
         rename_mapping = {
             colname: mapping.get(colname, colname) for colname in self.columns
         }
         return self._from_native_frame(
             self._native_frame.select(
-                [F.col(old).alias(new) for old, new in rename_mapping.items()]
+                [self._F.col(old).alias(new) for old, new in rename_mapping.items()]
             )
         )
 
@@ -238,8 +270,6 @@ def join(
         right_on: str | list[str] | None,
         suffix: str,
     ) -> Self:
-        import pyspark.sql.functions as F  # noqa: N812
-
         self_native = self._native_frame
         other_native = other._native_frame
 
@@ -262,7 +292,7 @@
             },
         }
         other = other_native.select(
-            [F.col(old).alias(new) for old, new in rename_mapping.items()]
+            [self._F.col(old).alias(new) for old, new in rename_mapping.items()]
         )
 
         # If how in {"semi", "anti"}, then resulting columns are same as left columns
@@ -280,5 +310,5 @@
         )
 
         return self._from_native_frame(
-            self_native.join(other=other, on=left_on, how=how).select(col_order)
+            self_native.join(other, on=left_on, how=how).select(col_order)
        )
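
The heart of this file's change is the trio of lazily-dispatching properties (`_F`, `_native_dtypes`, `_Window`): the module-level `pyspark.sql` imports are replaced by per-access lookups keyed on `self._implementation`, so SQLFrame's DuckDB backend (which mirrors the PySpark API) can stand in without PySpark being installed. Below is a distilled sketch of that pattern, not the commit's actual code: `Implementation` here is a simplified stand-in for narwhals' enum, and `SparkLikeFrame` is a hypothetical minimal wrapper.

from __future__ import annotations

from enum import Enum, auto
from typing import Any


class Implementation(Enum):
    # Simplified stand-in for narwhals' Implementation enum.
    PYSPARK = auto()
    SQLFRAME = auto()


class SparkLikeFrame:
    """Hypothetical wrapper: one class, two PySpark-compatible backends."""

    def __init__(self, native_frame: Any, implementation: Implementation) -> None:
        self._native_frame = native_frame
        self._implementation = implementation

    @property
    def _F(self) -> Any:  # noqa: N802
        # Lazy import: neither backend needs to be installed until first use.
        if self._implementation is Implementation.SQLFRAME:
            from sqlframe.duckdb import functions
        else:
            from pyspark.sql import functions
        return functions

    def rename(self, mapping: dict[str, str]) -> Any:
        # The same `functions.col(...).alias(...)` call works on both backends.
        return self._native_frame.select(
            [self._F.col(old).alias(new) for old, new in mapping.items()]
        )

Because SQLFrame keeps PySpark's `functions`/`types`/`Window` surface, the remaining hunks are mostly mechanical substitutions (`F.lit` to `self._F.lit`, `Window` to `self._Window`, `StructType` to `self._native_dtypes.StructType`), plus threading `implementation` through every constructor so the backend choice survives `_from_native_frame` and `_change_version`.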
