Eventual-Inc
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 78 additions & 606 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 1 addition & 1 deletion
diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py
Lines changed: 54 additions & 1 deletion
diff --git a/daft/dataframe/to_torch.py b/daft/dataframe/to_torch.py
Lines changed: 87 additions & 0 deletions
diff --git a/examples/hello/src/lib.rs b/examples/hello/src/lib.rs
Lines changed: 4 additions & 13 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 2 deletions
diff --git a/src/common/io-config/src/python.rs b/src/common/io-config/src/python.rs
Lines changed: 46 additions & 0 deletions
@@ -243,7 +243,6 @@ exclude = [
 
 [workspace.dependencies]
 arrow = "57.1.0"
-half = {version = "2.7.1", features = ["num-traits", "bytemuck", "serde"]}
 arrow-array = {version = "57.1.0", features = ["chrono-tz"]}
 arrow-buffer = "57.1.0"
 arrow-csv = "57.1.0"
@@ -312,6 +311,7 @@ daft-sql = {path = "src/daft-sql"}
 dashmap = "6.1.0"
 educe = "0.6.0"
 futures = "0.3.30"
+half = {version = "2.7.1", features = ["num-traits", "bytemuck", "serde"]}
 hashbrown = "0.16"
 html-escape = "0.2.13"
 image = {version = "0.25.10", default-features = false}
 
@@ -77,6 +77,7 @@
     from daft.catalog.__unity._client import UnityCatalogTable
     from daft.checkpoint import IdempotentCommit
     from daft.convert import ArrowStreamExportable
+    from daft.dataframe.to_torch import DaftTorchDataLoader
     from daft.execution.metadata import ExecutionMetadata
     from daft.io import DataSink
     from daft.io.sink import WriteResultType
@@ -621,7 +622,7 @@ def iter_partitions(
     ) -> Iterator[Union[MicroPartition, "ray.ObjectRef"]]:
         """Begin executing this dataframe and return an iterator over the partitions.
 
-        Each partition will be returned as a daft.recordbatch object (if using Python runner backend)
+        Each partition will be returned as a daft.MicroPartition object (if using Python runner backend)
         or a ray ObjectRef (if using Ray runner backend).
 
         Args:
@@ -5859,6 +5860,58 @@ def to_torch_iter_dataset(
 
         return DaftTorchIterableDataset(df)
 
+    @DataframePublicAPI
+    def to_torch_dataloader(
+        self,
+        batch_size: int = 1,
+        *,
+        pin_memory: bool = False,
+        pin_memory_device: str = "",
+        prefetch_count: int = 0,
+    ) -> "DaftTorchDataLoader":
+        """Return a DataLoader-like iterator that streams batched partitions for PyTorch training.
+
+        Begins execution of the DataFrame when iterated. Each yielded batch is a dict mapping column
+        names to `torch.Tensor` values (or Python lists for non-numeric columns).
+
+        For row-level shuffling, use [``shuffle``][daft.DataFrame.shuffle] or
+        [``sample``][daft.DataFrame.sample] on the DataFrame before calling this method.
+
+        Note:
+            Batch sizing is best-effort. Batches may be smaller than `batch_size`.
+
+        Args:
+            batch_size: Target number of rows per batch. Must be greater than 0.
+            pin_memory: If `True`, pin memory on returned tensors for faster GPU transfer. Has no
+                effect when CUDA is not available.
+            pin_memory_device: Optional device for pinned memory (PyTorch 2.x). An empty string
+                (the default) means no explicit device is requested.
+            prefetch_count: Number of batches loaded in advance. This will increase memory usage, but can
+                improve throughput. With the default of 0, no explicit buffer size is passed to the runner.
+
+        Returns:
+            DaftTorchDataLoader: Iterable over batch dicts for use as
+            `for batch in df.to_torch_dataloader(batch_size): ...`
+
+        Raises:
+            ValueError: If `batch_size` is not greater than 0.
+
+        Examples:
+            >>> import daft
+            >>> import torch  # doctest: +SKIP
+            >>> df = daft.from_pydict({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
+            >>> for batch in df.to_torch_dataloader(batch_size=2):  # doctest: +SKIP
+            ...     assert batch["x"].shape == (2,)
+
+        Tip:
+            For the PyTorch `IterableDataset` + `DataLoader` composition, see
+            [``to_torch_iter_dataset``][daft.DataFrame.to_torch_iter_dataset].
+        """
+        # Imported lazily so the torch-dependent module is only loaded when this method is used.
+        from daft.dataframe.to_torch import DaftTorchDataLoader
+
+        return DaftTorchDataLoader(
+            self,
+            batch_size,
+            pin_memory=pin_memory,
+            pin_memory_device=pin_memory_device,
+            prefetch_count=prefetch_count,
+        )
+
     @DataframePublicAPI
     def to_ray_dataset(self) -> "ray.data.dataset.DataSet":
         """Converts the current DataFrame to a [Ray Dataset](https://docs.ray.io/en/latest/data/api/dataset.html#ray.data.Dataset) which is useful for running distributed ML model training in Ray.
 
@@ -3,9 +3,15 @@
 import logging
 from typing import TYPE_CHECKING, Any
 
+from daft.dependencies import np, torch
+from daft.runners.partitioning import MaterializedResult
+
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator
 
+    from daft.dataframe.dataframe import DataFrame
+    from daft.recordbatch import MicroPartition
+
 logger = logging.getLogger(__name__)
 
 try:
@@ -47,3 +53,84 @@ def __init__(self, iterable: Iterable[dict[str, Any]]):
 
     def __iter__(self) -> Iterator[dict[str, Any]]:
         return iter(self.iterable)
+
+
+class DaftTorchDataLoader:
+    """Streams batched partitions from a Daft DataFrame and yields PyTorch-ready batch dicts.
+
+    Note:
+        This simulates the behavior of a PyTorch DataLoader, but does not use the DataLoader class itself.
+        If the underlying DataFrame is already materialized, it will reuse the existing data.
+    """
+
+    def __init__(
+        self,
+        df: DataFrame,
+        batch_size: int = 1,
+        *,
+        pin_memory: bool = False,
+        pin_memory_device: str = "",
+        prefetch_count: int = 0,
+        # TODO: Add support for drop_last when we have strict into_batches
+    ) -> None:
+        if batch_size <= 0:
+            raise ValueError("batch_size must be greater than 0")
+
+        self._batch_size = batch_size
+        self._pin_memory = pin_memory
+        self._pin_memory_device = pin_memory_device if pin_memory_device else None
+        self._prefetch_count = prefetch_count
+
+        self._batched_df = df.into_batches(batch_size)
+
+    def __iter__(self) -> Iterator[dict[str, Any]]:
+        from daft.runners import get_or_create_runner
+
+        results = self._batched_df._result
+        if results is not None:
+            for _, mat_result in results.items():
+                batch = self._to_torch_batch(mat_result.micropartition())
+                if batch is not None:
+                    yield batch
+        else:
+            buffer_size = self._prefetch_count if self._prefetch_count > 0 else None
+            partitions_iter: Iterator[MaterializedResult[Any]] = get_or_create_runner().run_iter(
+                self._batched_df._builder, results_buffer_size=buffer_size
+            )
+            for mat_result in partitions_iter:
+                batch = self._to_torch_batch(mat_result.micropartition())
+                if batch is not None:
+                    yield batch
+
+    def _to_torch_batch(self, batch: MicroPartition) -> dict[str, Any]:
+        return {key: self._column_to_tensor(values) for key, values in batch.to_pydict().items()}
+
+    def _column_to_tensor(self, values: list[Any]) -> Any:
+        if len(values) == 0:
+            return self._pin(torch.tensor([]))
+
+        first = values[0]
+
+        if isinstance(first, torch.Tensor):
+            return self._pin(torch.stack(values))
+        if hasattr(first, "__array__") and not isinstance(first, (str, bytes)):
+            if isinstance(first, np.ndarray) and first.ndim > 0:
+                return self._pin(torch.stack([torch.as_tensor(v) for v in values]))
+            return self._pin(torch.as_tensor(values))
+        if isinstance(first, (bool, int, float)):
+            return self._pin(torch.as_tensor(values))
+
+        return values
+
+    def _pin(self, tensor: torch.Tensor) -> torch.Tensor:
+        if not self._pin_memory:
+            return tensor
+
+        # Pinned host memory is only used for async CPU -> CUDA copies.
+        if not torch.cuda.is_available():
+            return tensor
+
+        # If a specific device is provided, use it. Otherwise, use the default device.
+        if self._pin_memory_device:
+            return tensor.pin_memory(device=self._pin_memory_device)
+        return tensor.pin_memory()
@@ -1,6 +1,6 @@
 use std::{ffi::CStr, sync::Arc};
 
-use arrow_array::{Array, ArrayRef, builder::StringBuilder, cast::AsArray};
+use arrow_array::{Array, ArrayRef};
 use arrow_schema::{DataType, Field};
 use daft_ext::{daft_extension, prelude::*};
 
@@ -18,18 +18,9 @@ impl DaftExtension for HelloExtension {
 
 // ── Scalar Function ────────────────────────────────────────────────
 
-#[daft_func_batch(return_dtype = DataType::Utf8)]
-fn greet(input: ArrayRef) -> DaftResult<ArrayRef> {
-    let names = input.as_string::<i64>();
-    let mut builder = StringBuilder::with_capacity(names.len(), names.len() * 16);
-    for i in 0..names.len() {
-        if names.is_null(i) {
-            builder.append_null();
-        } else {
-            builder.append_value(format!("Hello, {}!", names.value(i)));
-        }
-    }
-    Ok(Arc::new(builder.finish()))
+// Formats the greeting `Hello, <name>!` for one input name.
+#[daft_func]
+fn greet(name: &str) -> String {
+    format!("Hello, {name}!")
 }
 
 // ── Aggregate Function ─────────────────────────────────────────────
 
@@ -47,9 +47,9 @@ openai = ["openai<2.39.0", "numpy<2.4.0", "pillow==12.2.0"]
 pandas = ["pandas<2.4.0"]
 postgres = ["psycopg[binary]<3.4.0", "pgvector<0.5.0", "sqlglot<30.9.0", "connectorx>=0.4.4,<0.5.0"]
 ray = [
+  # floor is 2.11: flotilla uses ray.exceptions.ActorUnavailableError, added in ray 2.11
   # Inherit existing Ray version. Get the "default" extra for the Ray dashboard.
-  'ray[data, client]<2.56.0,>=2.11.0; platform_system != "Windows"',
-  'ray[data, client]>=2.11.0,<2.56.0; platform_system == "Windows"'  # floor is 2.11: flotilla uses ray.exceptions.ActorUnavailableError, added in ray 2.11
+  'ray[data, client]<2.56.0,>=2.11.0'
 ]
 transformers = ["transformers<5.10.0", "sentence-transformers<5.6.0", "torch<2.13.0", "torchvision<0.28.0", "pillow==12.2.0"]
 # connectorx 0.4.4 is needed for pgvector support.
 
@@ -1548,6 +1548,30 @@ impl HuggingFaceConfig {
     }
 }
 
+impl TosConfig {
+    /// Common prefix of every warning emitted about a multipart setting.
+    const MULTIPART_SETTING_IGNORED_MSG: &str = "TosConfig multipart settings are no longer used; TOS I/O is now handled by OpenDAL, which manages multipart uploads internally.";
+
+    /// Emits one `log::warn!` for each multipart option that was explicitly provided.
+    ///
+    /// Options left as `None` produce no output.
+    fn warn_multipart_settings(
+        multipart_size: Option<u64>,
+        multipart_max_concurrency: Option<u32>,
+    ) {
+        let notice = Self::MULTIPART_SETTING_IGNORED_MSG;
+
+        if let Some(size) = multipart_size {
+            log::warn!("{} Ignoring multipart_size={}.", notice, size);
+        }
+
+        if let Some(concurrency) = multipart_max_concurrency {
+            log::warn!(
+                "{} Ignoring multipart_max_concurrency={}.",
+                notice,
+                concurrency
+            );
+        }
+    }
+}
+
 #[pymethods]
 impl TosConfig {
     #[allow(clippy::too_many_arguments)]
@@ -1584,6 +1608,8 @@ impl TosConfig {
         multipart_size: Option<u64>,
         multipart_max_concurrency: Option<u32>,
     ) -> PyResult<Self> {
+        Self::warn_multipart_settings(multipart_size, multipart_max_concurrency);
+
         let def = crate::TosConfig::default();
         Ok(Self {
             config: crate::TosConfig {
@@ -1644,6 +1670,8 @@ impl TosConfig {
         multipart_size: Option<u64>,
         multipart_max_concurrency: Option<u32>,
     ) -> PyResult<Self> {
+        Self::warn_multipart_settings(multipart_size, multipart_max_concurrency);
+
         Ok(Self {
             config: crate::TosConfig {
                 region: region.or_else(|| self.config.region.clone()),
@@ -1742,10 +1770,28 @@ impl TosConfig {
         Ok(self.config.multipart_size)
     }
 
+    #[setter]
+    pub fn set_multipart_size(&mut self, multipart_size: u64) {
+        // Reuse the shared helper so the warning text has a single source; the emitted
+        // message is the same as formatting it inline here.
+        Self::warn_multipart_settings(Some(multipart_size), None);
+        // The value is still stored on the wrapped config after the warning.
+        self.config.multipart_size = multipart_size;
+    }
+
     #[getter]
     pub fn multipart_max_concurrency(&self) -> PyResult<u32> {
         Ok(self.config.multipart_max_concurrency)
     }
+
+    #[setter]
+    pub fn set_multipart_max_concurrency(&mut self, multipart_max_concurrency: u32) {
+        // Reuse the shared helper so the warning text has a single source; the emitted
+        // message is the same as formatting it inline here.
+        Self::warn_multipart_settings(None, Some(multipart_max_concurrency));
+        // The value is still stored on the wrapped config after the warning.
+        self.config.multipart_max_concurrency = multipart_max_concurrency;
+    }
 }
 
 #[pymethods]