2 changes: 1 addition & 1 deletion dali/pipeline/pipeline.h
@@ -259,7 +259,7 @@ class DLL_PUBLIC Pipeline {
DLL_PUBLIC OperatorBase *GetOperator(std::string_view instance_name);

/**
* @brief Rreturns an input graph node with a given name
* @brief Returns an input graph node with a given name
*/
DLL_PUBLIC const graph::OpNode *GetInputOperatorNode(std::string_view name);

2 changes: 1 addition & 1 deletion dali/pipeline/util/bounding_box_utils.h
@@ -181,7 +181,7 @@ void ReadBox(Box<ndim, float>& box,

/**
* @brief Write bounding box coordinates to a 1D span of floats, outputting the coordinates in the
* order specied by the provided layout
* order specified by the provided layout
* @remarks Dimension names in the layout can be low (or start) anchors: "xyz", high (or end)
* anchors: "XYZ" or extent "WHD". For example, a layout "xyXY" implies that the bounding box
* coordinates are following the order x_start, y_start, x_end, y_end, while a layout "xyWH" means
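To make the layout convention above concrete, here is a small illustrative sketch in Python; the helper below is hypothetical and not part of the DALI API, it only mirrors the naming convention described in the comment (low anchors "xy", high anchors "XY", extents "WH" for a 2D box):

```
# Hypothetical helper illustrating the layout convention described above.
def write_box(x_start, y_start, x_end, y_end, layout):
    values = {
        "x": x_start, "y": y_start,   # low (start) anchors
        "X": x_end,   "Y": y_end,     # high (end) anchors
        "W": x_end - x_start,         # extent along x (width)
        "H": y_end - y_start,         # extent along y (height)
    }
    return [values[c] for c in layout]

box = (10, 20, 110, 220)              # x_start, y_start, x_end, y_end
print(write_box(*box, "xyXY"))        # [10, 20, 110, 220]
print(write_box(*box, "xyWH"))        # [10, 20, 100, 200]
```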
119 changes: 111 additions & 8 deletions dali/python/nvidia/dali/experimental/dynamic/_batch.py
@@ -156,6 +156,21 @@ def as_batch(self):


class Batch:
"""A Batch object.

This class represents a batch of tensors usable with DALI operators. The tensors in the batch
have the same element type, layout and number of dimensions, but can differ in shape.

A Batch can contain:

* a single buffer and shape, owner by DALI, representing consecutive tensors
Suggested change
* a single buffer and shape, owner by DALI, representing consecutive tensors
* a single buffer and shape, owned by DALI, representing consecutive tensors

* a list of :class:`Tensor` objects.
* a result of a lazy evaluation of a DALI operator.

In case of lazy evaluation, the operations are executed only after an attempt is made to access
the tensor data or properties which cannot be obtained without running the underlying operation.
"""

def __init__(
self,
tensors: Optional[Any] = None,
@@ -166,7 +181,10 @@ def __init__(
copy: bool = False,
):
"""Constructs a Batch object.
Batch objects should not be constructed directly, use batch or as_batch instead.

.. warning::
Batch objects should not be constructed directly, use :meth:`batch` or
:meth:`as_batch` instead.

The batch object can be created either from an existing object, passed as `tensors` or
from an invocation result.
@@ -231,7 +249,7 @@ def __init__(

tmp = cast(tmp, dtype=dtype, device=device)
copied = True
self.assign(tmp)
self._assign(tmp)
if self._backend and layout:
self._backend.set_layout(layout)
elif _is_tensor_type(tensors):
@@ -327,13 +345,13 @@ def __init__(
from . import cast

dev = cast(dev, dtype=dtype, device=device)
self.assign(dev.evaluate())
self._assign(dev.evaluate())
copied = True
else:
if self._dtype is not None and dtype is not None and self._dtype != dtype:
from . import cast

self.assign(cast(self, dtype=dtype, device=device))
self._assign(cast(self, dtype=dtype, device=device))

if _eval_mode.EvalMode.current().value >= _eval_mode.EvalMode.eager.value:
self.evaluate()
@@ -343,6 +361,17 @@ def _is_external(self) -> bool:

@staticmethod
def broadcast(sample, batch_size: int, device: Optional[Device] = None) -> "Batch":
"""
Creates a batch by repeating a single `sample` `batch_size` times.

This function returns a batch obtained by repeating the sample `sample` `batch_size` times.
Optionally, the result may be placed on the specified device (otherwise it will inherit the
device from the `sample` argument).

This function yields a result equivalent to
`as_batch([tensor(sample)] * batch_size, device=device)`
but is much more efficient.
"""
if isinstance(sample, Batch):
raise ValueError("Cannot broadcast a Batch")
if _is_tensor_type(sample):
@@ -376,6 +405,9 @@ def broadcast(sample, batch_size: int, device: Optional[Device] = None) -> "Batc

@property
def dtype(self) -> DType:
"""
The element type of the tensors in the batch.
"""
if self._dtype is None:
if self._backend is not None:
self._dtype = DType.from_type_id(self._backend.dtype)
@@ -389,6 +421,9 @@ def dtype(self) -> DType:

@property
def device(self) -> Device:
"""
The device on which the batch resides (or will reside, in case of lazy evaluation).
"""
if self._device is None:
if self._invocation_result is not None:
self._device = self._invocation_result.device
@@ -400,6 +435,12 @@ def device(self) -> Device:

@property
def layout(self) -> str:
"""
The layout of tensors in the batch.

The "batch dimension" (commonly denoted as N) is not included - a batch of HWC images
will have HWC layout, not NHWC.
"""
if self._layout is None:
if self._invocation_result is not None:
self._layout = self._invocation_result.layout
@@ -418,6 +459,11 @@ def layout(self) -> str:

@property
def ndim(self) -> int:
"""
The number of dimensions of the samples in the batch.

The "batch dimension" is not included - e.g. a batch of HWC is still a 3D object.
"""
if self._ndim is None:
if self._backend is not None:
self._ndim = self._backend.ndim()
@@ -431,9 +477,18 @@ def ndim(self) -> int:

@property
def tensors(self):
"""
Returns an indexable list of :class:`Tensor` objects that comprise the batch.
"""
return _TensorList(self)

def to_device(self, device: Device, force_copy: bool = False) -> "Batch":
"""
Returns the data batch on the specified device.

If the batch already resides on the device specified, the function will return `self`
unless a copy is explicitly requested by passing ``force_copy=True``.
"""
if device is not None and not isinstance(device, Device):
device = _device(device)
if self.device == device and not force_copy:
@@ -446,12 +501,20 @@ def to_device(self, device: Device, force_copy: bool = False) -> "Batch":
return ret

def cpu(self) -> "Batch":
"""
Returns the batch on the CPU. If it's already there, this function returns `self`.
"""
return self.to_device(Device("cpu"))

def gpu(self, index: Optional[int] = None) -> "Batch":
"""
Returns the batch on the GPU. If it's already there, this function returns `self`.

If index is not specified, the current CUDA device is used.
"""
return self.to_device(Device("gpu", index))

def assign(self, other: "Batch"):
def _assign(self, other: "Batch"):
if other is self:
return
self._device = other._device
@@ -492,9 +555,19 @@ def slice(self):
return BatchedSlice(self)

def __iter__(self):
"""
Iterates over tensors in the batch.
"""
return iter(self.tensors)

def select(self, r):
def select(self, sample_range):
"""
Selects a range of samples.

The result of this function is either a `Batch` (if `sample_range` is a `range`, `list`,
or `slice`) or a `Tensor` if `sample_range` is a number.
"""
r = sample_range
if r is ...:
return self
if isinstance(r, slice):
@@ -533,6 +606,9 @@ def _is_batch(x):

@property
def batch_size(self) -> int:
"""
The number of tensors in the batch.
"""
if self._backend is not None:
return len(self._backend)
elif self._tensors is not None:
@@ -544,6 +620,23 @@ def batch_size(self) -> int:

@property
def shape(self):
"""
The shape of the batch.

Returns the list of shapes of individual samples.

Example::

>>> import nvidia.dali.experimental.dynamic as ndd
>>> import numpy as np
>>> t0 = ndd.tensor(np.zeros((480, 640, 3)))
>>> t1 = ndd.tensor(np.zeros((720, 1280, 1)))
>>> b = ndd.as_batch([t0, t1])
>>> print(b.shape)
[(480, 640, 3), (720, 1280, 1)]
"""
if self._invocation_result is not None:
return self._invocation_result.shape
if self._backend is not None:
@@ -556,8 +649,18 @@ def __str__(self) -> str:
return "Batch(\n" + str(self.evaluate()._backend) + ")"

def evaluate(self):
with _EvalContext.get() as ctx:
if self._backend is None:
"""
Evaluates the underlying lazy expression, if any.

If the batch is a result of a lazy evaluation, calling `evaluate` will cause the expression
to be evaluated. If the batch already contains concrete data, this function has no effect.

The behavior of this function is affected by the current evaluation context and current
device. See :class:`EvalContext` and :class:`Device` for details.
"""
if self._backend is None:
# TODO(michalz): Consider thread-safety
with _EvalContext.get() as ctx:
if self._invocation_result is not None:
self._backend = self._invocation_result.value(ctx)
else: