From d47d6d848ec670da6c6ee0dc43e5f7491aed39b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Fri, 31 Jan 2025 17:54:36 +0100
Subject: [PATCH 01/24] Add Code from openpmd_scipp repository
https://github.com/pordyna/openpmd_scipp
---
.../python/openpmd_api/scipp/__init__.py | 13 ++
.../python/openpmd_api/scipp/loader.py | 150 +++++++++++++
.../python/openpmd_api/scipp/mesh_loader.py | 204 ++++++++++++++++++
src/binding/python/openpmd_api/scipp/utils.py | 86 ++++++++
4 files changed, 453 insertions(+)
create mode 100644 src/binding/python/openpmd_api/scipp/__init__.py
create mode 100644 src/binding/python/openpmd_api/scipp/loader.py
create mode 100644 src/binding/python/openpmd_api/scipp/mesh_loader.py
create mode 100644 src/binding/python/openpmd_api/scipp/utils.py
diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py
new file mode 100644
index 0000000000..66019bd413
--- /dev/null
+++ b/src/binding/python/openpmd_api/scipp/__init__.py
@@ -0,0 +1,13 @@
+"""openpmd_scipp: A Python package for loading openPMD datasets into scipp DataArrays.
+
+See README.md for documentation
+
+Author:
+ Pawel Ordyna
+
+License:
+GPL - 3.0 license. See LICENSE file for details.
+"""
+
+from .loader import DataLoader as DataLoader
+from .utils import closest as closest
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
new file mode 100644
index 0000000000..acbf88410b
--- /dev/null
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -0,0 +1,150 @@
+"""Module providing the DataLoader class.
+
+Provides the main openPMD to scpp interface class.
+
+Author:
+ Pawel Ordyna
+
+License:
+GPL - 3.0 license. See LICENSE file for details.
+"""
+
+import openpmd_api as pmd
+import scipp as sc
+
+from .mesh_loader import get_field, get_field_data_relay
+from .utils import closest
+
+
+def get_time_axis(series):
+ """Get the time axis from an openPMD series.
+
+ :param series: The openPMD series containing the data.
+ :type series: openpmd_api.Series
+ :return: A Scipp array representing the time axis.
+ :rtype: sc.DataArray
+ """
+ t = [
+ series.iterations[it].time * series.iterations[it].time_unit_SI for it in series.iterations
+ ]
+ return sc.array(dims=["t"], values=t, unit="s", dtype="double")
+
+
+def get_iterations(series):
+ """Get the iterations from an openPMD series.
+
+ :param series: The openPMD series containing the data.
+ :type series: openpmd_api.Series
+ :return: A Scipp dataset containing iteration IDs and their corresponding times.
+ :rtype: sc.Dataset
+ """
+ t = get_time_axis(series)
+ return sc.Dataset(
+ data={
+ "iteration_id": sc.DataArray(
+ data=sc.array(dims=["t"], values=list(series.iterations)), coords={"t": t}
+ )
+ }
+ )
+
+
+class DataLoader:
+ """DataLoader class for loading and retrieving openPMD mesh data.
+
+ This class initializes an openPMD series from a given file path and provides
+ methods to retrieve mesh data fields either by iteration index or by time.
+ The data can be retrieved as a DataRelay object or directly as a DataArray.
+
+ Attributes:
+ series (openpmd_api.Series): The openPMD series initialized from the file path.
+ iterations (sc.Dataset): A dataset containing iteration IDs and their corresponding times.
+
+ """
+
+ def __init__(self, path):
+ """Initialize the DataLoader with an openPMD series from the specified file path.
+
+ :param path: The file path to the openPMD data file.
+ :type path: str
+
+ Initializes the `series` attribute as an openPMD series in read-only mode
+ and the `iterations` attribute as a Scipp dataset containing iteration IDs
+ and their corresponding times.
+ """
+ self.series = pmd.Series(str(path), pmd.Access.read_only)
+ self.iterations = get_iterations(self.series)
+
+ def get_field(
+ self,
+ field,
+ component=pmd.Mesh_Record_Component.SCALAR,
+ time=None,
+ iteration=None,
+ relay=False,
+ time_tolerance=10 * sc.Unit("fs"),
+ ):
+ """Retrieve a mesh data field from the openPMD series.
+
+ This method retrieves a specified field and component from the
+ openPMD series either by iteration index or by time. The data
+ can be returned as a DataRelay object or directly as a
+ DataArray.
+
+ :param field: The name of the field to retrieve.
+ :type field: str
+ :param component: The component of the field to retrieve,
+ default is SCALAR.
+ :type component: openpmd_api.Mesh_Record_Component, optional
+ :param time: The time at which to retrieve the field. Either
+ time or iteration must be provided, but not both.
+ :type time: sc.Variable, optional
+ :param iteration: The iteration index at which to retrieve the
+ field. Either time or iteration must be provided, but not
+ both.
+ :type iteration: int, optional
+ :param relay: If True, return the data as a DataRelay object;
+ otherwise, return as a DataArray.
+ :type relay: bool, optional
+ :param time_tolerance: The tolerance for matching the time when
+ retrieving by time, default is 10 femtoseconds.
+ :type time_tolerance: sc.Unit, optional
+ :return: The requested field data as a DataRelay or DataArray.
+ :rtype: DataRelay or DataArray
+ :raises AssertionError: If neither time nor iteration is
+ provided, or if both are provided.
+ :raises IndexError: If no iteration is found within the
+ specified time tolerance.
+ """
+ assert (time is None and iteration is not None) or (
+ iteration is None and time is not None
+ ), "Provide either iteration index or time"
+ if iteration is None:
+ # handle integer inputs
+ time = time.astype("double")
+ time_tolerance = time_tolerance.astype("double")
+ time = time.to(unit=self.iterations["iteration_id"].coords["t"].unit)
+ try:
+ iteration = int(self.iterations["iteration_id"]["t", time].value)
+ except IndexError:
+ idx = closest(self.iterations["iteration_id"], "t", time)
+ iteration = self.iterations["iteration_id"]["t", idx]
+ assert time_tolerance is None or sc.abs(
+ iteration.coords["t"] - time
+ ) <= time_tolerance.to(unit=time.unit), (
+ f"No iteration found within time_tolerance={time_tolerance}."
+ )
+ print(
+ "Series does not contain iteration at the exact time. "
+ "Using closest iteration instead.",
+ flush=True,
+ )
+ iteration = int(iteration.value)
+
+ if relay:
+ return get_field_data_relay(
+ series=self.series, field=field, component=component, iteration=iteration
+ )
+ else:
+ return get_field(
+ series=self.series, field=field, component=component, iteration=iteration
+ )
diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py
new file mode 100644
index 0000000000..24b3576dce
--- /dev/null
+++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py
@@ -0,0 +1,204 @@
+"""Module providing mesh like data loading capability.
+
+Author:
+ Pawel Ordyna
+
+License:
+GPL - 3.0 license. See LICENSE file for details.
+"""
+
+import numpy as np
+import openpmd_api as pmd
+import scipp as sc
+
+from .utils import _unit_dimension_to_scipp
+
+
+class DataRelay(sc.DataArray):
+ """Data relay for loading openPMD meshes into scipp.
+
+ Attributes
+ ----------
+ series : openpmd_api.Series
+ The openPMD series object
+ record : openpmd_api.Record
+ The openPMD record object associated with the mesh
+ record_component : openpmd_api.Record_Component
+ The openPMD record component associated with the mesh
+
+ Methods
+ -------
+ _verify_init():
+ Ensures that the data range to load is contiguous.
+ __getitem__(*args, **kwargs):
+ Retrieves a subset of the data, returning a new DataRelay instance.
+ load_data():
+ Loads data from the record component based on data array coordinates,
+ adjusting for offsets and extents, and updates the DataArray values.
+
+ """
+
+ def _verify_init(self):
+ """Verify that the data is contiguous.
+
+ Check if the chosen subset is contiguous in the openPMD storage by checking coordinate
+ differences against the expected grid spacing.
+
+ This is needed since openPMD does nto allow us to load strided chunks.
+ """
+ for dim in self.dims:
+ coord = self.coords[dim]
+ diffs = coord[dim, 1:] - coord[dim, :-1]
+ diffs = diffs.to(unit="m")
+ idx = list(self.record.axis_labels).index(dim)
+ step = self.record.grid_spacing[idx]
+ step *= self.record.grid_unit_SI
+ step = step * sc.Unit("m")
+ assert sc.allclose(diffs, step), (
+ f"The data has to be contiguous! diffs: {diffs}, step: {step}"
+ )
+
+ def __init__(self, series, record, record_component, dummy_array, coords):
+ """Initialize the DataRelay object.
+
+ :param series: The openPMD series object associated with the data.
+ :type series: openpmd_api.Series
+ :param record: The openPMD record object associated with the mesh.
+ :type record: openpmd_api.Record
+ :param record_component: The openPMD record component associated with the mesh.
+ :type record_component: openpmd_api.Record_Component
+ :param dummy_array: A scipp array used for the dummy interface. It should use as little
+ memory as possible. Usually achieved by setting the stride of the values array to 0. Can
+ be read- only.
+ :type dummy_array: sc.array
+ :param coords: A dictionary of coordinates for the DataArray.
+ """
+ super().__init__(data=dummy_array, coords=coords)
+ self.series = series
+ self.record = record
+ self.record_component = record_component
+ self._verify_init()
+
+ def __getitem__(self, *args, **kwargs):
+ """Retrieve a subset of the data, returning a new DataRelay instance.
+
+ Override this method from the base class to use the DataRelay initializer and ensure that
+ DataRelay is returned and the _verify_init method is used.
+
+ :param args: Forwarded to the base class.
+ :type args: tuple
+ :param kwargs: Forwarded to the base class.
+ :type kwargs: dict
+ :return: A new DataRelay instance with the sliced data.
+ :rtype: DataRelay
+ """
+ dummy_data_aray = super().__getitem__(*args, **kwargs)
+ return DataRelay(
+ series=self.series,
+ record=self.record,
+ record_component=self.record_component,
+ dummy_array=dummy_data_aray.data,
+ coords=dummy_data_aray.coords,
+ )
+
+ def load_data(self):
+ """Load data from the openPMD dataset.
+
+ Loads a chunk based on the current data array coordinates.
+
+ Calculates the offset and extent for each dimension using the data array coordinates. Loads
+ the data chunk from the record component, scales it by the unit SI, and returns a new data
+ array with loaded values.
+
+ :return: The DataArray instance with the loaded data.
+ :rtype: DataRelay
+ """
+ offset = [0] * self.record_component.ndim
+ extent = [0] * self.record_component.ndim
+ for dd, dim in enumerate(self.record.axis_labels):
+ try:
+ start = self.coords[dim].to(unit="m").value
+ except sc.DimensionError:
+ start = self.coords[dim][0].to(unit="m").value
+ start /= self.record.grid_unit_SI
+ start -= self.record.grid_global_offset[dd]
+ start /= self.record.grid_spacing[dd]
+ start -= self.record_component.position[dd]
+ offset[dd] = int(round(start))
+ extent[dd] = self.coords[dim].size
+ data = self.record_component.load_chunk(offset=offset, extent=extent)
+ self.series.flush()
+ data *= self.record_component.unit_SI
+ data = np.squeeze(data)
+ data_array = self.copy()
+ data_array.values = data
+ return data_array
+
+
+def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR):
+ """Get openPMD mesh as a data relay.
+
+ Create a DataRelay object for a specified field and component in an openPMD series.
+
+ :param series: The openPMD series containing the data.
+ :type series: openpmd_api.Series
+ :param iteration: The iteration number to access within the series.
+ :type iteration: int
+ :param field: The name of the field to retrieve.
+ :type field: str
+ :param component: The component of the field to retrieve, default is SCALAR.
+ :type component: openpmd_api.Mesh_Record_Component, optional
+ :return: A DataRelay instance initialized with the specified field and component data.
+ :rtype: DataRelay
+ """
+ record = series.iterations[iteration].meshes[field]
+ rc = record[component]
+ dims = record.axis_labels
+ time = (series.iterations[iteration].time + record.time_offset) * series.iterations[
+ iteration
+ ].time_unit_SI
+ time = sc.scalar(time, unit="s", dtype="double")
+ coords = {"t": time}
+ for dd, dim in enumerate(dims):
+ length = rc.shape[dd]
+ start = record.grid_global_offset[dd]
+ step = record.grid_spacing[dd]
+ values = np.arange(length, dtype=np.float64)
+ values *= step
+ values += rc.position[dd] * step
+ values += start
+ values *= record.grid_unit_SI
+ coord = sc.array(dims=[dim], values=values, unit="m")
+ coords[dim] = coord
+
+ small = np.zeros(1, dtype=rc.dtype)
+ dummy_array = np.lib.stride_tricks.as_strided(
+ small, shape=rc.shape, strides=[0] * rc.ndim, writeable=False
+ )
+ dummy_array = sc.array(
+ dims=dims, values=dummy_array, unit=_unit_dimension_to_scipp(record.unit_dimension)
+ )
+
+ return DataRelay(
+ series=series, record=record, record_component=rc, dummy_array=dummy_array, coords=coords
+ )
+
+
+def get_field(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR):
+ """Retrieve and load openPMD mesh data without slicing.
+
+ This function creates a DataRelay object for a specified field and component in an openPMD
+ series, loads the whole mesh, and returns the resulting DataArray.
+
+ :param series: The openPMD series containing the data.
+ :type series: openpmd_api.Series
+ :param iteration: The iteration number to access within the series.
+ :type iteration: int
+ :param field: The name of the field to retrieve.
+ :type field: str
+ :param component: The component of the field to retrieve, default is SCALAR.
+ :type component: openpmd_api.Mesh_Record_Component, optional
+ :return: A DataArray instance with the loaded data.
+ :rtype: DataRelay
+ """
+ return get_field_data_relay(series, iteration, field, component).load_data()
diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py
new file mode 100644
index 0000000000..7f47645a59
--- /dev/null
+++ b/src/binding/python/openpmd_api/scipp/utils.py
@@ -0,0 +1,86 @@
+"""Utility module.
+
+Author:
+ Pawel Ordyna
+
+License:
+GPL - 3.0 license. See LICENSE file for details.
+"""
+
+import numpy as np
+import scipp as sc
+
+
+def _unit_dimension_to_scipp(unit_dimension):
+ """Convert a unit dimension from the openPMD standard to a Scipp unit.
+
+ This function takes a tuple representing the powers of the seven base SI units
+ (length, mass, time, electric current, thermodynamic temperature, amount of substance,
+ and luminous intensity) and converts it into a Scipp unit. The conversion is based on
+ the openPMD standard, which describes units as powers of these base measures.
+
+ :param tuple unit_dimension: A tuple containing seven integers, each representing
+ the power of a base SI unit in the order: (length, mass, time, electric current,
+ thermodynamic temperature, amount of substance, luminous intensity).
+ For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s²).
+
+ :returns: A Scipp unit object representing the combined unit as specified by the input
+ unit dimensions.
+ :rtype: sc.Unit
+
+ :example:
+ >>> _unit_dimension_to_scipp((1, 0, -2, 0, 0, 0, 0))
+ Unit('m/s^2')
+
+ :notes:
+ - The function assumes that the input tuple has exactly seven elements.
+ - Each element in the tuple corresponds to the power of a specific base unit.
+ """
+ # unit dimension description from the openPMD standard:
+ # powers of the 7 base measures characterizing the record's unit in SI
+ # (length L, mass M, time T, electric current I, thermodynamic temperature theta,
+ # amount of substance N, luminous intensity J)
+ base_units = (
+ 1.0 * sc.Unit("m"),
+ 1.0 * sc.Unit("kg"),
+ 1.0 * sc.Unit("s"),
+ 1.0 * sc.Unit("A"),
+ 1.0 * sc.Unit("K"),
+ 1.0 * sc.Unit("mol"),
+ 1.0 * sc.Unit("cd"),
+ )
+ unit = 1.0 * sc.Unit("1")
+ for dim, base_unit in zip(unit_dimension, base_units, strict=False):
+ if dim != 0:
+ unit *= base_unit**dim
+ return unit.unit
+
+
+def closest(data, dim, val):
+ """Find the index of the closest value in a dataset along a specified dimension.
+
+ This function calculates the index of the element in the specified dimension
+ of the dataset that is closest to the given value. It ensures that the value
+ is converted to the same unit as the dimension's coordinate before performing
+ the comparison.
+
+ :param data: The data array containing the dimension to search.
+ :type data: sc.DataArray
+ :param dim: The name of the dimension along which to find the closest value.
+ :type dim: str
+ :param val: The value to compare against, which will be converted to the unit of
+ the dimension's coordinate.
+ :type val: sc.Variable
+
+ :return: The index of the closest value in the specified dimension.
+ :rtype: int
+
+ :notes:
+ - The function assumes that `val` can be converted to the unit of the
+ dimension's coordinate.
+ - The dataset `data` must have coordinates defined for the specified dimension.
+ """
+ val = val.astype("double")
+ coord = data.coords[dim]
+ val = val.to(unit=coord.unit)
+ return np.argmin(sc.abs(coord - val).values)
From 7c1e4846d69f102039b4a3c7bdf02d4879da2dd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Fri, 31 Jan 2025 18:12:25 +0100
Subject: [PATCH 02/24] WIP: Try lazy loading scipp converter
---
CMakeLists.txt | 3 +++
examples/15_scipp_loader.py | 9 +++++++++
src/binding/python/openpmd_api/ScippLazyInit.py | 8 ++++++++
src/binding/python/openpmd_api/__init__.py | 2 ++
src/binding/python/openpmd_api/scipp/loader.py | 6 +++---
src/binding/python/openpmd_api/scipp/mesh_loader.py | 7 +++----
6 files changed, 28 insertions(+), 7 deletions(-)
create mode 100644 examples/15_scipp_loader.py
create mode 100644 src/binding/python/openpmd_api/ScippLazyInit.py
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3a0cc30c2..e3f931ffa8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -665,6 +665,8 @@ if(openPMD_HAVE_PYTHON)
__init__.py DaskArray.py DaskDataFrame.py DataFrame.py
ls/__init__.py ls/__main__.py
pipe/__init__.py pipe/__main__.py
+ scipp/__init__.py scipp/loader.py scipp/mesh_loader.py scipp/utils.py
+ ScippLazyInit.py
)
endif()
@@ -720,6 +722,7 @@ set(openPMD_PYTHON_EXAMPLE_NAMES
11_particle_dataframe
12_span_write
13_write_dynamic_configuration
+ 15_scipp_loader
)
if(openPMD_USE_INVASIVE_TESTS)
diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py
new file mode 100644
index 0000000000..e42f2c2418
--- /dev/null
+++ b/examples/15_scipp_loader.py
@@ -0,0 +1,9 @@
+import openpmd_api as pmd
+
+
+def main():
+ series = pmd.Series("./out.bp5", pmd.Access.read_only)
+ scipp_loader = series.to_scipp()
+
+if __name__ == "__main__":
+ main()
diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py
new file mode 100644
index 0000000000..e3acc45ccc
--- /dev/null
+++ b/src/binding/python/openpmd_api/ScippLazyInit.py
@@ -0,0 +1,8 @@
+def series_to_scipp(series):
+
+ import scipp
+
+ from .scipp import DataLoader
+
+ dl = DataLoader(series)
+ return dl
diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py
index 09f21026f9..4fca249e68 100644
--- a/src/binding/python/openpmd_api/__init__.py
+++ b/src/binding/python/openpmd_api/__init__.py
@@ -3,6 +3,7 @@
from .DaskDataFrame import particles_to_daskdataframe
from .DataFrame import (iterations_to_cudf, iterations_to_dataframe,
particles_to_dataframe)
+from .ScippLazyInit import series_to_scipp
from .openpmd_api_cxx import * # noqa
__version__ = cxx.__version__
@@ -16,6 +17,7 @@
Record_Component.to_dask_array = record_component_to_daskarray # noqa
Series.to_df = iterations_to_dataframe # noqa
Series.to_cudf = iterations_to_cudf # noqa
+Series.to_scipp = series_to_scipp
# TODO remove in future versions (deprecated)
Access_Type = Access # noqa
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
index acbf88410b..45bbf5cff6 100644
--- a/src/binding/python/openpmd_api/scipp/loader.py
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -61,7 +61,7 @@ class DataLoader:
"""
- def __init__(self, path):
+ def __init__(self, series):
"""Initialize the DataLoader with an openPMD series from the specified file path.
:param path: The file path to the openPMD data file.
@@ -71,13 +71,13 @@ def __init__(self, path):
and the `iterations` attribute as a Scipp dataset containing iteration IDs
and their corresponding times.
"""
- self.series = pmd.Series(str(path), pmd.Access.read_only)
+ self.series = series
self.iterations = get_iterations(self.series)
def get_field(
self,
field,
- component=pmd.Mesh_Record_Component.SCALAR,
+ component=None,
time=None,
iteration=None,
relay=False,
diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py
index 24b3576dce..e18178bd9b 100644
--- a/src/binding/python/openpmd_api/scipp/mesh_loader.py
+++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py
@@ -8,7 +8,6 @@
"""
import numpy as np
-import openpmd_api as pmd
import scipp as sc
from .utils import _unit_dimension_to_scipp
@@ -135,7 +134,7 @@ def load_data(self):
return data_array
-def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR):
+def get_field_data_relay(series, iteration, field, component=None):
"""Get openPMD mesh as a data relay.
Create a DataRelay object for a specified field and component in an openPMD series.
@@ -152,7 +151,7 @@ def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Com
:rtype: DataRelay
"""
record = series.iterations[iteration].meshes[field]
- rc = record[component]
+ rc = record[component] if component else record
dims = record.axis_labels
time = (series.iterations[iteration].time + record.time_offset) * series.iterations[
iteration
@@ -184,7 +183,7 @@ def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Com
)
-def get_field(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR):
+def get_field(series, iteration, field, component=None):
"""Retrieve and load openPMD mesh data without slicing.
This function creates a DataRelay object for a specified field and component in an openPMD
From 50a9561aca42e10a8a1e6415d6943ca296da7bd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Tue, 4 Feb 2025 14:18:32 +0100
Subject: [PATCH 03/24] Add preliminary example
---
examples/15_scipp_loader.py | 41 ++++++++++++++++++++++++++++++++++++-
1 file changed, 40 insertions(+), 1 deletion(-)
diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py
index e42f2c2418..4158b54500 100644
--- a/examples/15_scipp_loader.py
+++ b/examples/15_scipp_loader.py
@@ -1,9 +1,48 @@
import openpmd_api as pmd
+import openpmd_api.scipp as pmdsc
+import scipp as sc
def main():
- series = pmd.Series("./out.bp5", pmd.Access.read_only)
+ series = pmd.Series("../samples/git-sample/data%T.h5", pmd.Access.read_only)
+
+ time = 65 * sc.Unit("fs")
scipp_loader = series.to_scipp()
+ print(scipp_loader.iterations)
+ Ex = scipp_loader.get_field("E", "x", time=time)
+ print(Ex)
+ slicing_idx = pmdsc.closest(Ex, "x", 2 * sc.Unit("um"))
+ Ex_slice = Ex["x", slicing_idx]
+ print(Ex_slice)
+ Ex_slice.plot().save("slice.png")
+ Ex_line = Ex_slice["z", pmdsc.closest(Ex_slice, "z", 1.4e-5 * sc.Unit("m"))]
+ print(Ex_line)
+ Ex_line.plot().save("line.png")
+ (Ex_line * Ex_line).plot().save("line_squared.png")
+
+ # The full 3D array is not loaded into memory at this point.
+ Ex = scipp_loader.get_field("E", "x", time=time, relay=True)
+ # This time we will select a range rather than a slice.
+ # For a range there is no need for an exact match.
+ # But, we could also select a slice just like in the previous example.
+ Ex = Ex["x", -2e-6 * sc.Unit("m") : 2e-6 * sc.Unit("m")]
+ # Only now the smaller subset wil be loaded into memory
+ Ex = Ex.load_data()
+ print(Ex)
+
+ Ex = sc.concat(
+ [
+ scipp_loader.get_field("E", "x", iteration=iteration.value, time_tolerance=None)
+ for iteration in scipp_loader.iterations["iteration_id"]
+ ],
+ dim="t",
+ )
+ print(Ex)
+
+ # Let us just slice at some points to get a 2D dataset
+ Ex = Ex["x", 10]["y", 10]
+ print(Ex)
+ Ex.plot().save("moving_window.png")
if __name__ == "__main__":
main()
From 2c36906c182c356c7568e74146a22b1969c335c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Tue, 4 Feb 2025 13:34:12 +0100
Subject: [PATCH 04/24] Move lazy_init under scipp folder again
---
CMakeLists.txt | 2 +-
src/binding/python/openpmd_api/__init__.py | 2 +-
.../openpmd_api/{ScippLazyInit.py => scipp/lazy_init.py} | 3 +--
3 files changed, 3 insertions(+), 4 deletions(-)
rename src/binding/python/openpmd_api/{ScippLazyInit.py => scipp/lazy_init.py} (72%)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3f931ffa8..9129cf5b2c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -666,7 +666,7 @@ if(openPMD_HAVE_PYTHON)
ls/__init__.py ls/__main__.py
pipe/__init__.py pipe/__main__.py
scipp/__init__.py scipp/loader.py scipp/mesh_loader.py scipp/utils.py
- ScippLazyInit.py
+ scipp/lazy_init.py
)
endif()
diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py
index 4fca249e68..38b8827960 100644
--- a/src/binding/python/openpmd_api/__init__.py
+++ b/src/binding/python/openpmd_api/__init__.py
@@ -3,8 +3,8 @@
from .DaskDataFrame import particles_to_daskdataframe
from .DataFrame import (iterations_to_cudf, iterations_to_dataframe,
particles_to_dataframe)
-from .ScippLazyInit import series_to_scipp
from .openpmd_api_cxx import * # noqa
+from .scipp.lazy_init import series_to_scipp
__version__ = cxx.__version__
__doc__ = cxx.__doc__
diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/scipp/lazy_init.py
similarity index 72%
rename from src/binding/python/openpmd_api/ScippLazyInit.py
rename to src/binding/python/openpmd_api/scipp/lazy_init.py
index e3acc45ccc..ebca4e8125 100644
--- a/src/binding/python/openpmd_api/ScippLazyInit.py
+++ b/src/binding/python/openpmd_api/scipp/lazy_init.py
@@ -1,8 +1,7 @@
def series_to_scipp(series):
-
import scipp
- from .scipp import DataLoader
+ from . import DataLoader
dl = DataLoader(series)
return dl
From c2b8412dcfc29b46cc2c412c3afdd7fb54403ee0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Tue, 4 Feb 2025 13:46:06 +0100
Subject: [PATCH 05/24] Revert "Move lazy_init under scipp folder again"
This reverts commit 7d4aede2d62a6bd38a6fcfa8147d59f5a06be577.
---
CMakeLists.txt | 2 +-
.../openpmd_api/{scipp/lazy_init.py => ScippLazyInit.py} | 3 ++-
src/binding/python/openpmd_api/__init__.py | 2 +-
3 files changed, 4 insertions(+), 3 deletions(-)
rename src/binding/python/openpmd_api/{scipp/lazy_init.py => ScippLazyInit.py} (72%)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9129cf5b2c..e3f931ffa8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -666,7 +666,7 @@ if(openPMD_HAVE_PYTHON)
ls/__init__.py ls/__main__.py
pipe/__init__.py pipe/__main__.py
scipp/__init__.py scipp/loader.py scipp/mesh_loader.py scipp/utils.py
- scipp/lazy_init.py
+ ScippLazyInit.py
)
endif()
diff --git a/src/binding/python/openpmd_api/scipp/lazy_init.py b/src/binding/python/openpmd_api/ScippLazyInit.py
similarity index 72%
rename from src/binding/python/openpmd_api/scipp/lazy_init.py
rename to src/binding/python/openpmd_api/ScippLazyInit.py
index ebca4e8125..e3acc45ccc 100644
--- a/src/binding/python/openpmd_api/scipp/lazy_init.py
+++ b/src/binding/python/openpmd_api/ScippLazyInit.py
@@ -1,7 +1,8 @@
def series_to_scipp(series):
+
import scipp
- from . import DataLoader
+ from .scipp import DataLoader
dl = DataLoader(series)
return dl
diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py
index 38b8827960..4fca249e68 100644
--- a/src/binding/python/openpmd_api/__init__.py
+++ b/src/binding/python/openpmd_api/__init__.py
@@ -3,8 +3,8 @@
from .DaskDataFrame import particles_to_daskdataframe
from .DataFrame import (iterations_to_cudf, iterations_to_dataframe,
particles_to_dataframe)
+from .ScippLazyInit import series_to_scipp
from .openpmd_api_cxx import * # noqa
-from .scipp.lazy_init import series_to_scipp
__version__ = cxx.__version__
__doc__ = cxx.__doc__
From 7e93f34732bfdcc218facf5c8c6e76e2b6d98835 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Tue, 4 Feb 2025 14:05:05 +0100
Subject: [PATCH 06/24] Some import checks
---
examples/15_scipp_loader.py | 12 +++++++++---
src/binding/python/openpmd_api/ScippLazyInit.py | 11 ++++++++++-
2 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py
index 4158b54500..874a0b7b08 100644
--- a/examples/15_scipp_loader.py
+++ b/examples/15_scipp_loader.py
@@ -1,13 +1,19 @@
import openpmd_api as pmd
-import openpmd_api.scipp as pmdsc
-import scipp as sc
def main():
series = pmd.Series("../samples/git-sample/data%T.h5", pmd.Access.read_only)
+ try:
+ scipp_loader = series.to_scipp()
+ import plopp
+ except ImportError:
+ print("Need to install scipp and plopp to run this example.")
+ return
+ import openpmd_api.scipp as pmdsc
+ import scipp as sc
time = 65 * sc.Unit("fs")
- scipp_loader = series.to_scipp()
+
print(scipp_loader.iterations)
Ex = scipp_loader.get_field("E", "x", time=time)
print(Ex)
diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py
index e3acc45ccc..e2c3832ddf 100644
--- a/src/binding/python/openpmd_api/ScippLazyInit.py
+++ b/src/binding/python/openpmd_api/ScippLazyInit.py
@@ -1,6 +1,15 @@
def series_to_scipp(series):
- import scipp
+ # lazy import
+ try:
+ import scipp
+ found_scipp = True
+ except ImportError as original_error:
+ found_scipp = False
+ original_error_string = f"{original_error}"
+
+ if not found_scipp:
+ raise ImportError(f"Scipp NOT found. Install scipp for Scipp support. Original error: {original_error_string}")
from .scipp import DataLoader
From 13d6318bfb5d042a41008a4cdc43229731055231 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Wed, 5 Feb 2025 12:14:33 +0100
Subject: [PATCH 07/24] Add Scipp documentation from README.md
converted with Pandoc
---
docs/source/analysis/README_15_0.svg | 856 ++++++++++++++++++++++++++
docs/source/analysis/README_17_0.svg | 771 ++++++++++++++++++++++++
docs/source/analysis/README_19_0.svg | 786 ++++++++++++++++++++++++
docs/source/analysis/README_26_0.svg | 863 +++++++++++++++++++++++++++
docs/source/analysis/scipp.rst | 370 ++++++++++++
docs/source/index.rst | 1 +
6 files changed, 3647 insertions(+)
create mode 100644 docs/source/analysis/README_15_0.svg
create mode 100644 docs/source/analysis/README_17_0.svg
create mode 100644 docs/source/analysis/README_19_0.svg
create mode 100644 docs/source/analysis/README_26_0.svg
create mode 100644 docs/source/analysis/scipp.rst
diff --git a/docs/source/analysis/README_15_0.svg b/docs/source/analysis/README_15_0.svg
new file mode 100644
index 0000000000..45bb3b0208
--- /dev/null
+++ b/docs/source/analysis/README_15_0.svg
@@ -0,0 +1,856 @@
+
+
+
diff --git a/docs/source/analysis/README_17_0.svg b/docs/source/analysis/README_17_0.svg
new file mode 100644
index 0000000000..32bc944ba3
--- /dev/null
+++ b/docs/source/analysis/README_17_0.svg
@@ -0,0 +1,771 @@
+
+
+
diff --git a/docs/source/analysis/README_19_0.svg b/docs/source/analysis/README_19_0.svg
new file mode 100644
index 0000000000..212d7b21bc
--- /dev/null
+++ b/docs/source/analysis/README_19_0.svg
@@ -0,0 +1,786 @@
+
+
+
diff --git a/docs/source/analysis/README_26_0.svg b/docs/source/analysis/README_26_0.svg
new file mode 100644
index 0000000000..48eae95e3c
--- /dev/null
+++ b/docs/source/analysis/README_26_0.svg
@@ -0,0 +1,863 @@
+
+
+
diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst
new file mode 100644
index 0000000000..ac0033fe31
--- /dev/null
+++ b/docs/source/analysis/scipp.rst
@@ -0,0 +1,370 @@
+.. _analysis-scipp:
+
+Scipp
+=====
+
+Load openpmd datasets to ``scipp`` ``DataArrays``.
+
+What is this good for?
+~~~~~~~~~~~~~~~~~~~~~~
+
+`scipp `__ is an alternative to
+`xarray `__ and provides basically
+numpy arrays with axes description and units.
+
+* Automatically load axes and units with openPMD data.
+* Axes information is automatically updated when slicing, indexing, or filtering your data.
+* With ``scipp``\ ’s plotting library `plopp `__ it becomes an alternative to ``openpmd-viewer``.
+* Many numpy and some scipy functions including all the basic algebraic operations on arrays are supported by ``scipp``. When using these, the units and coordinates are automatically taken care of.
+
+Limitations
+~~~~~~~~~~~
+
+- ``scipp`` currently handles units with a library, that does not
+ support non-integer exponents for units. This can become problematic
+ in some calculations.
+
+Installation
+------------
+
+It can be easily installed with pip.
+
+.. code:: bash
+
+ git clone https://github.com/pordyna/openpmd_scipp.git
+ cd openpmd-scipp
+ pip install .
+
+Getting started
+---------------
+
+Get example data sets from the ``openPMD-example-datasets`` repository.
+
+.. code:: bash
+
+ git clone https://github.com/openPMD/openPMD-example-datasets.git
+ cd openPMD-example-datasets
+ tar -zxvf example-2d.tar.gz
+ tar -zxvf example-3d.tar.gz
+
+Opening series
+~~~~~~~~~~~~~~
+
+.. code:: python
+
+ import scipp as sc
+
+ import openpmd_scipp as pmdsc
+
+.. code:: python
+
+ path = "openPMD-example-datasets/example-3d/hdf5/data%T.h5"
+
+.. code:: python
+
+ path = ".data/" + path
+
+.. code:: python
+
+ data_loader = pmdsc.DataLoader(path)
+ print(data_loader.iterations)
+
+::
+
+
+ Dimensions: Sizes[t:5, ]
+ Coordinates:
+ * t float64 [s] (t) [3.28471e-14, 6.56942e-14, ..., 1.31388e-13, 1.64236e-13]
+ Data:
+ iteration_id int64 [dimensionless] (t) [100, 200, ..., 400, 500]
+
+Working with meshes (fields)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Let us plot electric field’s x component at 65 fs.
+
+.. code:: python
+
+ Ex = data_loader.get_field("E", "x", time=65 * sc.Unit("fs"))
+ print(Ex)
+
+::
+
+ Series does not contain iteration at the exact time. Using closest iteration instead.
+
+
+
+ Dimensions: Sizes[x:26, y:26, z:201, ]
+ Coordinates:
+ * t float64 [s] () 6.56942e-14
+ * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05]
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05]
+ Data:
+ float64 [V/m] (x, y, z) [-1.08652e+08, -1.9758e+08, ..., 0, 0]
+
+You may have noticed, that the time requested does not have to match
+exactly any iteration. By default, if there is an iteration within 10 fs
+distance it will be used instead. This 10 fs tolerance can be adjusted
+by setting ``time_tolerance``. The check can be also disabled by setting
+``time_tolerance=None``, with that the method will return the closest
+iteration regardless of the difference. So that this will also work:
+
+.. code:: python
+
+ print(data_loader.get_field("E", "x", time=20 * sc.Unit("fs"), time_tolerance=20 * sc.Unit("fs")))
+
+::
+
+ Series does not contain iteration at the exact time. Using closest iteration instead.
+
+
+
+ Dimensions: Sizes[x:26, y:26, z:201, ]
+ Coordinates:
+ * t float64 [s] () 3.28471e-14
+ * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05]
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * z float64 [m] (z) [-5.2e-06, -5.1e-06, ..., 1.47e-05, 1.48e-05]
+ Data:
+ float64 [V/m] (x, y, z) [-1.08549e+07, -1.3967e+07, ..., 0, 0]
+
+, but ``data_loader.get_field('E', 'x', time=20 * sc.Unit('fs'))`` not.
+
+.. code:: python
+
+ # It is also possible to use iteration number instead:
+ print(data_loader.get_field("E", "x", iteration=200))
+
+::
+
+
+ Dimensions: Sizes[x:26, y:26, z:201, ]
+ Coordinates:
+ * t float64 [s] () 6.56942e-14
+ * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05]
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05]
+ Data:
+ float64 [V/m] (x, y, z) [-1.08652e+08, -1.9758e+08, ..., 0, 0]
+
+.. code:: python
+
+ # For scalar fields just omit the second argument:
+ print(data_loader.get_field("rho", iteration=200))
+
+::
+
+
+ Dimensions: Sizes[x:26, y:26, z:201, ]
+ Coordinates:
+ * t float64 [s] () 6.56942e-14
+ * x float64 [m] (x) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05]
+ Data:
+ float64 [mC/L] (x, y, z) [-7169.01, -7526.4, ..., 3.16049e-11, 1.22782e-11]
+
+Plotting
+^^^^^^^^
+
+W can’t directly plot 3D data. But we can for example select a slice.
+For that we can use a helper function ``pmdsc.closest`` to get the
+closets index, since ``scipp`` requires exact match. You can read more
+about indexing ``scipp`` arrays in ``scipp``\ ’s documentation.
+
+.. code:: python
+
+ slicing_idx = pmdsc.closest(Ex, "x", 2 * sc.Unit("um"))
+ Ex_slice = Ex["x", slicing_idx]
+ print(Ex_slice)
+
+::
+
+
+ Dimensions: Sizes[y:26, z:201, ]
+ Coordinates:
+ * t float64 [s] () 6.56942e-14
+ x float64 [m] () 2.4e-06
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05]
+ Data:
+ float64 [V/m] (y, z) [4.86614e+08, 6.67018e+08, ..., 0, 0]
+
+.. code:: python
+
+ Ex_slice.plot()
+
+.. figure:: README_15_0.svg
+
+.. code:: python
+
+ # We can also plot line plots:
+ Ex_line = Ex_slice["z", pmdsc.closest(Ex_slice, "z", 1.4e-5 * sc.Unit("m"))]
+ print(Ex_line)
+
+::
+
+
+ Dimensions: Sizes[y:26, ]
+ Coordinates:
+ * t float64 [s] () 6.56942e-14
+ x float64 [m] () 2.4e-06
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ z float64 [m] () 1.4e-05
+ Data:
+ float64 [V/m] (y) [3.46069e+07, -2.94134e+07, ..., 8.89353e+06, -4.32182e+07]
+
+.. code:: python
+
+ Ex_line.plot()
+
+.. figure:: README_17_0.svg
+
+Alternatively it is possible to work interactively with ``plopp``\ ’s
+tools for visualizing multidimensional data, such as ``pp.slicer``\ or
+``pp.inspector``.
+
+Doing math
+^^^^^^^^^^
+
+Just as an example we can easily plot the square of the field:
+
+.. code:: python
+
+ (Ex_line * Ex_line).plot()
+
+.. figure:: README_19_0.svg
+
+Loading chunks
+~~~~~~~~~~~~~~
+
+In the above example the whole 3D field is loaded into memory and sliced
+afterward. It is also possible to just load a sub-chunk into memory.
+When the ``relay`` option in ``get_field`` is set to ``True`` it will
+return a dummy object that only allocates memory for a single value.
+This relay object can be indexed, sliced etc. using the ``scipp``
+indexing just like before. (The only limitation given by the
+``openpmd-api`` is that the result has to a be contiguous chunk of the
+original array). The ``load_data`` method loads data and returns a
+proper ``scipp`` data array.
+
+.. code:: python
+
+ # The full 3D array is not loaded into memory at this point.
+ Ex = data_loader.get_field("E", "x", time=65 * sc.Unit("fs"), relay=True)
+ # This time we will select a range rather than a slice.
+ # For a range there is no need for an exact match.
+ # But, we could also select a slice just like in the previous example.
+ Ex = Ex["x", -2e-6 * sc.Unit("m") : 2e-6 * sc.Unit("m")]
+ # Only now the smaller subset wil be loaded into memory
+ Ex = Ex.load_data()
+ print(Ex)
+
+::
+
+ Series does not contain iteration at the exact time. Using closest iteration instead.
+
+
+
+ Dimensions: Sizes[x:5, y:26, z:201, ]
+ Coordinates:
+ * t float64 [s] () 6.56942e-14
+ * x float64 [m] (x) [-1.6e-06, -8e-07, ..., 8e-07, 1.6e-06]
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05]
+ Data:
+ float64 [V/m] (x, y, z) [-3.65733e+08, -5.01237e+08, ..., 0, 0]
+
+Time axis
+~~~~~~~~~
+
+It is also possible to combine arrays from different iterations into one
+using ``scipp``\ ’s ``concat`` function. Here is an example for creating
+a 4D array from all iterations:
+
+.. code:: python
+
+ Ex = sc.concat(
+ [
+ data_loader.get_field("E", "x", iteration=iteration.value, time_tolerance=None)
+ for iteration in data_loader.iterations["iteration_id"]
+ ],
+ dim="t",
+ )
+ print(Ex)
+
+::
+
+
+ Dimensions: Sizes[t:5, x:26, y:26, z:201, ]
+ Coordinates:
+ * t float64 [s] (t) [3.28471e-14, 6.56942e-14, ..., 1.31388e-13, 1.64236e-13]
+ * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05]
+ * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05]
+ * z float64 [m] (t, z) [-5.2e-06, -5.1e-06, ..., 5.41e-05, 5.42e-05]
+ Data:
+ float64 [V/m] (t, x, y, z) [-1.08549e+07, -1.3967e+07, ..., 0, 0]
+
+The reason for the z coordinate having two dimensions (t,z) is the fact
+that the data comes from a moving window simulation. This is clearly
+visible in the plot below.
+
+.. code:: python
+
+ # Let us just slice at some points to get a 2D dataset
+ Ex = Ex["x", 10]["y", 10]
+ print(Ex)
+
+::
+
+
+ Dimensions: Sizes[t:5, z:201, ]
+ Coordinates:
+ * t float64 [s] (t) [3.28471e-14, 6.56942e-14, ..., 1.31388e-13, 1.64236e-13]
+ x float64 [m] () -1.6e-06
+ y float64 [m] () -2e-06
+ * z float64 [m] (t, z) [-5.2e-06, -5.1e-06, ..., 5.41e-05, 5.42e-05]
+ Data:
+ float64 [V/m] (t, z) [-8.41738e+08, -7.8752e+08, ..., 0, 0]
+
+.. code:: python
+
+ Ex.plot()
+
+.. figure:: README_26_0.svg
+
+Working with particle data
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Coming soon!
+
+Developer documentation
+-----------------------
+
+Generating this README
+~~~~~~~~~~~~~~~~~~~~~~
+
+README file is generated from the README.ipynb.
+
+::
+
+ # Download and extract example datasets if not present
+ # Will download data into ``.data``
+ make data
+
+ make docs
+
+Running tests
+~~~~~~~~~~~~~
+
+At the moment we only test we have is an integration test running this
+notebook. After downloading example datasets with ``make data``, if
+needed, run:
+
+::
+
+ make test
+
+You can also run tests with different python version with tox, but you
+need to have the python version installed, for example with pyenv.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index db2bbc3002..a3e08885d7 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -142,6 +142,7 @@ Data Analysis
analysis/pandas
analysis/dask
analysis/rapids
+ analysis/scipp
analysis/contrib
Development
From a647bfd1e089a5497b11ad2ffbd82bdfd6160049 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Wed, 5 Feb 2025 16:43:19 +0100
Subject: [PATCH 08/24] Some formatting fixes
---
docs/source/analysis/scipp.rst | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst
index ac0033fe31..009d892362 100644
--- a/docs/source/analysis/scipp.rst
+++ b/docs/source/analysis/scipp.rst
@@ -14,18 +14,18 @@ numpy arrays with axes description and units.
* Automatically load axes and units with openPMD data.
* Axes information is automatically updated when slicing, indexing, or filtering your data.
-* With ``scipp``\ ’s plotting library `plopp `__ it becomes an alternative to ``openpmd-viewer``.
+* With ``scipp``'s plotting library `plopp `__ it becomes an alternative to ``openpmd-viewer``.
* Many numpy and some scipy functions including all the basic algebraic operations on arrays are supported by ``scipp``. When using these, the units and coordinates are automatically taken care of.
Limitations
-~~~~~~~~~~~
+-----------
- ``scipp`` currently handles units with a library, that does not
support non-integer exponents for units. This can become problematic
in some calculations.
Installation
-------------
+~~~~~~~~~~~~
It can be easily installed with pip.
@@ -36,7 +36,7 @@ It can be easily installed with pip.
pip install .
Getting started
----------------
+~~~~~~~~~~~~~~~
Get example data sets from the ``openPMD-example-datasets`` repository.
@@ -79,9 +79,9 @@ Opening series
iteration_id int64 [dimensionless] (t) [100, 200, ..., 400, 500]
Working with meshes (fields)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------
-Let us plot electric field’s x component at 65 fs.
+Let us plot electric field's x component at 65 fs.
.. code:: python
@@ -166,12 +166,12 @@ iteration regardless of the difference. So that this will also work:
float64 [mC/L] (x, y, z) [-7169.01, -7526.4, ..., 3.16049e-11, 1.22782e-11]
Plotting
-^^^^^^^^
+--------
-W can’t directly plot 3D data. But we can for example select a slice.
+We can't directly plot 3D data. But we can for example select a slice.
For that we can use a helper function ``pmdsc.closest`` to get the
closets index, since ``scipp`` requires exact match. You can read more
-about indexing ``scipp`` arrays in ``scipp``\ ’s documentation.
+about indexing ``scipp`` arrays in ``scipp``'s documentation.
.. code:: python
@@ -221,12 +221,12 @@ about indexing ``scipp`` arrays in ``scipp``\ ’s documentation.
.. figure:: README_17_0.svg
-Alternatively it is possible to work interactively with ``plopp``\ ’s
+Alternatively it is possible to work interactively with ``plopp``'s
tools for visualizing multidimensional data, such as ``pp.slicer``\ or
``pp.inspector``.
Doing math
-^^^^^^^^^^
+----------
Just as an example we can easily plot the square of the field:
@@ -237,7 +237,7 @@ Just as an example we can easily plot the square of the field:
.. figure:: README_19_0.svg
Loading chunks
-~~~~~~~~~~~~~~
+--------------
In the above example the whole 3D field is loaded into memory and sliced
afterward. It is also possible to just load a sub-chunk into memory.
@@ -277,10 +277,10 @@ proper ``scipp`` data array.
float64 [V/m] (x, y, z) [-3.65733e+08, -5.01237e+08, ..., 0, 0]
Time axis
-~~~~~~~~~
+---------
It is also possible to combine arrays from different iterations into one
-using ``scipp``\ ’s ``concat`` function. Here is an example for creating
+using ``scipp``'s ``concat`` function. Here is an example for creating
a 4D array from all iterations:
.. code:: python
From 464572c8e77de7b9f84ffd6786a35894b12386b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 5 Feb 2025 16:43:37 +0100
Subject: [PATCH 09/24] Adapt documentation to integration into openPMD-api
---
docs/source/analysis/scipp.rst | 55 +++++++++-------------------------
1 file changed, 14 insertions(+), 41 deletions(-)
diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst
index 009d892362..6436092c99 100644
--- a/docs/source/analysis/scipp.rst
+++ b/docs/source/analysis/scipp.rst
@@ -27,13 +27,13 @@ Limitations
Installation
~~~~~~~~~~~~
-It can be easily installed with pip.
+The adaptor from openPMD to Scipp is part of the regular openPMD-api Python distribution.
+It is loaded lazily and available as soon as ``scipp`` is also installed.
+Plotting functionality requires the additional installation of ``plopp``.
.. code:: bash
- git clone https://github.com/pordyna/openpmd_scipp.git
- cd openpmd-scipp
- pip install .
+ pip install openpmd_api scipp plopp
Getting started
~~~~~~~~~~~~~~~
@@ -47,26 +47,29 @@ Get example data sets from the ``openPMD-example-datasets`` repository.
tar -zxvf example-2d.tar.gz
tar -zxvf example-3d.tar.gz
+.. note::
+
+ You can find scripts to download and unpack this sample data under ``share/openPMD``.
+
Opening series
-~~~~~~~~~~~~~~
+--------------
.. code:: python
import scipp as sc
- import openpmd_scipp as pmdsc
+ import openpmd_api as pmd
+ import openpmd_api.scipp as pmdsc
.. code:: python
path = "openPMD-example-datasets/example-3d/hdf5/data%T.h5"
+ path = "./data/" + path
.. code:: python
- path = ".data/" + path
-
-.. code:: python
-
- data_loader = pmdsc.DataLoader(path)
+ series = pmd.Series(path, pmd.Access.read_random_access)
+ data_loader = sereies.to_scipp()
print(data_loader.iterations)
::
@@ -338,33 +341,3 @@ Working with particle data
~~~~~~~~~~~~~~~~~~~~~~~~~~
Coming soon!
-
-Developer documentation
------------------------
-
-Generating this README
-~~~~~~~~~~~~~~~~~~~~~~
-
-README file is generated from the README.ipynb.
-
-::
-
- # Download and extract example datasets if not present
- # Will download data into ``.data``
- make data
-
- make docs
-
-Running tests
-~~~~~~~~~~~~~
-
-At the moment we only test we have is an integration test running this
-notebook. After downloading example datasets with ``make data``, if
-needed, run:
-
-::
-
- make test
-
-You can also run tests with different python version with tox, but you
-need to have the python version installed, for example with pyenv.
From 12625838a478ee7757cbb3058ff49a2ad6c1909a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Wed, 5 Feb 2025 17:33:34 +0100
Subject: [PATCH 10/24] Add Jupyter Notebook from openpmd_scipp repo
---
examples/15_scipp_loader.ipynb | 381 +++++++++++++++++++++++++++++++++
1 file changed, 381 insertions(+)
create mode 100644 examples/15_scipp_loader.ipynb
diff --git a/examples/15_scipp_loader.ipynb b/examples/15_scipp_loader.ipynb
new file mode 100644
index 0000000000..269aca4835
--- /dev/null
+++ b/examples/15_scipp_loader.ipynb
@@ -0,0 +1,381 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "2e89b77182b6ebd7",
+ "metadata": {},
+ "source": [
+ "# openpmd-scipp\n",
+ "Load openpmd datasets to `scipp` `DataArrays`.\n",
+ "\n",
+ "## Description\n",
+ "### What is this good for?\n",
+ "[`scipp`](https://github.com/scipp/scipp) is an alternative to [`xarray`](https://github.com/pydata/xarray) and provides basically numpy arrays with axes description and units.\n",
+ "* Automatically load axes and units with openPMD data.\n",
+ "* Axes information is automatically updated when slicing, indexing, or filtering your data.\n",
+ "* With `scipp`'s plotting library [`plopp`](https://github.com/scipp/plopp) it becomes an alternative to `openpmd-viewer`.\n",
+ "* Many numpy and some scipy functions including all the basic algebraic operations on arrays are supported by `scipp`. When using these, the units and coordinates are automatically taken care of. \n",
+ "\n",
+ "### Limitations\n",
+ "* `scipp` currently handles units with a library, that does not support non-integer exponents for units. This can become problematic in some calculations. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc4a0023fa8afef2",
+ "metadata": {},
+ "source": [
+ "\n",
+ "\n",
+ "## Installation\n",
+ "It can be easily installed with pip.\n",
+ "```bash\n",
+ "git clone https://github.com/pordyna/openpmd_scipp.git\n",
+ "cd openpmd-scipp\n",
+ "pip install .\n",
+ "```\n",
+ "\n",
+ "## Getting started\n",
+ "Get example data sets from the `openPMD-example-datasets` repository.\n",
+ "```bash\n",
+ "git clone https://github.com/openPMD/openPMD-example-datasets.git\n",
+ "cd openPMD-example-datasets\n",
+ "tar -zxvf example-2d.tar.gz\n",
+ "tar -zxvf example-3d.tar.gz\n",
+ "```\n",
+ " \n",
+ "\n",
+ "### Opening series"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "initial_id",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import scipp as sc\n",
+ "\n",
+ "import openpmd_scipp as pmdsc"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67bba264bde780ef",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = \"openPMD-example-datasets/example-3d/hdf5/data%T.h5\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cc1b21e51bd6e524",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = \".data/\" + path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "97abaf499694da8c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_loader = pmdsc.DataLoader(path)\n",
+ "print(data_loader.iterations)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "966866d44f32e381",
+ "metadata": {},
+ "source": [
+ "### Working with meshes (fields)\n",
+ "Let us plot electric field's x component at 65 fs. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "48d147dd17b96d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Ex = data_loader.get_field(\"E\", \"x\", time=65 * sc.Unit(\"fs\"))\n",
+ "print(Ex)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "69d531a49c1021d4",
+ "metadata": {},
+ "source": [
+ "You may have noticed, that the time requested does not have to match exactly any iteration. By default, if there is an iteration within 10 fs distance it will be used instead. This 10 fs tolerance can be adjusted by setting `time_tolerance`. The check can be also disabled by setting `time_tolerance=None`, with that the method will return the closest iteration regardless of the difference. So that this will also work:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7811da56397eeab6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(data_loader.get_field(\"E\", \"x\", time=20 * sc.Unit(\"fs\"), time_tolerance=20 * sc.Unit(\"fs\")))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f05c2e19a7ea2350",
+ "metadata": {},
+ "source": [
+ ", but `data_loader.get_field('E', 'x', time=20 * sc.Unit('fs'))` not."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "52e59ab2524367f4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# It is also possible to use iteration number instead:\n",
+ "print(data_loader.get_field(\"E\", \"x\", iteration=200))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9f4b4ac390d8a45f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# For scalar fields just omit the second argument:\n",
+ "print(data_loader.get_field(\"rho\", iteration=200))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "524aaf62b6967893",
+ "metadata": {},
+ "source": [
+ "#### Plotting\n",
+ "W can't directly plot 3D data.\n",
+ "But we can for example select a slice. For that we can use a helper function `pmdsc.closest` to get the closets index, since `scipp` requires exact match. You can read more about indexing `scipp` arrays in `scipp`'s documentation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c35a81329a3cc83",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "slicing_idx = pmdsc.closest(Ex, \"x\", 2 * sc.Unit(\"um\"))\n",
+ "Ex_slice = Ex[\"x\", slicing_idx]\n",
+ "print(Ex_slice)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "60536b5c1b47ff20",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Ex_slice.plot()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "66cea62fcce29f24",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We can also plot line plots:\n",
+ "Ex_line = Ex_slice[\"z\", pmdsc.closest(Ex_slice, \"z\", 1.4e-5 * sc.Unit(\"m\"))]\n",
+ "print(Ex_line)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b2da76154c389c9f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Ex_line.plot()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c0b3b040b06ab6d9",
+ "metadata": {},
+ "source": [
+ "Alternatively it is possible to work interactively with `plopp`'s tools for visualizing multidimensional data, such as `pp.slicer`or `pp.inspector`.\n",
+ "\n",
+ "#### Doing math\n",
+ "Just as an example we can easily plot the square of the field:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f8aa64caf8d2a66",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "(Ex_line * Ex_line).plot()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8ba6694fadbe70e5",
+ "metadata": {},
+ "source": [
+ "### Loading chunks\n",
+ "In the above example the whole 3D field is loaded into memory and sliced afterward. It is also possible to just load a sub-chunk into memory. When the `relay` option in `get_field` is set to `True` it will return a dummy object that only allocates memory for a single value. This relay object can be indexed, sliced etc. using the `scipp` indexing just like before. (The only limitation given by the `openpmd-api` is that the result has to a be contiguous chunk of the original array). The `load_data` method loads data and returns a proper `scipp` data array. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4f8585597ab80058",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The full 3D array is not loaded into memory at this point.\n",
+ "Ex = data_loader.get_field(\"E\", \"x\", time=65 * sc.Unit(\"fs\"), relay=True)\n",
+ "# This time we will select a range rather than a slice.\n",
+ "# For a range there is no need for an exact match.\n",
+ "# But, we could also select a slice just like in the previous example.\n",
+ "Ex = Ex[\"x\", -2e-6 * sc.Unit(\"m\") : 2e-6 * sc.Unit(\"m\")]\n",
+ "# Only now the smaller subset wil be loaded into memory\n",
+ "Ex = Ex.load_data()\n",
+ "print(Ex)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7fe77682f59178ab",
+ "metadata": {},
+ "source": [
+ "### Time axis\n",
+ "It is also possible to combine arrays from different iterations into one using `scipp`'s `concat` function. Here is an example for creating a 4D array from all iterations:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "74e0e0960eb431e4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Ex = sc.concat(\n",
+ " [\n",
+ " data_loader.get_field(\"E\", \"x\", iteration=iteration.value, time_tolerance=None)\n",
+ " for iteration in data_loader.iterations[\"iteration_id\"]\n",
+ " ],\n",
+ " dim=\"t\",\n",
+ ")\n",
+ "print(Ex)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7ea35adaeecae10d",
+ "metadata": {},
+ "source": [
+ "The reason for the z coordinate having two dimensions (t,z) is the fact that the data comes from a moving window simulation. This is clearly visible in the plot below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "97c0e662a6724f16",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let us just slice at some points to get a 2D dataset\n",
+ "Ex = Ex[\"x\", 10][\"y\", 10]\n",
+ "print(Ex)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "accb22f6ed91f90f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Ex.plot()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6118dd51b20e156e",
+ "metadata": {},
+ "source": [
+ "### Working with particle data\n",
+ "Coming soon!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "60ebf9bd3826ee54",
+ "metadata": {},
+ "source": [
+ "## Developer documentation\n",
+ "### Generating this README\n",
+ "README file is generated from the README.ipynb.\n",
+ "```\n",
+ "# Download and extract example datasets if not present\n",
+ "# Will download data into `.data`\n",
+ "make data\n",
+ "\n",
+ "make docs\n",
+ "```\n",
+ "### Running tests\n",
+ "At the moment we only test we have is an integration test running this notebook. After downloading example datasets with `make data`, if needed, run:\n",
+ "```\n",
+ "make test\n",
+ "```\n",
+ "You can also run tests with different python version with tox, but you need to have the python version installed, for example with pyenv.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "425c6cc9a98a5878",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
From dc1877c0d2b89b7f64e1c2839bedaf6482075b5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 5 Feb 2025 17:34:30 +0100
Subject: [PATCH 11/24] Adapt to changes in the openPMD-api repo
---
examples/15_scipp_loader.ipynb | 67 +++++++---------------------------
1 file changed, 13 insertions(+), 54 deletions(-)
diff --git a/examples/15_scipp_loader.ipynb b/examples/15_scipp_loader.ipynb
index 269aca4835..2494984da4 100644
--- a/examples/15_scipp_loader.ipynb
+++ b/examples/15_scipp_loader.ipynb
@@ -28,22 +28,20 @@
"\n",
"\n",
"## Installation\n",
- "It can be easily installed with pip.\n",
+ "The adaptor from openPMD to Scipp is part of the regular openPMD-api Python distribution.\n",
+ "It is loaded lazily and available as soon as `scipp` is also installed.\n",
+ "Plotting functionality requires the additional installation of `plopp`.\n",
+ "\n",
"```bash\n",
- "git clone https://github.com/pordyna/openpmd_scipp.git\n",
- "cd openpmd-scipp\n",
- "pip install .\n",
+ "pip install openpmd_api scipp plopp.\n",
"```\n",
"\n",
"## Getting started\n",
"Get example data sets from the `openPMD-example-datasets` repository.\n",
"```bash\n",
- "git clone https://github.com/openPMD/openPMD-example-datasets.git\n",
- "cd openPMD-example-datasets\n",
- "tar -zxvf example-2d.tar.gz\n",
- "tar -zxvf example-3d.tar.gz\n",
+ "cd openPMD-api\n",
+ "./share/openPMD/download_samples.sh\n",
"```\n",
- " \n",
"\n",
"### Opening series"
]
@@ -57,7 +55,8 @@
"source": [
"import scipp as sc\n",
"\n",
- "import openpmd_scipp as pmdsc"
+ "import openpmd_api as pmd\n",
+ "import openpmd_api.scipp as pmdsc"
]
},
{
@@ -67,17 +66,7 @@
"metadata": {},
"outputs": [],
"source": [
- "path = \"openPMD-example-datasets/example-3d/hdf5/data%T.h5\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "cc1b21e51bd6e524",
- "metadata": {},
- "outputs": [],
- "source": [
- "path = \".data/\" + path"
+ "path = \"../samples/git-sample/data%T.h5\""
]
},
{
@@ -87,7 +76,8 @@
"metadata": {},
"outputs": [],
"source": [
- "data_loader = pmdsc.DataLoader(path)\n",
+ "series = pmd.Series(path, pmd.Access.read_random_access)\n",
+ "data_loader = series.to_scipp()\n",
"print(data_loader.iterations)"
]
},
@@ -165,7 +155,7 @@
"metadata": {},
"source": [
"#### Plotting\n",
- "W can't directly plot 3D data.\n",
+ "We can't directly plot 3D data.\n",
"But we can for example select a slice. For that we can use a helper function `pmdsc.closest` to get the closets index, since `scipp` requires exact match. You can read more about indexing `scipp` arrays in `scipp`'s documentation."
]
},
@@ -325,37 +315,6 @@
"### Working with particle data\n",
"Coming soon!"
]
- },
- {
- "cell_type": "markdown",
- "id": "60ebf9bd3826ee54",
- "metadata": {},
- "source": [
- "## Developer documentation\n",
- "### Generating this README\n",
- "README file is generated from the README.ipynb.\n",
- "```\n",
- "# Download and extract example datasets if not present\n",
- "# Will download data into `.data`\n",
- "make data\n",
- "\n",
- "make docs\n",
- "```\n",
- "### Running tests\n",
- "At the moment we only test we have is an integration test running this notebook. After downloading example datasets with `make data`, if needed, run:\n",
- "```\n",
- "make test\n",
- "```\n",
- "You can also run tests with different python version with tox, but you need to have the python version installed, for example with pyenv.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "425c6cc9a98a5878",
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
From fb0a9adca4c6c46ee63ff484ab828b80f9146ec5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 5 Feb 2025 17:34:44 +0100
Subject: [PATCH 12/24] Some fixes for documentation
---
docs/source/analysis/scipp.rst | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst
index 6436092c99..3b1e7814d8 100644
--- a/docs/source/analysis/scipp.rst
+++ b/docs/source/analysis/scipp.rst
@@ -5,6 +5,10 @@ Scipp
Load openpmd datasets to ``scipp`` ``DataArrays``.
+.. note::
+
+ This documentation page is also available as an interactively executable Jupyter Notebook in ``examples/15_scipp_loader.ipynb``.
+
What is this good for?
~~~~~~~~~~~~~~~~~~~~~~
@@ -63,13 +67,12 @@ Opening series
.. code:: python
- path = "openPMD-example-datasets/example-3d/hdf5/data%T.h5"
- path = "./data/" + path
+ path = "../samples/git-sample/data%T.h5"
.. code:: python
series = pmd.Series(path, pmd.Access.read_random_access)
- data_loader = sereies.to_scipp()
+ data_loader = series.to_scipp()
print(data_loader.iterations)
::
From 10eec0270139f7dbb149aba97fb4eb277a22e188 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 5 Feb 2025 18:07:19 +0100
Subject: [PATCH 13/24] Try adding some scipp runners to the CI
---
.github/workflows/linux.yml | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 53daa723c7..f336918594 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -133,6 +133,7 @@ jobs:
sudo apt install clang-14 cmake gfortran libhdf5-dev python3.11 python3.11-dev wget
sudo .github/workflows/dependencies/install_spack
python3.11 -m pip install numpy pandas
+ python -m pip install scipp plopp
git clone -b v4.0.3 https://github.com/ToruNiina/toml11
cmake -S toml11 -B build_toml11 \
-DCMAKE_INSTALL_PREFIX=toml11_install \
@@ -229,6 +230,8 @@ jobs:
python3 -m pip install -U pandas
python3 -m pip install -U dask
python3 -m pip install -U pyarrow
+ python3 -m pip install -U plopp
+ python3 -m pip install -U scipp
- name: Build
env: {CC: gcc-7, CXX: g++-7, CXXFLAGS: -Werror}
run: |
@@ -282,6 +285,8 @@ jobs:
apk update
apk add hdf5-dev
python3.10 -m pip install numpy
+ python -m pip install scipp
+ python -m pip install plopp
- name: Build
env: {CXXFLAGS: -Werror}
run: |
From 9449631431d57b3b30d8c657b5e512d6276d2370 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Tue, 11 Feb 2025 18:18:33 +0100
Subject: [PATCH 14/24] scipp apparently unsupported on musllinux
maybe report
---
.github/workflows/linux.yml | 2 --
1 file changed, 2 deletions(-)
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index f336918594..17d6aa069e 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -285,8 +285,6 @@ jobs:
apk update
apk add hdf5-dev
python3.10 -m pip install numpy
- python -m pip install scipp
- python -m pip install plopp
- name: Build
env: {CXXFLAGS: -Werror}
run: |
From 7c63412e0ec3dac53afcaa64372faa2888989ba4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Tue, 11 Feb 2025 18:47:07 +0100
Subject: [PATCH 15/24] Remove non-ASCII character
---
src/binding/python/openpmd_api/scipp/utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py
index 7f47645a59..a29cfe4024 100644
--- a/src/binding/python/openpmd_api/scipp/utils.py
+++ b/src/binding/python/openpmd_api/scipp/utils.py
@@ -22,7 +22,7 @@ def _unit_dimension_to_scipp(unit_dimension):
:param tuple unit_dimension: A tuple containing seven integers, each representing
the power of a base SI unit in the order: (length, mass, time, electric current,
thermodynamic temperature, amount of substance, luminous intensity).
- For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s²).
+ For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s^2).
:returns: A Scipp unit object representing the combined unit as specified by the input
unit dimensions.
From 26d7d048e91b16ebcc4b177aed5bd21a6d296e22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Tue, 11 Feb 2025 18:56:35 +0100
Subject: [PATCH 16/24] Documentation: 80 chars line limit
---
.../python/openpmd_api/scipp/__init__.py | 3 +-
.../python/openpmd_api/scipp/loader.py | 24 +++++++----
.../python/openpmd_api/scipp/mesh_loader.py | 39 ++++++++++-------
src/binding/python/openpmd_api/scipp/utils.py | 43 +++++++++++--------
4 files changed, 64 insertions(+), 45 deletions(-)
diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py
index 66019bd413..f74fe90536 100644
--- a/src/binding/python/openpmd_api/scipp/__init__.py
+++ b/src/binding/python/openpmd_api/scipp/__init__.py
@@ -1,4 +1,5 @@
-"""openpmd_scipp: A Python package for loading openPMD datasets into scipp DataArrays.
+"""openpmd_scipp: A Python package for loading openPMD datasets
+ into scipp DataArrays.
See README.md for documentation
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
index 45bbf5cff6..6b909e9b5a 100644
--- a/src/binding/python/openpmd_api/scipp/loader.py
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -35,7 +35,8 @@ def get_iterations(series):
:param series: The openPMD series containing the data.
:type series: openpmd_api.Series
- :return: A Scipp dataset containing iteration IDs and their corresponding times.
+ :return: A Scipp dataset containing iteration IDs and their
+ corresponding times.
:rtype: sc.Dataset
"""
t = get_time_axis(series)
@@ -56,20 +57,23 @@ class DataLoader:
The data can be retrieved as a DataRelay object or directly as a DataArray.
Attributes:
- series (openpmd_api.Series): The openPMD series initialized from the file path.
- iterations (sc.Dataset): A dataset containing iteration IDs and their corresponding times.
+ series (openpmd_api.Series): The openPMD series initialized from
+ the file path.
+ iterations (sc.Dataset): A dataset containing iteration IDs
+ and their corresponding times.
"""
def __init__(self, series):
- """Initialize the DataLoader with an openPMD series from the specified file path.
+ """Initialize the DataLoader with an openPMD series from
+ the specified file path.
:param path: The file path to the openPMD data file.
:type path: str
- Initializes the `series` attribute as an openPMD series in read-only mode
- and the `iterations` attribute as a Scipp dataset containing iteration IDs
- and their corresponding times.
+ Initializes the `series` attribute as an openPMD series in
+ read-only mode and the `iterations` attribute as a Scipp dataset
+ containing iteration IDs and their corresponding times.
"""
self.series = series
self.iterations = get_iterations(self.series)
@@ -122,9 +126,11 @@ def get_field(
# handle integer inputs
time = time.astype("double")
time_tolerance = time_tolerance.astype("double")
- time = time.to(unit=self.iterations["iteration_id"].coords["t"].unit)
+ time = time.to(
+ unit=self.iterations["iteration_id"].coords["t"].unit)
try:
- iteration = int(self.iterations["iteration_id"]["t", time].value)
+ iteration = int(
+ self.iterations["iteration_id"]["t", time].value)
except IndexError:
idx = closest(self.iterations["iteration_id"], "t", time)
iteration = self.iterations["iteration_id"]["t", idx]
diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py
index e18178bd9b..790a13a27d 100644
--- a/src/binding/python/openpmd_api/scipp/mesh_loader.py
+++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py
@@ -40,8 +40,8 @@ class DataRelay(sc.DataArray):
def _verify_init(self):
"""Verify that the data is contiguous.
- Check if the chosen subset is contiguous in the openPMD storage by checking coordinate
- differences against the expected grid spacing.
+ Check if the chosen subset is contiguous in the openPMD storage
+ by checking coordinate differences against the expected grid spacing.
This is needed since openPMD does nto allow us to load strided chunks.
"""
@@ -64,11 +64,13 @@ def __init__(self, series, record, record_component, dummy_array, coords):
:type series: openpmd_api.Series
:param record: The openPMD record object associated with the mesh.
:type record: openpmd_api.Record
- :param record_component: The openPMD record component associated with the mesh.
+ :param record_component: The openPMD record component
+ associated with the mesh.
:type record_component: openpmd_api.Record_Component
- :param dummy_array: A scipp array used for the dummy interface. It should use as little
- memory as possible. Usually achieved by setting the stride of the values array to 0. Can
- be read- only.
+ :param dummy_array: A scipp array used for the dummy interface.
+ It should use as little memory as possible.
+ Usually achieved by setting the stride of the values array to 0.
+ Can be read- only.
:type dummy_array: sc.array
:param coords: A dictionary of coordinates for the DataArray.
"""
@@ -81,8 +83,9 @@ def __init__(self, series, record, record_component, dummy_array, coords):
def __getitem__(self, *args, **kwargs):
"""Retrieve a subset of the data, returning a new DataRelay instance.
- Override this method from the base class to use the DataRelay initializer and ensure that
- DataRelay is returned and the _verify_init method is used.
+ Override this method from the base class to use the DataRelay
+ initializer and ensure that DataRelay is returned and
+ the _verify_init method is used.
:param args: Forwarded to the base class.
:type args: tuple
@@ -105,9 +108,9 @@ def load_data(self):
Loads a chunk based on the current data array coordinates.
- Calculates the offset and extent for each dimension using the data array coordinates. Loads
- the data chunk from the record component, scales it by the unit SI, and returns a new data
- array with loaded values.
+ Calculates the offset and extent for each dimension using the data array
+ coordinates. Loads the data chunk from the record component, scales it
+ by the unit SI, and returns a new data array with loaded values.
:return: The DataArray instance with the loaded data.
:rtype: DataRelay
@@ -137,7 +140,8 @@ def load_data(self):
def get_field_data_relay(series, iteration, field, component=None):
"""Get openPMD mesh as a data relay.
- Create a DataRelay object for a specified field and component in an openPMD series.
+ Create a DataRelay object for a specified field and component in
+ an openPMD series.
:param series: The openPMD series containing the data.
:type series: openpmd_api.Series
@@ -147,7 +151,8 @@ def get_field_data_relay(series, iteration, field, component=None):
:type field: str
:param component: The component of the field to retrieve, default is SCALAR.
:type component: openpmd_api.Mesh_Record_Component, optional
- :return: A DataRelay instance initialized with the specified field and component data.
+ :return: A DataRelay instance initialized with the specified field
+ and component data.
:rtype: DataRelay
"""
record = series.iterations[iteration].meshes[field]
@@ -175,7 +180,8 @@ def get_field_data_relay(series, iteration, field, component=None):
small, shape=rc.shape, strides=[0] * rc.ndim, writeable=False
)
dummy_array = sc.array(
- dims=dims, values=dummy_array, unit=_unit_dimension_to_scipp(record.unit_dimension)
+ dims=dims, values=dummy_array, unit=_unit_dimension_to_scipp(
+ record.unit_dimension)
)
return DataRelay(
@@ -186,8 +192,9 @@ def get_field_data_relay(series, iteration, field, component=None):
def get_field(series, iteration, field, component=None):
"""Retrieve and load openPMD mesh data without slicing.
- This function creates a DataRelay object for a specified field and component in an openPMD
- series, loads the whole mesh, and returns the resulting DataArray.
+ This function creates a DataRelay object for a specified field
+ and component in an openPMD series, loads the whole mesh, and returns
+ the resulting DataArray.
:param series: The openPMD series containing the data.
:type series: openpmd_api.Series
diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py
index a29cfe4024..a3f300e902 100644
--- a/src/binding/python/openpmd_api/scipp/utils.py
+++ b/src/binding/python/openpmd_api/scipp/utils.py
@@ -14,18 +14,20 @@
def _unit_dimension_to_scipp(unit_dimension):
"""Convert a unit dimension from the openPMD standard to a Scipp unit.
- This function takes a tuple representing the powers of the seven base SI units
- (length, mass, time, electric current, thermodynamic temperature, amount of substance,
- and luminous intensity) and converts it into a Scipp unit. The conversion is based on
- the openPMD standard, which describes units as powers of these base measures.
-
- :param tuple unit_dimension: A tuple containing seven integers, each representing
- the power of a base SI unit in the order: (length, mass, time, electric current,
- thermodynamic temperature, amount of substance, luminous intensity).
- For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s^2).
-
- :returns: A Scipp unit object representing the combined unit as specified by the input
- unit dimensions.
+ This function takes a tuple representing the powers of the seven base SI
+ units (length, mass, time, electric current, thermodynamic temperature,
+ amount of substance, and luminous intensity) and converts it into
+ a Scipp unit. The conversion is based on the openPMD standard,
+ which describes units as powers of these base measures.
+
+ :param tuple unit_dimension: A tuple containing seven integers, each
+ representing the power of a base SI unit in the order: (length, mass, time,
+ electric current, thermodynamic temperature, amount of substance,
+ luminous intensity). For example, (1, 0, -2, 0, 0, 0, 0) corresponds
+ to meters per second squared (m/s^2).
+
+ :returns: A Scipp unit object representing the combined unit as specified
+ by the input unit dimensions.
:rtype: sc.Unit
:example:
@@ -34,7 +36,8 @@ def _unit_dimension_to_scipp(unit_dimension):
:notes:
- The function assumes that the input tuple has exactly seven elements.
- - Each element in the tuple corresponds to the power of a specific base unit.
+ - Each element in the tuple corresponds to the power of
+ a specific base unit.
"""
# unit dimension description from the openPMD standard:
# powers of the 7 base measures characterizing the record's unit in SI
@@ -57,19 +60,20 @@ def _unit_dimension_to_scipp(unit_dimension):
def closest(data, dim, val):
- """Find the index of the closest value in a dataset along a specified dimension.
+ """Find the index of the closest value in a dataset
+ along a specified dimension.
This function calculates the index of the element in the specified dimension
of the dataset that is closest to the given value. It ensures that the value
- is converted to the same unit as the dimension's coordinate before performing
- the comparison.
+ is converted to the same unit as the dimension's coordinate before
+ performing the comparison.
:param data: The data array containing the dimension to search.
:type data: sc.DataArray
:param dim: The name of the dimension along which to find the closest value.
:type dim: str
- :param val: The value to compare against, which will be converted to the unit of
- the dimension's coordinate.
+ :param val: The value to compare against, which will be converted to the
+ unit of the dimension's coordinate.
:type val: sc.Variable
:return: The index of the closest value in the specified dimension.
@@ -78,7 +82,8 @@ def closest(data, dim, val):
:notes:
- The function assumes that `val` can be converted to the unit of the
dimension's coordinate.
- - The dataset `data` must have coordinates defined for the specified dimension.
+ - The dataset `data` must have coordinates defined
+ for the specified dimension.
"""
val = val.astype("double")
coord = data.coords[dim]
From 9b3e45dc3b2d705c467b5e8c220efd9d3c44a7e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Tue, 11 Feb 2025 19:03:46 +0100
Subject: [PATCH 17/24] 80 chars line limit: code
---
.../python/openpmd_api/scipp/loader.py | 20 ++++++++++++++-----
.../python/openpmd_api/scipp/mesh_loader.py | 11 ++++++----
2 files changed, 22 insertions(+), 9 deletions(-)
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
index 6b909e9b5a..864d6ac4fb 100644
--- a/src/binding/python/openpmd_api/scipp/loader.py
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -25,7 +25,8 @@ def get_time_axis(series):
:rtype: sc.DataArray
"""
t = [
- series.iterations[it].time * series.iterations[it].time_unit_SI for it in series.iterations
+ series.iterations[it].time * series.iterations[it].time_unit_SI \
+ for it in series.iterations
]
return sc.array(dims=["t"], values=t, unit="s", dtype="double")
@@ -43,7 +44,9 @@ def get_iterations(series):
return sc.Dataset(
data={
"iteration_id": sc.DataArray(
- data=sc.array(dims=["t"], values=list(series.iterations)), coords={"t": t}
+ data=sc.array(dims=["t"],
+ values=list(series.iterations)),
+ coords={"t": t}
)
}
)
@@ -137,7 +140,8 @@ def get_field(
assert time_tolerance is None or sc.abs(
iteration.coords["t"] - time
) <= time_tolerance.to(unit=time.unit), (
- f"No iteration found within time_tolerance={time_tolerance}."
+ f"No iteration found "
+ f"within time_tolerance={time_tolerance}."
)
print(
"Series does not contain iteration at the exact time. "
@@ -148,9 +152,15 @@ def get_field(
if relay:
return get_field_data_relay(
- series=self.series, field=field, component=component, iteration=iteration
+ series=self.series,
+ field=field,
+ component=component,
+ iteration=iteration
)
else:
return get_field(
- series=self.series, field=field, component=component, iteration=iteration
+ series=self.series,
+ field=field,
+ component=component,
+ iteration=iteration
)
diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py
index 790a13a27d..9bdcb756fa 100644
--- a/src/binding/python/openpmd_api/scipp/mesh_loader.py
+++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py
@@ -158,9 +158,8 @@ def get_field_data_relay(series, iteration, field, component=None):
record = series.iterations[iteration].meshes[field]
rc = record[component] if component else record
dims = record.axis_labels
- time = (series.iterations[iteration].time + record.time_offset) * series.iterations[
- iteration
- ].time_unit_SI
+ time = (series.iterations[iteration].time + record.time_offset) \
+ * series.iterations[iteration].time_unit_SI
time = sc.scalar(time, unit="s", dtype="double")
coords = {"t": time}
for dd, dim in enumerate(dims):
@@ -185,7 +184,11 @@ def get_field_data_relay(series, iteration, field, component=None):
)
return DataRelay(
- series=series, record=record, record_component=rc, dummy_array=dummy_array, coords=coords
+ series=series,
+ record=record,
+ record_component=rc,
+ dummy_array=dummy_array,
+ coords=coords
)
From aa4719a8166d51325edc8accade34876cd758706 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 12 Feb 2025 10:59:10 +0100
Subject: [PATCH 18/24] Compatibility with Python 3.8
---
src/binding/python/openpmd_api/scipp/utils.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py
index a3f300e902..c75f075f29 100644
--- a/src/binding/python/openpmd_api/scipp/utils.py
+++ b/src/binding/python/openpmd_api/scipp/utils.py
@@ -7,6 +7,8 @@
GPL - 3.0 license. See LICENSE file for details.
"""
+from sys import version_info
+
import numpy as np
import scipp as sc
@@ -53,7 +55,9 @@ def _unit_dimension_to_scipp(unit_dimension):
1.0 * sc.Unit("cd"),
)
unit = 1.0 * sc.Unit("1")
- for dim, base_unit in zip(unit_dimension, base_units, strict=False):
+ zipped = zip(unit_dimension, base_units) if version_info < (
+ 3, 10) else zip(unit_dimension, base_units, strict=False)
+ for dim, base_unit in zipped:
if dim != 0:
unit *= base_unit**dim
return unit.unit
From 3bde4fb17d31e8876086296fa3c76902a1cb576a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Wed, 12 Feb 2025 11:05:23 +0100
Subject: [PATCH 19/24] 79 chars instead of 80..
---
src/binding/python/openpmd_api/scipp/loader.py | 7 ++++---
.../python/openpmd_api/scipp/mesh_loader.py | 13 ++++++++-----
src/binding/python/openpmd_api/scipp/utils.py | 15 ++++++++-------
3 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
index 864d6ac4fb..f39a6f9e2f 100644
--- a/src/binding/python/openpmd_api/scipp/loader.py
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -55,9 +55,10 @@ def get_iterations(series):
class DataLoader:
"""DataLoader class for loading and retrieving openPMD mesh data.
- This class initializes an openPMD series from a given file path and provides
- methods to retrieve mesh data fields either by iteration index or by time.
- The data can be retrieved as a DataRelay object or directly as a DataArray.
+ This class initializes an openPMD series from a given file path and
+ provides methods to retrieve mesh data fields either by iteration index or
+ by time. The data can be retrieved as a DataRelay object or directly as a
+ DataArray.
Attributes:
series (openpmd_api.Series): The openPMD series initialized from
diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py
index 9bdcb756fa..bee54067f0 100644
--- a/src/binding/python/openpmd_api/scipp/mesh_loader.py
+++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py
@@ -108,9 +108,10 @@ def load_data(self):
Loads a chunk based on the current data array coordinates.
- Calculates the offset and extent for each dimension using the data array
- coordinates. Loads the data chunk from the record component, scales it
- by the unit SI, and returns a new data array with loaded values.
+ Calculates the offset and extent for each dimension using the data
+ array coordinates. Loads the data chunk from the record component,
+ scales it by the unit SI, and returns a new data array with loaded
+ values.
:return: The DataArray instance with the loaded data.
:rtype: DataRelay
@@ -149,7 +150,8 @@ def get_field_data_relay(series, iteration, field, component=None):
:type iteration: int
:param field: The name of the field to retrieve.
:type field: str
- :param component: The component of the field to retrieve, default is SCALAR.
+ :param component: The component of the field to retrieve,
+ default is SCALAR.
:type component: openpmd_api.Mesh_Record_Component, optional
:return: A DataRelay instance initialized with the specified field
and component data.
@@ -205,7 +207,8 @@ def get_field(series, iteration, field, component=None):
:type iteration: int
:param field: The name of the field to retrieve.
:type field: str
- :param component: The component of the field to retrieve, default is SCALAR.
+ :param component: The component of the field to retrieve,
+ default is SCALAR.
:type component: openpmd_api.Mesh_Record_Component, optional
:return: A DataArray instance with the loaded data.
:rtype: DataRelay
diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py
index c75f075f29..5d8266f3aa 100644
--- a/src/binding/python/openpmd_api/scipp/utils.py
+++ b/src/binding/python/openpmd_api/scipp/utils.py
@@ -43,8 +43,8 @@ def _unit_dimension_to_scipp(unit_dimension):
"""
# unit dimension description from the openPMD standard:
# powers of the 7 base measures characterizing the record's unit in SI
- # (length L, mass M, time T, electric current I, thermodynamic temperature theta,
- # amount of substance N, luminous intensity J)
+ # (length L, mass M, time T, electric current I, thermodynamic
+ # temperature theta, amount of substance N, luminous intensity J)
base_units = (
1.0 * sc.Unit("m"),
1.0 * sc.Unit("kg"),
@@ -67,14 +67,15 @@ def closest(data, dim, val):
"""Find the index of the closest value in a dataset
along a specified dimension.
- This function calculates the index of the element in the specified dimension
- of the dataset that is closest to the given value. It ensures that the value
- is converted to the same unit as the dimension's coordinate before
- performing the comparison.
+ This function calculates the index of the element in the specified
+ dimension of the dataset that is closest to the given value. It ensures
+ that the value is converted to the same unit as the dimension's coordinate
+ before performing the comparison.
:param data: The data array containing the dimension to search.
:type data: sc.DataArray
- :param dim: The name of the dimension along which to find the closest value.
+ :param dim: The name of the dimension along which to find
+ the closest value.
:type dim: str
:param val: The value to compare against, which will be converted to the
unit of the dimension's coordinate.
From 153d0134804fae687bb9cbe0aa0f14033f3a5690 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 12 Feb 2025 11:07:33 +0100
Subject: [PATCH 20/24] Fix unused import
---
examples/15_scipp_loader.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py
index 874a0b7b08..f0c5a75745 100644
--- a/examples/15_scipp_loader.py
+++ b/examples/15_scipp_loader.py
@@ -7,6 +7,7 @@ def main():
try:
scipp_loader = series.to_scipp()
import plopp
+ print("Plopp version:", plopp.__version__)
except ImportError:
print("Need to install scipp and plopp to run this example.")
return
From fa98c146950366a213936f16753ec00c8d278ee7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 12 Feb 2025 11:14:27 +0100
Subject: [PATCH 21/24] Style fixes
f
---
examples/15_scipp_loader.py | 25 ++++++++++---------
.../python/openpmd_api/ScippLazyInit.py | 6 +++--
src/binding/python/openpmd_api/__init__.py | 2 +-
.../python/openpmd_api/scipp/loader.py | 1 -
4 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py
index f0c5a75745..05d57daa02 100644
--- a/examples/15_scipp_loader.py
+++ b/examples/15_scipp_loader.py
@@ -2,12 +2,12 @@
def main():
- series = pmd.Series("../samples/git-sample/data%T.h5", pmd.Access.read_only)
+ series = pmd.Series("../samples/git-sample/data%T.h5",
+ pmd.Access.read_only)
try:
scipp_loader = series.to_scipp()
- import plopp
- print("Plopp version:", plopp.__version__)
+ import plopp # noqa
except ImportError:
print("Need to install scipp and plopp to run this example.")
return
@@ -22,7 +22,8 @@ def main():
Ex_slice = Ex["x", slicing_idx]
print(Ex_slice)
Ex_slice.plot().save("slice.png")
- Ex_line = Ex_slice["z", pmdsc.closest(Ex_slice, "z", 1.4e-5 * sc.Unit("m"))]
+ Ex_line = Ex_slice["z", pmdsc.closest(
+ Ex_slice, "z", 1.4e-5 * sc.Unit("m"))]
print(Ex_line)
Ex_line.plot().save("line.png")
(Ex_line * Ex_line).plot().save("line_squared.png")
@@ -32,18 +33,17 @@ def main():
# This time we will select a range rather than a slice.
# For a range there is no need for an exact match.
# But, we could also select a slice just like in the previous example.
- Ex = Ex["x", -2e-6 * sc.Unit("m") : 2e-6 * sc.Unit("m")]
+ Ex = Ex["x", -2e-6 * sc.Unit("m"): 2e-6 * sc.Unit("m")]
# Only now the smaller subset wil be loaded into memory
Ex = Ex.load_data()
print(Ex)
- Ex = sc.concat(
- [
- scipp_loader.get_field("E", "x", iteration=iteration.value, time_tolerance=None)
- for iteration in scipp_loader.iterations["iteration_id"]
- ],
- dim="t",
- )
+ Ex = sc.concat([scipp_loader.get_field(
+ "E", "x", iteration=iteration.value,
+ time_tolerance=None)
+ for iteration in scipp_loader.iterations
+ ["iteration_id"]],
+ dim="t",)
print(Ex)
# Let us just slice at some points to get a 2D dataset
@@ -51,5 +51,6 @@ def main():
print(Ex)
Ex.plot().save("moving_window.png")
+
if __name__ == "__main__":
main()
diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py
index e2c3832ddf..49f7aea69a 100644
--- a/src/binding/python/openpmd_api/ScippLazyInit.py
+++ b/src/binding/python/openpmd_api/ScippLazyInit.py
@@ -2,14 +2,16 @@ def series_to_scipp(series):
# lazy import
try:
- import scipp
+ import scipp # noqa
found_scipp = True
except ImportError as original_error:
found_scipp = False
original_error_string = f"{original_error}"
if not found_scipp:
- raise ImportError(f"Scipp NOT found. Install scipp for Scipp support. Original error: {original_error_string}")
+ raise ImportError(
+ f"Scipp NOT found. Install scipp for Scipp support. "
+ f"Original error: {original_error_string}")
from .scipp import DataLoader
diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py
index 4fca249e68..572feedecc 100644
--- a/src/binding/python/openpmd_api/__init__.py
+++ b/src/binding/python/openpmd_api/__init__.py
@@ -17,7 +17,7 @@
Record_Component.to_dask_array = record_component_to_daskarray # noqa
Series.to_df = iterations_to_dataframe # noqa
Series.to_cudf = iterations_to_cudf # noqa
-Series.to_scipp = series_to_scipp
+Series.to_scipp = series_to_scipp # noqa
# TODO remove in future versions (deprecated)
Access_Type = Access # noqa
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
index f39a6f9e2f..63e06a998e 100644
--- a/src/binding/python/openpmd_api/scipp/loader.py
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -9,7 +9,6 @@
GPL - 3.0 license. See LICENSE file for details.
"""
-import openpmd_api as pmd
import scipp as sc
from .mesh_loader import get_field, get_field_data_relay
From 9c1b184a47a52473cef929b293ed4ee9288a3354 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Ordyna?=
Date: Wed, 12 Feb 2025 11:17:27 +0100
Subject: [PATCH 22/24] Style fixes
p
---
src/binding/python/openpmd_api/scipp/__init__.py | 4 ++--
src/binding/python/openpmd_api/scipp/loader.py | 7 +++----
src/binding/python/openpmd_api/scipp/mesh_loader.py | 3 ++-
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py
index f74fe90536..bb264b6bab 100644
--- a/src/binding/python/openpmd_api/scipp/__init__.py
+++ b/src/binding/python/openpmd_api/scipp/__init__.py
@@ -10,5 +10,5 @@
GPL - 3.0 license. See LICENSE file for details.
"""
-from .loader import DataLoader as DataLoader
-from .utils import closest as closest
+from .loader import DataLoader as DataLoader # noqa
+from .utils import closest as closest # noqa
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
index 63e06a998e..7a74a18060 100644
--- a/src/binding/python/openpmd_api/scipp/loader.py
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -24,8 +24,8 @@ def get_time_axis(series):
:rtype: sc.DataArray
"""
t = [
- series.iterations[it].time * series.iterations[it].time_unit_SI \
- for it in series.iterations
+ series.iterations[it].time * series.iterations[it].time_unit_SI
+ for it in series.iterations
]
return sc.array(dims=["t"], values=t, unit="s", dtype="double")
@@ -43,8 +43,7 @@ def get_iterations(series):
return sc.Dataset(
data={
"iteration_id": sc.DataArray(
- data=sc.array(dims=["t"],
- values=list(series.iterations)),
+ data=sc.array(dims=["t"], values=list(series.iterations)),
coords={"t": t}
)
}
diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py
index bee54067f0..f7bd1e6d63 100644
--- a/src/binding/python/openpmd_api/scipp/mesh_loader.py
+++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py
@@ -213,4 +213,5 @@ def get_field(series, iteration, field, component=None):
:return: A DataArray instance with the loaded data.
:rtype: DataRelay
"""
- return get_field_data_relay(series, iteration, field, component).load_data()
+ return get_field_data_relay(
+ series, iteration, field, component).load_data()
From 12e9ad3ea0e7b24adad22e23f0487232529a5371 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Wed, 12 Feb 2025 11:26:15 +0100
Subject: [PATCH 23/24] Fix plopp import again..
---
examples/15_scipp_loader.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py
index 05d57daa02..699a4adcd1 100644
--- a/examples/15_scipp_loader.py
+++ b/examples/15_scipp_loader.py
@@ -8,6 +8,7 @@ def main():
try:
scipp_loader = series.to_scipp()
import plopp # noqa
+ print("Plopp version:", plopp.__version__)
except ImportError:
print("Need to install scipp and plopp to run this example.")
return
From 124b080a5230fe42221a3c4bf80177ba5bc9cb21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20P=C3=B6schel?=
Date: Fri, 28 Feb 2025 15:13:21 +0100
Subject: [PATCH 24/24] Change/add license info
License: LGPLv3+
---
examples/15_scipp_loader.py | 8 ++++++++
src/binding/python/openpmd_api/ScippLazyInit.py | 9 +++++++++
src/binding/python/openpmd_api/scipp/__init__.py | 3 +--
src/binding/python/openpmd_api/scipp/loader.py | 3 +--
src/binding/python/openpmd_api/scipp/mesh_loader.py | 3 +--
src/binding/python/openpmd_api/scipp/utils.py | 3 +--
6 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py
index 699a4adcd1..d68a83eb23 100644
--- a/examples/15_scipp_loader.py
+++ b/examples/15_scipp_loader.py
@@ -1,3 +1,11 @@
+"""
+This file is part of the openPMD-api.
+
+Copyright 2025 openPMD contributors
+Authors: Franz Poeschel, Pawel Ordyna
+License: LGPLv3+
+"""
+
import openpmd_api as pmd
diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py
index 49f7aea69a..9ec90bcc2a 100644
--- a/src/binding/python/openpmd_api/ScippLazyInit.py
+++ b/src/binding/python/openpmd_api/ScippLazyInit.py
@@ -1,3 +1,12 @@
+"""
+This file is part of the openPMD-api.
+
+Copyright 2025 openPMD contributors
+Authors: Franz Poeschel
+License: LGPLv3+
+"""
+
+
def series_to_scipp(series):
# lazy import
diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py
index bb264b6bab..25c1314dde 100644
--- a/src/binding/python/openpmd_api/scipp/__init__.py
+++ b/src/binding/python/openpmd_api/scipp/__init__.py
@@ -6,8 +6,7 @@
Author:
Pawel Ordyna
-License:
-GPL - 3.0 license. See LICENSE file for details.
+License: LGPLv3+
"""
from .loader import DataLoader as DataLoader # noqa
diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py
index 7a74a18060..aab80feeb7 100644
--- a/src/binding/python/openpmd_api/scipp/loader.py
+++ b/src/binding/python/openpmd_api/scipp/loader.py
@@ -5,8 +5,7 @@
Author:
Pawel Ordyna
-License:
-GPL - 3.0 license. See LICENSE file for details.
+License: LGPLv3+
"""
import scipp as sc
diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py
index f7bd1e6d63..57978f4287 100644
--- a/src/binding/python/openpmd_api/scipp/mesh_loader.py
+++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py
@@ -3,8 +3,7 @@
Author:
Pawel Ordyna
-License:
-GPL - 3.0 license. See LICENSE file for details.
+License: LGPLv3+
"""
import numpy as np
diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py
index 5d8266f3aa..ac66880477 100644
--- a/src/binding/python/openpmd_api/scipp/utils.py
+++ b/src/binding/python/openpmd_api/scipp/utils.py
@@ -3,8 +3,7 @@
Author:
Pawel Ordyna
-License:
-GPL - 3.0 license. See LICENSE file for details.
+License: LGPLv3+
"""
from sys import version_info