From d47d6d848ec670da6c6ee0dc43e5f7491aed39b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Fri, 31 Jan 2025 17:54:36 +0100 Subject: [PATCH 01/24] Add Code from openpmd_scipp repository https://github.com/pordyna/openpmd_scipp --- .../python/openpmd_api/scipp/__init__.py | 13 ++ .../python/openpmd_api/scipp/loader.py | 150 +++++++++++++ .../python/openpmd_api/scipp/mesh_loader.py | 204 ++++++++++++++++++ src/binding/python/openpmd_api/scipp/utils.py | 86 ++++++++ 4 files changed, 453 insertions(+) create mode 100644 src/binding/python/openpmd_api/scipp/__init__.py create mode 100644 src/binding/python/openpmd_api/scipp/loader.py create mode 100644 src/binding/python/openpmd_api/scipp/mesh_loader.py create mode 100644 src/binding/python/openpmd_api/scipp/utils.py diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py new file mode 100644 index 0000000000..66019bd413 --- /dev/null +++ b/src/binding/python/openpmd_api/scipp/__init__.py @@ -0,0 +1,13 @@ +"""openpmd_scipp: A Python package for loading openPMD datasets into scipp DataArrays. + +See README.md for documentation + +Author: + Pawel Ordyna + +License: +GPL - 3.0 license. See LICENSE file for details. +""" + +from .loader import DataLoader as DataLoader +from .utils import closest as closest diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py new file mode 100644 index 0000000000..acbf88410b --- /dev/null +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -0,0 +1,150 @@ +"""Module providing the DataLoader class. + +Provides the main openPMD to scpp interface class. + +Author: + Pawel Ordyna + +License: +GPL - 3.0 license. See LICENSE file for details. +""" + +import openpmd_api as pmd +import scipp as sc + +from .mesh_loader import get_field, get_field_data_relay +from .utils import closest + + +def get_time_axis(series): + """Get the time axis from an openPMD series. 
+ + :param series: The openPMD series containing the data. + :type series: openpmd_api.Series + :return: A Scipp array representing the time axis. + :rtype: sc.DataArray + """ + t = [ + series.iterations[it].time * series.iterations[it].time_unit_SI for it in series.iterations + ] + return sc.array(dims=["t"], values=t, unit="s", dtype="double") + + +def get_iterations(series): + """Get the iterations from an openPMD series. + + :param series: The openPMD series containing the data. + :type series: openpmd_api.Series + :return: A Scipp dataset containing iteration IDs and their corresponding times. + :rtype: sc.Dataset + """ + t = get_time_axis(series) + return sc.Dataset( + data={ + "iteration_id": sc.DataArray( + data=sc.array(dims=["t"], values=list(series.iterations)), coords={"t": t} + ) + } + ) + + +class DataLoader: + """DataLoader class for loading and retrieving openPMD mesh data. + + This class initializes an openPMD series from a given file path and provides + methods to retrieve mesh data fields either by iteration index or by time. + The data can be retrieved as a DataRelay object or directly as a DataArray. + + Attributes: + series (openpmd_api.Series): The openPMD series initialized from the file path. + iterations (sc.Dataset): A dataset containing iteration IDs and their corresponding times. + + """ + + def __init__(self, path): + """Initialize the DataLoader with an openPMD series from the specified file path. + + :param path: The file path to the openPMD data file. + :type path: str + + Initializes the `series` attribute as an openPMD series in read-only mode + and the `iterations` attribute as a Scipp dataset containing iteration IDs + and their corresponding times. 
+ """ + self.series = pmd.Series(str(path), pmd.Access.read_only) + self.iterations = get_iterations(self.series) + + def get_field( + self, + field, + component=pmd.Mesh_Record_Component.SCALAR, + time=None, + iteration=None, + relay=False, + time_tolerance=10 * sc.Unit("fs"), + ): + """Retrieve a mesh data field from the openPMD series. + + This method retrieves a specified field and component from the + openPMD series either by iteration index or by time. The data + can be returned as a DataRelay object or directly as a + DataArray. + + :param field: The name of the field to retrieve. + :type field: str + :param component: The component of the field to retrieve, + default is SCALAR. + :type component: openpmd_api.Mesh_Record_Component, optional + :param time: The time at which to retrieve the field. Either + time or iteration must be provided, but not both. + :type time: sc.Variable, optional + :param iteration: The iteration index at which to retrieve the + field. Either time or iteration must be provided, but not + both. + :type iteration: int, optional + :param relay: If True, return the data as a DataRelay object; + otherwise, return as a DataArray. + :type relay: bool, optional + :param time_tolerance: The tolerance for matching the time when + retrieving by time, default is 10 femtoseconds. + :type time_tolerance: sc.Unit, optional + :return: The requested field data as a DataRelay or DataArray. + :rtype: DataRelay or DataArray + :raises AssertionError: If neither time nor iteration is + provided, or if both are provided. + :raises IndexError: If no iteration is found within the + specified time tolerance. 
+ """ + assert (time is None and iteration is not None) or ( + iteration is None and time is not None + ), "Provide either iteration index or time" + if iteration is None: + # handle integer inputs + time = time.astype("double") + time_tolerance = time_tolerance.astype("double") + time = time.to(unit=self.iterations["iteration_id"].coords["t"].unit) + try: + iteration = int(self.iterations["iteration_id"]["t", time].value) + except IndexError: + idx = closest(self.iterations["iteration_id"], "t", time) + iteration = self.iterations["iteration_id"]["t", idx] + assert time_tolerance is None or sc.abs( + iteration.coords["t"] - time + ) <= time_tolerance.to(unit=time.unit), ( + f"No iteration found within time_tolerance={time_tolerance}." + ) + print( + "Series does not contain iteration at the exact time. " + "Using closest iteration instead.", + flush=True, + ) + iteration = int(iteration.value) + + if relay: + return get_field_data_relay( + series=self.series, field=field, component=component, iteration=iteration + ) + else: + return get_field( + series=self.series, field=field, component=component, iteration=iteration + ) diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py new file mode 100644 index 0000000000..24b3576dce --- /dev/null +++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py @@ -0,0 +1,204 @@ +"""Module providing mesh like data loading capability. + +Author: + Pawel Ordyna + +License: +GPL - 3.0 license. See LICENSE file for details. +""" + +import numpy as np +import openpmd_api as pmd +import scipp as sc + +from .utils import _unit_dimension_to_scipp + + +class DataRelay(sc.DataArray): + """Data relay for loading openPMD meshes into scipp. 
+ + Attributes + ---------- + series : openpmd_api.Series + The openPMD series object + record : openpmd_api.Record + The openPMD record object associated with the mesh + record_component : openpmd_api.Record_Component + The openPMD record component associated with the mesh + + Methods + ------- + _verify_init(): + Ensures that the data range to load is contiguous. + __getitem__(*args, **kwargs): + Retrieves a subset of the data, returning a new DataRelay instance. + load_data(): + Loads data from the record component based on data array coordinates, + adjusting for offsets and extents, and updates the DataArray values. + + """ + + def _verify_init(self): + """Verify that the data is contiguous. + + Check if the chosen subset is contiguous in the openPMD storage by checking coordinate + differences against the expected grid spacing. + + This is needed since openPMD does nto allow us to load strided chunks. + """ + for dim in self.dims: + coord = self.coords[dim] + diffs = coord[dim, 1:] - coord[dim, :-1] + diffs = diffs.to(unit="m") + idx = list(self.record.axis_labels).index(dim) + step = self.record.grid_spacing[idx] + step *= self.record.grid_unit_SI + step = step * sc.Unit("m") + assert sc.allclose(diffs, step), ( + f"The data has to be contiguous! diffs: {diffs}, step: {step}" + ) + + def __init__(self, series, record, record_component, dummy_array, coords): + """Initialize the DataRelay object. + + :param series: The openPMD series object associated with the data. + :type series: openpmd_api.Series + :param record: The openPMD record object associated with the mesh. + :type record: openpmd_api.Record + :param record_component: The openPMD record component associated with the mesh. + :type record_component: openpmd_api.Record_Component + :param dummy_array: A scipp array used for the dummy interface. It should use as little + memory as possible. Usually achieved by setting the stride of the values array to 0. Can + be read- only. 
+ :type dummy_array: sc.array + :param coords: A dictionary of coordinates for the DataArray. + """ + super().__init__(data=dummy_array, coords=coords) + self.series = series + self.record = record + self.record_component = record_component + self._verify_init() + + def __getitem__(self, *args, **kwargs): + """Retrieve a subset of the data, returning a new DataRelay instance. + + Override this method from the base class to use the DataRelay initializer and ensure that + DataRelay is returned and the _verify_init method is used. + + :param args: Forwarded to the base class. + :type args: tuple + :param kwargs: Forwarded to the base class. + :type kwargs: dict + :return: A new DataRelay instance with the sliced data. + :rtype: DataRelay + """ + dummy_data_aray = super().__getitem__(*args, **kwargs) + return DataRelay( + series=self.series, + record=self.record, + record_component=self.record_component, + dummy_array=dummy_data_aray.data, + coords=dummy_data_aray.coords, + ) + + def load_data(self): + """Load data from the openPMD dataset. + + Loads a chunk based on the current data array coordinates. + + Calculates the offset and extent for each dimension using the data array coordinates. Loads + the data chunk from the record component, scales it by the unit SI, and returns a new data + array with loaded values. + + :return: The DataArray instance with the loaded data. 
+ :rtype: DataRelay + """ + offset = [0] * self.record_component.ndim + extent = [0] * self.record_component.ndim + for dd, dim in enumerate(self.record.axis_labels): + try: + start = self.coords[dim].to(unit="m").value + except sc.DimensionError: + start = self.coords[dim][0].to(unit="m").value + start /= self.record.grid_unit_SI + start -= self.record.grid_global_offset[dd] + start /= self.record.grid_spacing[dd] + start -= self.record_component.position[dd] + offset[dd] = int(round(start)) + extent[dd] = self.coords[dim].size + data = self.record_component.load_chunk(offset=offset, extent=extent) + self.series.flush() + data *= self.record_component.unit_SI + data = np.squeeze(data) + data_array = self.copy() + data_array.values = data + return data_array + + +def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR): + """Get openPMD mesh as a data relay. + + Create a DataRelay object for a specified field and component in an openPMD series. + + :param series: The openPMD series containing the data. + :type series: openpmd_api.Series + :param iteration: The iteration number to access within the series. + :type iteration: int + :param field: The name of the field to retrieve. + :type field: str + :param component: The component of the field to retrieve, default is SCALAR. + :type component: openpmd_api.Mesh_Record_Component, optional + :return: A DataRelay instance initialized with the specified field and component data. 
+ :rtype: DataRelay + """ + record = series.iterations[iteration].meshes[field] + rc = record[component] + dims = record.axis_labels + time = (series.iterations[iteration].time + record.time_offset) * series.iterations[ + iteration + ].time_unit_SI + time = sc.scalar(time, unit="s", dtype="double") + coords = {"t": time} + for dd, dim in enumerate(dims): + length = rc.shape[dd] + start = record.grid_global_offset[dd] + step = record.grid_spacing[dd] + values = np.arange(length, dtype=np.float64) + values *= step + values += rc.position[dd] * step + values += start + values *= record.grid_unit_SI + coord = sc.array(dims=[dim], values=values, unit="m") + coords[dim] = coord + + small = np.zeros(1, dtype=rc.dtype) + dummy_array = np.lib.stride_tricks.as_strided( + small, shape=rc.shape, strides=[0] * rc.ndim, writeable=False + ) + dummy_array = sc.array( + dims=dims, values=dummy_array, unit=_unit_dimension_to_scipp(record.unit_dimension) + ) + + return DataRelay( + series=series, record=record, record_component=rc, dummy_array=dummy_array, coords=coords + ) + + +def get_field(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR): + """Retrieve and load openPMD mesh data without slicing. + + This function creates a DataRelay object for a specified field and component in an openPMD + series, loads the whole mesh, and returns the resulting DataArray. + + :param series: The openPMD series containing the data. + :type series: openpmd_api.Series + :param iteration: The iteration number to access within the series. + :type iteration: int + :param field: The name of the field to retrieve. + :type field: str + :param component: The component of the field to retrieve, default is SCALAR. + :type component: openpmd_api.Mesh_Record_Component, optional + :return: A DataArray instance with the loaded data. 
+ :rtype: DataRelay + """ + return get_field_data_relay(series, iteration, field, component).load_data() diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py new file mode 100644 index 0000000000..7f47645a59 --- /dev/null +++ b/src/binding/python/openpmd_api/scipp/utils.py @@ -0,0 +1,86 @@ +"""Utility module. + +Author: + Pawel Ordyna + +License: +GPL - 3.0 license. See LICENSE file for details. +""" + +import numpy as np +import scipp as sc + + +def _unit_dimension_to_scipp(unit_dimension): + """Convert a unit dimension from the openPMD standard to a Scipp unit. + + This function takes a tuple representing the powers of the seven base SI units + (length, mass, time, electric current, thermodynamic temperature, amount of substance, + and luminous intensity) and converts it into a Scipp unit. The conversion is based on + the openPMD standard, which describes units as powers of these base measures. + + :param tuple unit_dimension: A tuple containing seven integers, each representing + the power of a base SI unit in the order: (length, mass, time, electric current, + thermodynamic temperature, amount of substance, luminous intensity). + For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s²). + + :returns: A Scipp unit object representing the combined unit as specified by the input + unit dimensions. + :rtype: sc.Unit + + :example: + >>> _unit_dimension_to_scipp((1, 0, -2, 0, 0, 0, 0)) + Unit('m/s^2') + + :notes: + - The function assumes that the input tuple has exactly seven elements. + - Each element in the tuple corresponds to the power of a specific base unit. 
+ """ + # unit dimension description from the openPMD standard: + # powers of the 7 base measures characterizing the record's unit in SI + # (length L, mass M, time T, electric current I, thermodynamic temperature theta, + # amount of substance N, luminous intensity J) + base_units = ( + 1.0 * sc.Unit("m"), + 1.0 * sc.Unit("kg"), + 1.0 * sc.Unit("s"), + 1.0 * sc.Unit("A"), + 1.0 * sc.Unit("K"), + 1.0 * sc.Unit("mol"), + 1.0 * sc.Unit("cd"), + ) + unit = 1.0 * sc.Unit("1") + for dim, base_unit in zip(unit_dimension, base_units, strict=False): + if dim != 0: + unit *= base_unit**dim + return unit.unit + + +def closest(data, dim, val): + """Find the index of the closest value in a dataset along a specified dimension. + + This function calculates the index of the element in the specified dimension + of the dataset that is closest to the given value. It ensures that the value + is converted to the same unit as the dimension's coordinate before performing + the comparison. + + :param data: The data array containing the dimension to search. + :type data: sc.DataArray + :param dim: The name of the dimension along which to find the closest value. + :type dim: str + :param val: The value to compare against, which will be converted to the unit of + the dimension's coordinate. + :type val: sc.Variable + + :return: The index of the closest value in the specified dimension. + :rtype: int + + :notes: + - The function assumes that `val` can be converted to the unit of the + dimension's coordinate. + - The dataset `data` must have coordinates defined for the specified dimension. 
+ """ + val = val.astype("double") + coord = data.coords[dim] + val = val.to(unit=coord.unit) + return np.argmin(sc.abs(coord - val).values) From 7c1e4846d69f102039b4a3c7bdf02d4879da2dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 31 Jan 2025 18:12:25 +0100 Subject: [PATCH 02/24] WIP: Try lazy loading scipp converter --- CMakeLists.txt | 3 +++ examples/15_scipp_loader.py | 9 +++++++++ src/binding/python/openpmd_api/ScippLazyInit.py | 8 ++++++++ src/binding/python/openpmd_api/__init__.py | 2 ++ src/binding/python/openpmd_api/scipp/loader.py | 6 +++--- src/binding/python/openpmd_api/scipp/mesh_loader.py | 7 +++---- 6 files changed, 28 insertions(+), 7 deletions(-) create mode 100644 examples/15_scipp_loader.py create mode 100644 src/binding/python/openpmd_api/ScippLazyInit.py diff --git a/CMakeLists.txt b/CMakeLists.txt index b3a0cc30c2..e3f931ffa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -665,6 +665,8 @@ if(openPMD_HAVE_PYTHON) __init__.py DaskArray.py DaskDataFrame.py DataFrame.py ls/__init__.py ls/__main__.py pipe/__init__.py pipe/__main__.py + scipp/__init__.py scipp/loader.py scipp/mesh_loader.py scipp/utils.py + ScippLazyInit.py ) endif() @@ -720,6 +722,7 @@ set(openPMD_PYTHON_EXAMPLE_NAMES 11_particle_dataframe 12_span_write 13_write_dynamic_configuration + 15_scipp_loader ) if(openPMD_USE_INVASIVE_TESTS) diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py new file mode 100644 index 0000000000..e42f2c2418 --- /dev/null +++ b/examples/15_scipp_loader.py @@ -0,0 +1,9 @@ +import openpmd_api as pmd + + +def main(): + series = pmd.Series("./out.bp5", pmd.Access.read_only) + scipp_loader = series.to_scipp() + +if __name__ == "__main__": + main() diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py new file mode 100644 index 0000000000..e3acc45ccc --- /dev/null +++ b/src/binding/python/openpmd_api/ScippLazyInit.py @@ -0,0 +1,8 @@ +def 
series_to_scipp(series): + + import scipp + + from .scipp import DataLoader + + dl = DataLoader(series) + return dl diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py index 09f21026f9..4fca249e68 100644 --- a/src/binding/python/openpmd_api/__init__.py +++ b/src/binding/python/openpmd_api/__init__.py @@ -3,6 +3,7 @@ from .DaskDataFrame import particles_to_daskdataframe from .DataFrame import (iterations_to_cudf, iterations_to_dataframe, particles_to_dataframe) +from .ScippLazyInit import series_to_scipp from .openpmd_api_cxx import * # noqa __version__ = cxx.__version__ @@ -16,6 +17,7 @@ Record_Component.to_dask_array = record_component_to_daskarray # noqa Series.to_df = iterations_to_dataframe # noqa Series.to_cudf = iterations_to_cudf # noqa +Series.to_scipp = series_to_scipp # TODO remove in future versions (deprecated) Access_Type = Access # noqa diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py index acbf88410b..45bbf5cff6 100644 --- a/src/binding/python/openpmd_api/scipp/loader.py +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -61,7 +61,7 @@ class DataLoader: """ - def __init__(self, path): + def __init__(self, series): """Initialize the DataLoader with an openPMD series from the specified file path. :param path: The file path to the openPMD data file. @@ -71,13 +71,13 @@ def __init__(self, path): and the `iterations` attribute as a Scipp dataset containing iteration IDs and their corresponding times. 
""" - self.series = pmd.Series(str(path), pmd.Access.read_only) + self.series = series self.iterations = get_iterations(self.series) def get_field( self, field, - component=pmd.Mesh_Record_Component.SCALAR, + component=None, time=None, iteration=None, relay=False, diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py index 24b3576dce..e18178bd9b 100644 --- a/src/binding/python/openpmd_api/scipp/mesh_loader.py +++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py @@ -8,7 +8,6 @@ """ import numpy as np -import openpmd_api as pmd import scipp as sc from .utils import _unit_dimension_to_scipp @@ -135,7 +134,7 @@ def load_data(self): return data_array -def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR): +def get_field_data_relay(series, iteration, field, component=None): """Get openPMD mesh as a data relay. Create a DataRelay object for a specified field and component in an openPMD series. @@ -152,7 +151,7 @@ def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Com :rtype: DataRelay """ record = series.iterations[iteration].meshes[field] - rc = record[component] + rc = record[component] if component else record dims = record.axis_labels time = (series.iterations[iteration].time + record.time_offset) * series.iterations[ iteration @@ -184,7 +183,7 @@ def get_field_data_relay(series, iteration, field, component=pmd.Mesh_Record_Com ) -def get_field(series, iteration, field, component=pmd.Mesh_Record_Component.SCALAR): +def get_field(series, iteration, field, component=None): """Retrieve and load openPMD mesh data without slicing. 
This function creates a DataRelay object for a specified field and component in an openPMD From 50a9561aca42e10a8a1e6415d6943ca296da7bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Tue, 4 Feb 2025 14:18:32 +0100 Subject: [PATCH 03/24] Add preliminary example --- examples/15_scipp_loader.py | 41 ++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py index e42f2c2418..4158b54500 100644 --- a/examples/15_scipp_loader.py +++ b/examples/15_scipp_loader.py @@ -1,9 +1,48 @@ import openpmd_api as pmd +import openpmd_api.scipp as pmdsc +import scipp as sc def main(): - series = pmd.Series("./out.bp5", pmd.Access.read_only) + series = pmd.Series("../samples/git-sample/data%T.h5", pmd.Access.read_only) + + time = 65 * sc.Unit("fs") scipp_loader = series.to_scipp() + print(scipp_loader.iterations) + Ex = scipp_loader.get_field("E", "x", time=time) + print(Ex) + slicing_idx = pmdsc.closest(Ex, "x", 2 * sc.Unit("um")) + Ex_slice = Ex["x", slicing_idx] + print(Ex_slice) + Ex_slice.plot().save("slice.png") + Ex_line = Ex_slice["z", pmdsc.closest(Ex_slice, "z", 1.4e-5 * sc.Unit("m"))] + print(Ex_line) + Ex_line.plot().save("line.png") + (Ex_line * Ex_line).plot().save("line_squared.png") + + # The full 3D array is not loaded into memory at this point. + Ex = scipp_loader.get_field("E", "x", time=time, relay=True) + # This time we will select a range rather than a slice. + # For a range there is no need for an exact match. + # But, we could also select a slice just like in the previous example. 
+ Ex = Ex["x", -2e-6 * sc.Unit("m") : 2e-6 * sc.Unit("m")] + # Only now the smaller subset wil be loaded into memory + Ex = Ex.load_data() + print(Ex) + + Ex = sc.concat( + [ + scipp_loader.get_field("E", "x", iteration=iteration.value, time_tolerance=None) + for iteration in scipp_loader.iterations["iteration_id"] + ], + dim="t", + ) + print(Ex) + + # Let us just slice at some points to get a 2D dataset + Ex = Ex["x", 10]["y", 10] + print(Ex) + Ex.plot().save("moving_window.png") if __name__ == "__main__": main() From 2c36906c182c356c7568e74146a22b1969c335c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 4 Feb 2025 13:34:12 +0100 Subject: [PATCH 04/24] Move lazy_init under scipp folder again --- CMakeLists.txt | 2 +- src/binding/python/openpmd_api/__init__.py | 2 +- .../openpmd_api/{ScippLazyInit.py => scipp/lazy_init.py} | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) rename src/binding/python/openpmd_api/{ScippLazyInit.py => scipp/lazy_init.py} (72%) diff --git a/CMakeLists.txt b/CMakeLists.txt index e3f931ffa8..9129cf5b2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -666,7 +666,7 @@ if(openPMD_HAVE_PYTHON) ls/__init__.py ls/__main__.py pipe/__init__.py pipe/__main__.py scipp/__init__.py scipp/loader.py scipp/mesh_loader.py scipp/utils.py - ScippLazyInit.py + scipp/lazy_init.py ) endif() diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py index 4fca249e68..38b8827960 100644 --- a/src/binding/python/openpmd_api/__init__.py +++ b/src/binding/python/openpmd_api/__init__.py @@ -3,8 +3,8 @@ from .DaskDataFrame import particles_to_daskdataframe from .DataFrame import (iterations_to_cudf, iterations_to_dataframe, particles_to_dataframe) -from .ScippLazyInit import series_to_scipp from .openpmd_api_cxx import * # noqa +from .scipp.lazy_init import series_to_scipp __version__ = cxx.__version__ __doc__ = cxx.__doc__ diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py 
b/src/binding/python/openpmd_api/scipp/lazy_init.py similarity index 72% rename from src/binding/python/openpmd_api/ScippLazyInit.py rename to src/binding/python/openpmd_api/scipp/lazy_init.py index e3acc45ccc..ebca4e8125 100644 --- a/src/binding/python/openpmd_api/ScippLazyInit.py +++ b/src/binding/python/openpmd_api/scipp/lazy_init.py @@ -1,8 +1,7 @@ def series_to_scipp(series): - import scipp - from .scipp import DataLoader + from . import DataLoader dl = DataLoader(series) return dl From c2b8412dcfc29b46cc2c412c3afdd7fb54403ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 4 Feb 2025 13:46:06 +0100 Subject: [PATCH 05/24] Revert "Move lazy_init under scipp folder again" This reverts commit 7d4aede2d62a6bd38a6fcfa8147d59f5a06be577. --- CMakeLists.txt | 2 +- .../openpmd_api/{scipp/lazy_init.py => ScippLazyInit.py} | 3 ++- src/binding/python/openpmd_api/__init__.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) rename src/binding/python/openpmd_api/{scipp/lazy_init.py => ScippLazyInit.py} (72%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9129cf5b2c..e3f931ffa8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -666,7 +666,7 @@ if(openPMD_HAVE_PYTHON) ls/__init__.py ls/__main__.py pipe/__init__.py pipe/__main__.py scipp/__init__.py scipp/loader.py scipp/mesh_loader.py scipp/utils.py - scipp/lazy_init.py + ScippLazyInit.py ) endif() diff --git a/src/binding/python/openpmd_api/scipp/lazy_init.py b/src/binding/python/openpmd_api/ScippLazyInit.py similarity index 72% rename from src/binding/python/openpmd_api/scipp/lazy_init.py rename to src/binding/python/openpmd_api/ScippLazyInit.py index ebca4e8125..e3acc45ccc 100644 --- a/src/binding/python/openpmd_api/scipp/lazy_init.py +++ b/src/binding/python/openpmd_api/ScippLazyInit.py @@ -1,7 +1,8 @@ def series_to_scipp(series): + import scipp - from . 
import DataLoader + from .scipp import DataLoader dl = DataLoader(series) return dl diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py index 38b8827960..4fca249e68 100644 --- a/src/binding/python/openpmd_api/__init__.py +++ b/src/binding/python/openpmd_api/__init__.py @@ -3,8 +3,8 @@ from .DaskDataFrame import particles_to_daskdataframe from .DataFrame import (iterations_to_cudf, iterations_to_dataframe, particles_to_dataframe) +from .ScippLazyInit import series_to_scipp from .openpmd_api_cxx import * # noqa -from .scipp.lazy_init import series_to_scipp __version__ = cxx.__version__ __doc__ = cxx.__doc__ From 7e93f34732bfdcc218facf5c8c6e76e2b6d98835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 4 Feb 2025 14:05:05 +0100 Subject: [PATCH 06/24] Some import checks --- examples/15_scipp_loader.py | 12 +++++++++--- src/binding/python/openpmd_api/ScippLazyInit.py | 11 ++++++++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py index 4158b54500..874a0b7b08 100644 --- a/examples/15_scipp_loader.py +++ b/examples/15_scipp_loader.py @@ -1,13 +1,19 @@ import openpmd_api as pmd -import openpmd_api.scipp as pmdsc -import scipp as sc def main(): series = pmd.Series("../samples/git-sample/data%T.h5", pmd.Access.read_only) + try: + scipp_loader = series.to_scipp() + import plopp + except ImportError: + print("Need to install scipp and plopp to run this example.") + return + import openpmd_api.scipp as pmdsc + import scipp as sc time = 65 * sc.Unit("fs") - scipp_loader = series.to_scipp() + print(scipp_loader.iterations) Ex = scipp_loader.get_field("E", "x", time=time) print(Ex) diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py index e3acc45ccc..e2c3832ddf 100644 --- a/src/binding/python/openpmd_api/ScippLazyInit.py +++ b/src/binding/python/openpmd_api/ScippLazyInit.py @@ 
-1,6 +1,15 @@ def series_to_scipp(series): - import scipp + # lazy import + try: + import scipp + found_scipp = True + except ImportError as original_error: + found_scipp = False + original_error_string = f"{original_error}" + + if not found_scipp: + raise ImportError(f"Scipp NOT found. Install scipp for Scipp support. Original error: {original_error_string}") from .scipp import DataLoader From 13d6318bfb5d042a41008a4cdc43229731055231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Wed, 5 Feb 2025 12:14:33 +0100 Subject: [PATCH 07/24] Add Scipp documentation from README.md converted with Pandoc --- docs/source/analysis/README_15_0.svg | 856 ++++++++++++++++++++++++++ docs/source/analysis/README_17_0.svg | 771 ++++++++++++++++++++++++ docs/source/analysis/README_19_0.svg | 786 ++++++++++++++++++++++++ docs/source/analysis/README_26_0.svg | 863 +++++++++++++++++++++++++++ docs/source/analysis/scipp.rst | 370 ++++++++++++ docs/source/index.rst | 1 + 6 files changed, 3647 insertions(+) create mode 100644 docs/source/analysis/README_15_0.svg create mode 100644 docs/source/analysis/README_17_0.svg create mode 100644 docs/source/analysis/README_19_0.svg create mode 100644 docs/source/analysis/README_26_0.svg create mode 100644 docs/source/analysis/scipp.rst diff --git a/docs/source/analysis/README_15_0.svg b/docs/source/analysis/README_15_0.svg new file mode 100644 index 0000000000..45bb3b0208 --- /dev/null +++ b/docs/source/analysis/README_15_0.svg @@ -0,0 +1,856 @@ + + + + + + + + 2025-01-27T16:07:28.783245 + image/svg+xml + + + Matplotlib v3.10.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/analysis/README_17_0.svg b/docs/source/analysis/README_17_0.svg new file mode 100644 index 0000000000..32bc944ba3 --- /dev/null +++ b/docs/source/analysis/README_17_0.svg @@ -0,0 +1,771 @@ + + + + + + + + 2025-01-27T16:07:28.840008 + image/svg+xml + + + Matplotlib v3.10.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/analysis/README_19_0.svg b/docs/source/analysis/README_19_0.svg new file mode 100644 
index 0000000000..212d7b21bc --- /dev/null +++ b/docs/source/analysis/README_19_0.svg @@ -0,0 +1,786 @@ + + + + + + + + 2025-01-27T16:07:28.877880 + image/svg+xml + + + Matplotlib v3.10.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/analysis/README_26_0.svg b/docs/source/analysis/README_26_0.svg new file mode 100644 index 0000000000..48eae95e3c --- /dev/null +++ b/docs/source/analysis/README_26_0.svg @@ -0,0 +1,863 @@ + + + + + + + + 2025-01-27T16:07:28.963101 + image/svg+xml + + + Matplotlib v3.10.0, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst new file mode 100644 index 0000000000..ac0033fe31 --- /dev/null +++ b/docs/source/analysis/scipp.rst @@ -0,0 +1,370 @@ +.. _analysis-scipp: + +Scipp +===== + +Load openpmd datasets to ``scipp`` ``DataArrays``. + +What is this good for? +~~~~~~~~~~~~~~~~~~~~~~ + +`scipp `__ is an alternative to +`xarray `__ and provides basically +numpy arrays with axes description and units. + +* Automatically load axes and units with openPMD data. +* Axes information is automatically updated when slicing, indexing, or filtering your data. +* With ``scipp``\ ’s plotting library `plopp `__ it becomes an alternative to ``openpmd-viewer``. +* Many numpy and some scipy functions including all the basic algebraic operations on arrays are supported by ``scipp``. When using these, the units and coordinates are automatically taken care of. + +Limitations +~~~~~~~~~~~ + +- ``scipp`` currently handles units with a library, that does not + support non-integer exponents for units. This can become problematic + in some calculations. + +Installation +------------ + +It can be easily installed with pip. + +.. code:: bash + + git clone https://github.com/pordyna/openpmd_scipp.git + cd openpmd-scipp + pip install . + +Getting started +--------------- + +Get example data sets from the ``openPMD-example-datasets`` repository. + +.. code:: bash + + git clone https://github.com/openPMD/openPMD-example-datasets.git + cd openPMD-example-datasets + tar -zxvf example-2d.tar.gz + tar -zxvf example-3d.tar.gz + +Opening series +~~~~~~~~~~~~~~ + +.. code:: python + + import scipp as sc + + import openpmd_scipp as pmdsc + +.. code:: python + + path = "openPMD-example-datasets/example-3d/hdf5/data%T.h5" + +.. 
code:: python + + path = ".data/" + path + +.. code:: python + + data_loader = pmdsc.DataLoader(path) + print(data_loader.iterations) + +:: + + + Dimensions: Sizes[t:5, ] + Coordinates: + * t float64 [s] (t) [3.28471e-14, 6.56942e-14, ..., 1.31388e-13, 1.64236e-13] + Data: + iteration_id int64 [dimensionless] (t) [100, 200, ..., 400, 500] + +Working with meshes (fields) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Let us plot electric field’s x component at 65 fs. + +.. code:: python + + Ex = data_loader.get_field("E", "x", time=65 * sc.Unit("fs")) + print(Ex) + +:: + + Series does not contain iteration at the exact time. Using closest iteration instead. + + + + Dimensions: Sizes[x:26, y:26, z:201, ] + Coordinates: + * t float64 [s] () 6.56942e-14 + * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05] + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05] + Data: + float64 [V/m] (x, y, z) [-1.08652e+08, -1.9758e+08, ..., 0, 0] + +You may have noticed, that the time requested does not have to match +exactly any iteration. By default, if there is an iteration within 10 fs +distance it will be used instead. This 10 fs tolerance can be adjusted +by setting ``time_tolerance``. The check can be also disabled by setting +``time_tolerance=None``, with that the method will return the closest +iteration regardless of the difference. So that this will also work: + +.. code:: python + + print(data_loader.get_field("E", "x", time=20 * sc.Unit("fs"), time_tolerance=20 * sc.Unit("fs"))) + +:: + + Series does not contain iteration at the exact time. Using closest iteration instead. 
+ + + + Dimensions: Sizes[x:26, y:26, z:201, ] + Coordinates: + * t float64 [s] () 3.28471e-14 + * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05] + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * z float64 [m] (z) [-5.2e-06, -5.1e-06, ..., 1.47e-05, 1.48e-05] + Data: + float64 [V/m] (x, y, z) [-1.08549e+07, -1.3967e+07, ..., 0, 0] + +, but ``data_loader.get_field('E', 'x', time=20 * sc.Unit('fs'))`` not. + +.. code:: python + + # It is also possible to use iteration number instead: + print(data_loader.get_field("E", "x", iteration=200)) + +:: + + + Dimensions: Sizes[x:26, y:26, z:201, ] + Coordinates: + * t float64 [s] () 6.56942e-14 + * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05] + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05] + Data: + float64 [V/m] (x, y, z) [-1.08652e+08, -1.9758e+08, ..., 0, 0] + +.. code:: python + + # For scalar fields just omit the second argument: + print(data_loader.get_field("rho", iteration=200)) + +:: + + + Dimensions: Sizes[x:26, y:26, z:201, ] + Coordinates: + * t float64 [s] () 6.56942e-14 + * x float64 [m] (x) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05] + Data: + float64 [mC/L] (x, y, z) [-7169.01, -7526.4, ..., 3.16049e-11, 1.22782e-11] + +Plotting +^^^^^^^^ + +W can’t directly plot 3D data. But we can for example select a slice. +For that we can use a helper function ``pmdsc.closest`` to get the +closets index, since ``scipp`` requires exact match. You can read more +about indexing ``scipp`` arrays in ``scipp``\ ’s documentation. + +.. 
code:: python + + slicing_idx = pmdsc.closest(Ex, "x", 2 * sc.Unit("um")) + Ex_slice = Ex["x", slicing_idx] + print(Ex_slice) + +:: + + + Dimensions: Sizes[y:26, z:201, ] + Coordinates: + * t float64 [s] () 6.56942e-14 + x float64 [m] () 2.4e-06 + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05] + Data: + float64 [V/m] (y, z) [4.86614e+08, 6.67018e+08, ..., 0, 0] + +.. code:: python + + Ex_slice.plot() + +.. figure:: README_15_0.svg + +.. code:: python + + # We can also plot line plots: + Ex_line = Ex_slice["z", pmdsc.closest(Ex_slice, "z", 1.4e-5 * sc.Unit("m"))] + print(Ex_line) + +:: + + + Dimensions: Sizes[y:26, ] + Coordinates: + * t float64 [s] () 6.56942e-14 + x float64 [m] () 2.4e-06 + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + z float64 [m] () 1.4e-05 + Data: + float64 [V/m] (y) [3.46069e+07, -2.94134e+07, ..., 8.89353e+06, -4.32182e+07] + +.. code:: python + + Ex_line.plot() + +.. figure:: README_17_0.svg + +Alternatively it is possible to work interactively with ``plopp``\ ’s +tools for visualizing multidimensional data, such as ``pp.slicer``\ or +``pp.inspector``. + +Doing math +^^^^^^^^^^ + +Just as an example we can easily plot the square of the field: + +.. code:: python + + (Ex_line * Ex_line).plot() + +.. figure:: README_19_0.svg + +Loading chunks +~~~~~~~~~~~~~~ + +In the above example the whole 3D field is loaded into memory and sliced +afterward. It is also possible to just load a sub-chunk into memory. +When the ``relay`` option in ``get_field`` is set to ``True`` it will +return a dummy object that only allocates memory for a single value. +This relay object can be indexed, sliced etc. using the ``scipp`` +indexing just like before. (The only limitation given by the +``openpmd-api`` is that the result has to be a contiguous chunk of the +original array). The ``load_data`` method loads data and returns a +proper ``scipp`` data array. + +.. 
code:: python + + # The full 3D array is not loaded into memory at this point. + Ex = data_loader.get_field("E", "x", time=65 * sc.Unit("fs"), relay=True) + # This time we will select a range rather than a slice. + # For a range there is no need for an exact match. + # But, we could also select a slice just like in the previous example. + Ex = Ex["x", -2e-6 * sc.Unit("m") : 2e-6 * sc.Unit("m")] + # Only now the smaller subset wil be loaded into memory + Ex = Ex.load_data() + print(Ex) + +:: + + Series does not contain iteration at the exact time. Using closest iteration instead. + + + + Dimensions: Sizes[x:5, y:26, z:201, ] + Coordinates: + * t float64 [s] () 6.56942e-14 + * x float64 [m] (x) [-1.6e-06, -8e-07, ..., 8e-07, 1.6e-06] + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * z float64 [m] (z) [4.7e-06, 4.8e-06, ..., 2.46e-05, 2.47e-05] + Data: + float64 [V/m] (x, y, z) [-3.65733e+08, -5.01237e+08, ..., 0, 0] + +Time axis +~~~~~~~~~ + +It is also possible to combine arrays from different iterations into one +using ``scipp``\ ’s ``concat`` function. Here is an example for creating +a 4D array from all iterations: + +.. code:: python + + Ex = sc.concat( + [ + data_loader.get_field("E", "x", iteration=iteration.value, time_tolerance=None) + for iteration in data_loader.iterations["iteration_id"] + ], + dim="t", + ) + print(Ex) + +:: + + + Dimensions: Sizes[t:5, x:26, y:26, z:201, ] + Coordinates: + * t float64 [s] (t) [3.28471e-14, 6.56942e-14, ..., 1.31388e-13, 1.64236e-13] + * x float64 [m] (x) [-9.6e-06, -8.8e-06, ..., 9.6e-06, 1.04e-05] + * y float64 [m] (y) [-1e-05, -9.2e-06, ..., 9.2e-06, 1e-05] + * z float64 [m] (t, z) [-5.2e-06, -5.1e-06, ..., 5.41e-05, 5.42e-05] + Data: + float64 [V/m] (t, x, y, z) [-1.08549e+07, -1.3967e+07, ..., 0, 0] + +The reason for the z coordinate having two dimensions (t,z) is the fact +that the data comes from a moving window simulation. This is clearly +visible in the plot below. + +.. 
code:: python + + # Let us just slice at some points to get a 2D dataset + Ex = Ex["x", 10]["y", 10] + print(Ex) + +:: + + + Dimensions: Sizes[t:5, z:201, ] + Coordinates: + * t float64 [s] (t) [3.28471e-14, 6.56942e-14, ..., 1.31388e-13, 1.64236e-13] + x float64 [m] () -1.6e-06 + y float64 [m] () -2e-06 + * z float64 [m] (t, z) [-5.2e-06, -5.1e-06, ..., 5.41e-05, 5.42e-05] + Data: + float64 [V/m] (t, z) [-8.41738e+08, -7.8752e+08, ..., 0, 0] + +.. code:: python + + Ex.plot() + +.. figure:: README_26_0.svg + +Working with particle data +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Coming soon! + +Developer documentation +----------------------- + +Generating this README +~~~~~~~~~~~~~~~~~~~~~~ + +README file is generated from the README.ipynb. + +:: + + # Download and extract example datasets if not present + # Will download data into ``.data`` + make data + + make docs + +Running tests +~~~~~~~~~~~~~ + +At the moment we only test we have is an integration test running this +notebook. After downloading example datasets with ``make data``, if +needed, run: + +:: + + make test + +You can also run tests with different python version with tox, but you +need to have the python version installed, for example with pyenv. 
diff --git a/docs/source/index.rst b/docs/source/index.rst index db2bbc3002..a3e08885d7 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -142,6 +142,7 @@ Data Analysis analysis/pandas analysis/dask analysis/rapids + analysis/scipp analysis/contrib Development From a647bfd1e089a5497b11ad2ffbd82bdfd6160049 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Wed, 5 Feb 2025 16:43:19 +0100 Subject: [PATCH 08/24] Some formatting fixes --- docs/source/analysis/scipp.rst | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst index ac0033fe31..009d892362 100644 --- a/docs/source/analysis/scipp.rst +++ b/docs/source/analysis/scipp.rst @@ -14,18 +14,18 @@ numpy arrays with axes description and units. * Automatically load axes and units with openPMD data. * Axes information is automatically updated when slicing, indexing, or filtering your data. -* With ``scipp``\ ’s plotting library `plopp `__ it becomes an alternative to ``openpmd-viewer``. +* With ``scipp``'s plotting library `plopp `__ it becomes an alternative to ``openpmd-viewer``. * Many numpy and some scipy functions including all the basic algebraic operations on arrays are supported by ``scipp``. When using these, the units and coordinates are automatically taken care of. Limitations -~~~~~~~~~~~ +----------- - ``scipp`` currently handles units with a library, that does not support non-integer exponents for units. This can become problematic in some calculations. Installation ------------- +~~~~~~~~~~~~ It can be easily installed with pip. @@ -36,7 +36,7 @@ It can be easily installed with pip. pip install . Getting started ---------------- +~~~~~~~~~~~~~~~ Get example data sets from the ``openPMD-example-datasets`` repository. 
@@ -79,9 +79,9 @@ Opening series iteration_id int64 [dimensionless] (t) [100, 200, ..., 400, 500] Working with meshes (fields) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +---------------------------- -Let us plot electric field’s x component at 65 fs. +Let us plot electric field's x component at 65 fs. .. code:: python @@ -166,12 +166,12 @@ iteration regardless of the difference. So that this will also work: float64 [mC/L] (x, y, z) [-7169.01, -7526.4, ..., 3.16049e-11, 1.22782e-11] Plotting -^^^^^^^^ +-------- -W can’t directly plot 3D data. But we can for example select a slice. +We can't directly plot 3D data. But we can for example select a slice. For that we can use a helper function ``pmdsc.closest`` to get the closets index, since ``scipp`` requires exact match. You can read more -about indexing ``scipp`` arrays in ``scipp``\ ’s documentation. +about indexing ``scipp`` arrays in ``scipp``'s documentation. .. code:: python @@ -221,12 +221,12 @@ about indexing ``scipp`` arrays in ``scipp``\ ’s documentation. .. figure:: README_17_0.svg -Alternatively it is possible to work interactively with ``plopp``\ ’s +Alternatively it is possible to work interactively with ``plopp``'s tools for visualizing multidimensional data, such as ``pp.slicer``\ or ``pp.inspector``. Doing math -^^^^^^^^^^ +---------- Just as an example we can easily plot the square of the field: @@ -237,7 +237,7 @@ Just as an example we can easily plot the square of the field: .. figure:: README_19_0.svg Loading chunks -~~~~~~~~~~~~~~ +-------------- In the above example the whole 3D field is loaded into memory and sliced afterward. It is also possible to just load a sub-chunk into memory. @@ -277,10 +277,10 @@ proper ``scipp`` data array. float64 [V/m] (x, y, z) [-3.65733e+08, -5.01237e+08, ..., 0, 0] Time axis -~~~~~~~~~ +--------- It is also possible to combine arrays from different iterations into one -using ``scipp``\ ’s ``concat`` function. 
Here is an example for creating +using ``scipp``'s ``concat`` function. Here is an example for creating a 4D array from all iterations: .. code:: python From 464572c8e77de7b9f84ffd6786a35894b12386b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 5 Feb 2025 16:43:37 +0100 Subject: [PATCH 09/24] Adapt documentation to integration into openPMD-api --- docs/source/analysis/scipp.rst | 55 +++++++++------------------------- 1 file changed, 14 insertions(+), 41 deletions(-) diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst index 009d892362..6436092c99 100644 --- a/docs/source/analysis/scipp.rst +++ b/docs/source/analysis/scipp.rst @@ -27,13 +27,13 @@ Limitations Installation ~~~~~~~~~~~~ -It can be easily installed with pip. +The adaptor from openPMD to Scipp is part of the regular openPMD-api Python distribution. +It is loaded lazily and available as soon as ``scipp`` is also installed. +Plotting functionality requires the additional installation of ``plopp``. .. code:: bash - git clone https://github.com/pordyna/openpmd_scipp.git - cd openpmd-scipp - pip install . + pip install openpmd_api scipp plopp Getting started ~~~~~~~~~~~~~~~ @@ -47,26 +47,29 @@ Get example data sets from the ``openPMD-example-datasets`` repository. tar -zxvf example-2d.tar.gz tar -zxvf example-3d.tar.gz +.. note:: + + You can find scripts to download and unpack this sample data under ``share/openPMD``. + Opening series -~~~~~~~~~~~~~~ +-------------- .. code:: python import scipp as sc - import openpmd_scipp as pmdsc + import openpmd_api as pmd + import openpmd_api.scipp as pmdsc .. code:: python path = "openPMD-example-datasets/example-3d/hdf5/data%T.h5" + path = "./data/" + path .. code:: python - path = ".data/" + path - -.. 
code:: python - - data_loader = pmdsc.DataLoader(path) + series = pmd.Series(path, pmd.Access.read_random_access) + data_loader = series.to_scipp() print(data_loader.iterations) :: @@ -338,33 +341,3 @@ Working with particle data ~~~~~~~~~~~~~~~~~~~~~~~~~~ Coming soon! - -Developer documentation ------------------------ - -Generating this README -~~~~~~~~~~~~~~~~~~~~~~ - -README file is generated from the README.ipynb. - -:: - - # Download and extract example datasets if not present - # Will download data into ``.data`` - make data - - make docs - -Running tests -~~~~~~~~~~~~~ - -At the moment we only test we have is an integration test running this -notebook. After downloading example datasets with ``make data``, if -needed, run: - -:: - - make test - -You can also run tests with different python version with tox, but you -need to have the python version installed, for example with pyenv. From 12625838a478ee7757cbb3058ff49a2ad6c1909a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Wed, 5 Feb 2025 17:33:34 +0100 Subject: [PATCH 10/24] Add Jupyter Notebook from openpmd_scipp repo --- examples/15_scipp_loader.ipynb | 381 +++++++++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 examples/15_scipp_loader.ipynb diff --git a/examples/15_scipp_loader.ipynb b/examples/15_scipp_loader.ipynb new file mode 100644 index 0000000000..269aca4835 --- /dev/null +++ b/examples/15_scipp_loader.ipynb @@ -0,0 +1,381 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2e89b77182b6ebd7", + "metadata": {}, + "source": [ + "# openpmd-scipp\n", + "Load openpmd datasets to `scipp` `DataArrays`.\n", + "\n", + "## Description\n", + "### What is this good for?\n", + "[`scipp`](https://github.com/scipp/scipp) is an alternative to [`xarray`](https://github.com/pydata/xarray) and provides basically numpy arrays with axes description and units.\n", + "* Automatically load axes and units with openPMD data.\n", + "* Axes information is 
automatically updated when slicing, indexing, or filtering your data.\n", + "* With `scipp`'s plotting library [`plopp`](https://github.com/scipp/plopp) it becomes an alternative to `openpmd-viewer`.\n", + "* Many numpy and some scipy functions including all the basic algebraic operations on arrays are supported by `scipp`. When using these, the units and coordinates are automatically taken care of. \n", + "\n", + "### Limitations\n", + "* `scipp` currently handles units with a library, that does not support non-integer exponents for units. This can become problematic in some calculations. " + ] + }, + { + "cell_type": "markdown", + "id": "fc4a0023fa8afef2", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Installation\n", + "It can be easily installed with pip.\n", + "```bash\n", + "git clone https://github.com/pordyna/openpmd_scipp.git\n", + "cd openpmd-scipp\n", + "pip install .\n", + "```\n", + "\n", + "## Getting started\n", + "Get example data sets from the `openPMD-example-datasets` repository.\n", + "```bash\n", + "git clone https://github.com/openPMD/openPMD-example-datasets.git\n", + "cd openPMD-example-datasets\n", + "tar -zxvf example-2d.tar.gz\n", + "tar -zxvf example-3d.tar.gz\n", + "```\n", + " \n", + "\n", + "### Opening series" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": {}, + "outputs": [], + "source": [ + "import scipp as sc\n", + "\n", + "import openpmd_scipp as pmdsc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67bba264bde780ef", + "metadata": {}, + "outputs": [], + "source": [ + "path = \"openPMD-example-datasets/example-3d/hdf5/data%T.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc1b21e51bd6e524", + "metadata": {}, + "outputs": [], + "source": [ + "path = \".data/\" + path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97abaf499694da8c", + "metadata": {}, + "outputs": [], + "source": [ + 
"data_loader = pmdsc.DataLoader(path)\n", + "print(data_loader.iterations)" + ] + }, + { + "cell_type": "markdown", + "id": "966866d44f32e381", + "metadata": {}, + "source": [ + "### Working with meshes (fields)\n", + "Let us plot electric field's x component at 65 fs. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48d147dd17b96d", + "metadata": {}, + "outputs": [], + "source": [ + "Ex = data_loader.get_field(\"E\", \"x\", time=65 * sc.Unit(\"fs\"))\n", + "print(Ex)" + ] + }, + { + "cell_type": "markdown", + "id": "69d531a49c1021d4", + "metadata": {}, + "source": [ + "You may have noticed, that the time requested does not have to match exactly any iteration. By default, if there is an iteration within 10 fs distance it will be used instead. This 10 fs tolerance can be adjusted by setting `time_tolerance`. The check can be also disabled by setting `time_tolerance=None`, with that the method will return the closest iteration regardless of the difference. So that this will also work:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7811da56397eeab6", + "metadata": {}, + "outputs": [], + "source": [ + "print(data_loader.get_field(\"E\", \"x\", time=20 * sc.Unit(\"fs\"), time_tolerance=20 * sc.Unit(\"fs\")))" + ] + }, + { + "cell_type": "markdown", + "id": "f05c2e19a7ea2350", + "metadata": {}, + "source": [ + ", but `data_loader.get_field('E', 'x', time=20 * sc.Unit('fs'))` not." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52e59ab2524367f4", + "metadata": {}, + "outputs": [], + "source": [ + "# It is also possible to use iteration number instead:\n", + "print(data_loader.get_field(\"E\", \"x\", iteration=200))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f4b4ac390d8a45f", + "metadata": {}, + "outputs": [], + "source": [ + "# For scalar fields just omit the second argument:\n", + "print(data_loader.get_field(\"rho\", iteration=200))" + ] + }, + { + "cell_type": "markdown", + "id": "524aaf62b6967893", + "metadata": {}, + "source": [ + "#### Plotting\n", + "W can't directly plot 3D data.\n", + "But we can for example select a slice. For that we can use a helper function `pmdsc.closest` to get the closets index, since `scipp` requires exact match. You can read more about indexing `scipp` arrays in `scipp`'s documentation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c35a81329a3cc83", + "metadata": {}, + "outputs": [], + "source": [ + "slicing_idx = pmdsc.closest(Ex, \"x\", 2 * sc.Unit(\"um\"))\n", + "Ex_slice = Ex[\"x\", slicing_idx]\n", + "print(Ex_slice)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60536b5c1b47ff20", + "metadata": {}, + "outputs": [], + "source": [ + "Ex_slice.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66cea62fcce29f24", + "metadata": {}, + "outputs": [], + "source": [ + "# We can also plot line plots:\n", + "Ex_line = Ex_slice[\"z\", pmdsc.closest(Ex_slice, \"z\", 1.4e-5 * sc.Unit(\"m\"))]\n", + "print(Ex_line)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2da76154c389c9f", + "metadata": {}, + "outputs": [], + "source": [ + "Ex_line.plot()" + ] + }, + { + "cell_type": "markdown", + "id": "c0b3b040b06ab6d9", + "metadata": {}, + "source": [ + "Alternatively it is possible to work interactively with `plopp`'s tools for visualizing multidimensional 
data, such as `pp.slicer`or `pp.inspector`.\n", + "\n", + "#### Doing math\n", + "Just as an example we can easily plot the square of the field:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f8aa64caf8d2a66", + "metadata": {}, + "outputs": [], + "source": [ + "(Ex_line * Ex_line).plot()" + ] + }, + { + "cell_type": "markdown", + "id": "8ba6694fadbe70e5", + "metadata": {}, + "source": [ + "### Loading chunks\n", + "In the above example the whole 3D field is loaded into memory and sliced afterward. It is also possible to just load a sub-chunk into memory. When the `relay` option in `get_field` is set to `True` it will return a dummy object that only allocates memory for a single value. This relay object can be indexed, sliced etc. using the `scipp` indexing just like before. (The only limitation given by the `openpmd-api` is that the result has to a be contiguous chunk of the original array). The `load_data` method loads data and returns a proper `scipp` data array. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f8585597ab80058", + "metadata": {}, + "outputs": [], + "source": [ + "# The full 3D array is not loaded into memory at this point.\n", + "Ex = data_loader.get_field(\"E\", \"x\", time=65 * sc.Unit(\"fs\"), relay=True)\n", + "# This time we will select a range rather than a slice.\n", + "# For a range there is no need for an exact match.\n", + "# But, we could also select a slice just like in the previous example.\n", + "Ex = Ex[\"x\", -2e-6 * sc.Unit(\"m\") : 2e-6 * sc.Unit(\"m\")]\n", + "# Only now the smaller subset wil be loaded into memory\n", + "Ex = Ex.load_data()\n", + "print(Ex)" + ] + }, + { + "cell_type": "markdown", + "id": "7fe77682f59178ab", + "metadata": {}, + "source": [ + "### Time axis\n", + "It is also possible to combine arrays from different iterations into one using `scipp`'s `concat` function. 
Here is an example for creating a 4D array from all iterations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74e0e0960eb431e4", + "metadata": {}, + "outputs": [], + "source": [ + "Ex = sc.concat(\n", + " [\n", + " data_loader.get_field(\"E\", \"x\", iteration=iteration.value, time_tolerance=None)\n", + " for iteration in data_loader.iterations[\"iteration_id\"]\n", + " ],\n", + " dim=\"t\",\n", + ")\n", + "print(Ex)" + ] + }, + { + "cell_type": "markdown", + "id": "7ea35adaeecae10d", + "metadata": {}, + "source": [ + "The reason for the z coordinate having two dimensions (t,z) is the fact that the data comes from a moving window simulation. This is clearly visible in the plot below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97c0e662a6724f16", + "metadata": {}, + "outputs": [], + "source": [ + "# Let us just slice at some points to get a 2D dataset\n", + "Ex = Ex[\"x\", 10][\"y\", 10]\n", + "print(Ex)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "accb22f6ed91f90f", + "metadata": {}, + "outputs": [], + "source": [ + "Ex.plot()" + ] + }, + { + "cell_type": "markdown", + "id": "6118dd51b20e156e", + "metadata": {}, + "source": [ + "### Working with particle data\n", + "Coming soon!" + ] + }, + { + "cell_type": "markdown", + "id": "60ebf9bd3826ee54", + "metadata": {}, + "source": [ + "## Developer documentation\n", + "### Generating this README\n", + "README file is generated from the README.ipynb.\n", + "```\n", + "# Download and extract example datasets if not present\n", + "# Will download data into `.data`\n", + "make data\n", + "\n", + "make docs\n", + "```\n", + "### Running tests\n", + "At the moment we only test we have is an integration test running this notebook. 
After downloading example datasets with `make data`, if needed, run:\n", + "```\n", + "make test\n", + "```\n", + "You can also run tests with different python version with tox, but you need to have the python version installed, for example with pyenv.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "425c6cc9a98a5878", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From dc1877c0d2b89b7f64e1c2839bedaf6482075b5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 5 Feb 2025 17:34:30 +0100 Subject: [PATCH 11/24] Adapt to changes in the openPMD-api repo --- examples/15_scipp_loader.ipynb | 67 +++++++--------------------------- 1 file changed, 13 insertions(+), 54 deletions(-) diff --git a/examples/15_scipp_loader.ipynb b/examples/15_scipp_loader.ipynb index 269aca4835..2494984da4 100644 --- a/examples/15_scipp_loader.ipynb +++ b/examples/15_scipp_loader.ipynb @@ -28,22 +28,20 @@ "\n", "\n", "## Installation\n", - "It can be easily installed with pip.\n", + "The adaptor from openPMD to Scipp is part of the regular openPMD-api Python distribution.\n", + "It is loaded lazily and available as soon as `scipp` is also installed.\n", + "Plotting functionality requires the additional installation of `plopp`.\n", + "\n", "```bash\n", - "git clone https://github.com/pordyna/openpmd_scipp.git\n", - "cd openpmd-scipp\n", - "pip install .\n", + "pip install openpmd_api scipp plopp\n", "```\n", "\n", "## Getting started\n", "Get example data sets from the `openPMD-example-datasets` repository.\n", "```bash\n", - "git clone 
https://github.com/openPMD/openPMD-example-datasets.git\n", - "cd openPMD-example-datasets\n", - "tar -zxvf example-2d.tar.gz\n", - "tar -zxvf example-3d.tar.gz\n", + "cd openPMD-api\n", + "./share/openPMD/download_samples.sh\n", "```\n", - " \n", "\n", "### Opening series" ] @@ -57,7 +55,8 @@ "source": [ "import scipp as sc\n", "\n", - "import openpmd_scipp as pmdsc" + "import openpmd_api as pmd\n", + "import openpmd_api.scipp as pmdsc" ] }, { @@ -67,17 +66,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = \"openPMD-example-datasets/example-3d/hdf5/data%T.h5\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc1b21e51bd6e524", - "metadata": {}, - "outputs": [], - "source": [ - "path = \".data/\" + path" + "path = \"../samples/git-sample/data%T.h5\"" ] }, { @@ -87,7 +76,8 @@ "metadata": {}, "outputs": [], "source": [ - "data_loader = pmdsc.DataLoader(path)\n", + "series = pmd.Series(path, pmd.Access.read_random_access)\n", + "data_loader = series.to_scipp()\n", "print(data_loader.iterations)" ] }, @@ -165,7 +155,7 @@ "metadata": {}, "source": [ "#### Plotting\n", - "W can't directly plot 3D data.\n", + "We can't directly plot 3D data.\n", "But we can for example select a slice. For that we can use a helper function `pmdsc.closest` to get the closets index, since `scipp` requires exact match. You can read more about indexing `scipp` arrays in `scipp`'s documentation." ] }, @@ -325,37 +315,6 @@ "### Working with particle data\n", "Coming soon!" ] - }, - { - "cell_type": "markdown", - "id": "60ebf9bd3826ee54", - "metadata": {}, - "source": [ - "## Developer documentation\n", - "### Generating this README\n", - "README file is generated from the README.ipynb.\n", - "```\n", - "# Download and extract example datasets if not present\n", - "# Will download data into `.data`\n", - "make data\n", - "\n", - "make docs\n", - "```\n", - "### Running tests\n", - "At the moment we only test we have is an integration test running this notebook. 
After downloading example datasets with `make data`, if needed, run:\n", - "```\n", - "make test\n", - "```\n", - "You can also run tests with different python version with tox, but you need to have the python version installed, for example with pyenv.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "425c6cc9a98a5878", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From fb0a9adca4c6c46ee63ff484ab828b80f9146ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 5 Feb 2025 17:34:44 +0100 Subject: [PATCH 12/24] Some fixes for documentation --- docs/source/analysis/scipp.rst | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/source/analysis/scipp.rst b/docs/source/analysis/scipp.rst index 6436092c99..3b1e7814d8 100644 --- a/docs/source/analysis/scipp.rst +++ b/docs/source/analysis/scipp.rst @@ -5,6 +5,10 @@ Scipp Load openpmd datasets to ``scipp`` ``DataArrays``. +.. note:: + + This documentation page is also available as an interactively executable Jupyter Notebook in ``examples/15_scipp_loader.ipynb``. + What is this good for? ~~~~~~~~~~~~~~~~~~~~~~ @@ -63,13 +67,12 @@ Opening series .. code:: python - path = "openPMD-example-datasets/example-3d/hdf5/data%T.h5" - path = "./data/" + path + path = "../samples/git-sample/data%T.h5" .. 
code:: python series = pmd.Series(path, pmd.Access.read_random_access) - data_loader = sereies.to_scipp() + data_loader = series.to_scipp() print(data_loader.iterations) :: From 10eec0270139f7dbb149aba97fb4eb277a22e188 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 5 Feb 2025 18:07:19 +0100 Subject: [PATCH 13/24] Try adding some scipp runners to the CI --- .github/workflows/linux.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 53daa723c7..f336918594 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -133,6 +133,7 @@ jobs: sudo apt install clang-14 cmake gfortran libhdf5-dev python3.11 python3.11-dev wget sudo .github/workflows/dependencies/install_spack python3.11 -m pip install numpy pandas + python -m pip install scipp plopp git clone -b v4.0.3 https://github.com/ToruNiina/toml11 cmake -S toml11 -B build_toml11 \ -DCMAKE_INSTALL_PREFIX=toml11_install \ @@ -229,6 +230,8 @@ jobs: python3 -m pip install -U pandas python3 -m pip install -U dask python3 -m pip install -U pyarrow + python3 -m pip install -U plopp + python3 -m pip install -U scipp - name: Build env: {CC: gcc-7, CXX: g++-7, CXXFLAGS: -Werror} run: | @@ -282,6 +285,8 @@ jobs: apk update apk add hdf5-dev python3.10 -m pip install numpy + python -m pip install scipp + python -m pip install plopp - name: Build env: {CXXFLAGS: -Werror} run: | From 9449631431d57b3b30d8c657b5e512d6276d2370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Feb 2025 18:18:33 +0100 Subject: [PATCH 14/24] scipp apparently unsupported on musllinux maybe report --- .github/workflows/linux.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index f336918594..17d6aa069e 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -285,8 +285,6 @@ jobs: apk update apk add hdf5-dev python3.10 -m pip 
install numpy - python -m pip install scipp - python -m pip install plopp - name: Build env: {CXXFLAGS: -Werror} run: | From 7c63412e0ec3dac53afcaa64372faa2888989ba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Tue, 11 Feb 2025 18:47:07 +0100 Subject: [PATCH 15/24] Remove non-ASCII character --- src/binding/python/openpmd_api/scipp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py index 7f47645a59..a29cfe4024 100644 --- a/src/binding/python/openpmd_api/scipp/utils.py +++ b/src/binding/python/openpmd_api/scipp/utils.py @@ -22,7 +22,7 @@ def _unit_dimension_to_scipp(unit_dimension): :param tuple unit_dimension: A tuple containing seven integers, each representing the power of a base SI unit in the order: (length, mass, time, electric current, thermodynamic temperature, amount of substance, luminous intensity). - For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s²). + For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s^2). :returns: A Scipp unit object representing the combined unit as specified by the input unit dimensions. 
From 26d7d048e91b16ebcc4b177aed5bd21a6d296e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Tue, 11 Feb 2025 18:56:35 +0100 Subject: [PATCH 16/24] Documentation: 80 chars line limit --- .../python/openpmd_api/scipp/__init__.py | 3 +- .../python/openpmd_api/scipp/loader.py | 24 +++++++---- .../python/openpmd_api/scipp/mesh_loader.py | 39 ++++++++++------- src/binding/python/openpmd_api/scipp/utils.py | 43 +++++++++++-------- 4 files changed, 64 insertions(+), 45 deletions(-) diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py index 66019bd413..f74fe90536 100644 --- a/src/binding/python/openpmd_api/scipp/__init__.py +++ b/src/binding/python/openpmd_api/scipp/__init__.py @@ -1,4 +1,5 @@ -"""openpmd_scipp: A Python package for loading openPMD datasets into scipp DataArrays. +"""openpmd_scipp: A Python package for loading openPMD datasets + into scipp DataArrays. See README.md for documentation diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py index 45bbf5cff6..6b909e9b5a 100644 --- a/src/binding/python/openpmd_api/scipp/loader.py +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -35,7 +35,8 @@ def get_iterations(series): :param series: The openPMD series containing the data. :type series: openpmd_api.Series - :return: A Scipp dataset containing iteration IDs and their corresponding times. + :return: A Scipp dataset containing iteration IDs and their + corresponding times. :rtype: sc.Dataset """ t = get_time_axis(series) @@ -56,20 +57,23 @@ class DataLoader: The data can be retrieved as a DataRelay object or directly as a DataArray. Attributes: - series (openpmd_api.Series): The openPMD series initialized from the file path. - iterations (sc.Dataset): A dataset containing iteration IDs and their corresponding times. + series (openpmd_api.Series): The openPMD series initialized from + the file path. 
+ iterations (sc.Dataset): A dataset containing iteration IDs + and their corresponding times. """ def __init__(self, series): - """Initialize the DataLoader with an openPMD series from the specified file path. + """Initialize the DataLoader with an openPMD series from + the specified file path. :param path: The file path to the openPMD data file. :type path: str - Initializes the `series` attribute as an openPMD series in read-only mode - and the `iterations` attribute as a Scipp dataset containing iteration IDs - and their corresponding times. + Initializes the `series` attribute as an openPMD series in + read-only mode and the `iterations` attribute as a Scipp dataset + containing iteration IDs and their corresponding times. """ self.series = series self.iterations = get_iterations(self.series) @@ -122,9 +126,11 @@ def get_field( # handle integer inputs time = time.astype("double") time_tolerance = time_tolerance.astype("double") - time = time.to(unit=self.iterations["iteration_id"].coords["t"].unit) + time = time.to( + unit=self.iterations["iteration_id"].coords["t"].unit) try: - iteration = int(self.iterations["iteration_id"]["t", time].value) + iteration = int( + self.iterations["iteration_id"]["t", time].value) except IndexError: idx = closest(self.iterations["iteration_id"], "t", time) iteration = self.iterations["iteration_id"]["t", idx] diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py index e18178bd9b..790a13a27d 100644 --- a/src/binding/python/openpmd_api/scipp/mesh_loader.py +++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py @@ -40,8 +40,8 @@ class DataRelay(sc.DataArray): def _verify_init(self): """Verify that the data is contiguous. - Check if the chosen subset is contiguous in the openPMD storage by checking coordinate - differences against the expected grid spacing. 
+ Check if the chosen subset is contiguous in the openPMD storage + by checking coordinate differences against the expected grid spacing. This is needed since openPMD does nto allow us to load strided chunks. """ @@ -64,11 +64,13 @@ def __init__(self, series, record, record_component, dummy_array, coords): :type series: openpmd_api.Series :param record: The openPMD record object associated with the mesh. :type record: openpmd_api.Record - :param record_component: The openPMD record component associated with the mesh. + :param record_component: The openPMD record component + associated with the mesh. :type record_component: openpmd_api.Record_Component - :param dummy_array: A scipp array used for the dummy interface. It should use as little - memory as possible. Usually achieved by setting the stride of the values array to 0. Can - be read- only. + :param dummy_array: A scipp array used for the dummy interface. + It should use as little memory as possible. + Usually achieved by setting the stride of the values array to 0. + Can be read- only. :type dummy_array: sc.array :param coords: A dictionary of coordinates for the DataArray. """ @@ -81,8 +83,9 @@ def __init__(self, series, record, record_component, dummy_array, coords): def __getitem__(self, *args, **kwargs): """Retrieve a subset of the data, returning a new DataRelay instance. - Override this method from the base class to use the DataRelay initializer and ensure that - DataRelay is returned and the _verify_init method is used. + Override this method from the base class to use the DataRelay + initializer and ensure that DataRelay is returned and + the _verify_init method is used. :param args: Forwarded to the base class. :type args: tuple @@ -105,9 +108,9 @@ def load_data(self): Loads a chunk based on the current data array coordinates. - Calculates the offset and extent for each dimension using the data array coordinates. 
Loads - the data chunk from the record component, scales it by the unit SI, and returns a new data - array with loaded values. + Calculates the offset and extent for each dimension using the data array + coordinates. Loads the data chunk from the record component, scales it + by the unit SI, and returns a new data array with loaded values. :return: The DataArray instance with the loaded data. :rtype: DataRelay @@ -137,7 +140,8 @@ def load_data(self): def get_field_data_relay(series, iteration, field, component=None): """Get openPMD mesh as a data relay. - Create a DataRelay object for a specified field and component in an openPMD series. + Create a DataRelay object for a specified field and component in + an openPMD series. :param series: The openPMD series containing the data. :type series: openpmd_api.Series @@ -147,7 +151,8 @@ def get_field_data_relay(series, iteration, field, component=None): :type field: str :param component: The component of the field to retrieve, default is SCALAR. :type component: openpmd_api.Mesh_Record_Component, optional - :return: A DataRelay instance initialized with the specified field and component data. + :return: A DataRelay instance initialized with the specified field + and component data. :rtype: DataRelay """ record = series.iterations[iteration].meshes[field] @@ -175,7 +180,8 @@ def get_field_data_relay(series, iteration, field, component=None): small, shape=rc.shape, strides=[0] * rc.ndim, writeable=False ) dummy_array = sc.array( - dims=dims, values=dummy_array, unit=_unit_dimension_to_scipp(record.unit_dimension) + dims=dims, values=dummy_array, unit=_unit_dimension_to_scipp( + record.unit_dimension) ) return DataRelay( @@ -186,8 +192,9 @@ def get_field_data_relay(series, iteration, field, component=None): def get_field(series, iteration, field, component=None): """Retrieve and load openPMD mesh data without slicing. 
- This function creates a DataRelay object for a specified field and component in an openPMD - series, loads the whole mesh, and returns the resulting DataArray. + This function creates a DataRelay object for a specified field + and component in an openPMD series, loads the whole mesh, and returns + the resulting DataArray. :param series: The openPMD series containing the data. :type series: openpmd_api.Series diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py index a29cfe4024..a3f300e902 100644 --- a/src/binding/python/openpmd_api/scipp/utils.py +++ b/src/binding/python/openpmd_api/scipp/utils.py @@ -14,18 +14,20 @@ def _unit_dimension_to_scipp(unit_dimension): """Convert a unit dimension from the openPMD standard to a Scipp unit. - This function takes a tuple representing the powers of the seven base SI units - (length, mass, time, electric current, thermodynamic temperature, amount of substance, - and luminous intensity) and converts it into a Scipp unit. The conversion is based on - the openPMD standard, which describes units as powers of these base measures. - - :param tuple unit_dimension: A tuple containing seven integers, each representing - the power of a base SI unit in the order: (length, mass, time, electric current, - thermodynamic temperature, amount of substance, luminous intensity). - For example, (1, 0, -2, 0, 0, 0, 0) corresponds to meters per second squared (m/s^2). - - :returns: A Scipp unit object representing the combined unit as specified by the input - unit dimensions. + This function takes a tuple representing the powers of the seven base SI + units (length, mass, time, electric current, thermodynamic temperature, + amount of substance, and luminous intensity) and converts it into + a Scipp unit. The conversion is based on the openPMD standard, + which describes units as powers of these base measures. 
+ + :param tuple unit_dimension: A tuple containing seven integers, each + representing the power of a base SI unit in the order: (length, mass, time, + electric current, thermodynamic temperature, amount of substance, + luminous intensity). For example, (1, 0, -2, 0, 0, 0, 0) corresponds + to meters per second squared (m/s^2). + + :returns: A Scipp unit object representing the combined unit as specified + by the input unit dimensions. :rtype: sc.Unit :example: @@ -34,7 +36,8 @@ def _unit_dimension_to_scipp(unit_dimension): :notes: - The function assumes that the input tuple has exactly seven elements. - - Each element in the tuple corresponds to the power of a specific base unit. + - Each element in the tuple corresponds to the power of + a specific base unit. """ # unit dimension description from the openPMD standard: # powers of the 7 base measures characterizing the record's unit in SI @@ -57,19 +60,20 @@ def _unit_dimension_to_scipp(unit_dimension): def closest(data, dim, val): - """Find the index of the closest value in a dataset along a specified dimension. + """Find the index of the closest value in a dataset + along a specified dimension. This function calculates the index of the element in the specified dimension of the dataset that is closest to the given value. It ensures that the value - is converted to the same unit as the dimension's coordinate before performing - the comparison. + is converted to the same unit as the dimension's coordinate before + performing the comparison. :param data: The data array containing the dimension to search. :type data: sc.DataArray :param dim: The name of the dimension along which to find the closest value. :type dim: str - :param val: The value to compare against, which will be converted to the unit of - the dimension's coordinate. + :param val: The value to compare against, which will be converted to the + unit of the dimension's coordinate. 
:type val: sc.Variable :return: The index of the closest value in the specified dimension. @@ -78,7 +82,8 @@ def closest(data, dim, val): :notes: - The function assumes that `val` can be converted to the unit of the dimension's coordinate. - - The dataset `data` must have coordinates defined for the specified dimension. + - The dataset `data` must have coordinates defined + for the specified dimension. """ val = val.astype("double") coord = data.coords[dim] From 9b3e45dc3b2d705c467b5e8c220efd9d3c44a7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Tue, 11 Feb 2025 19:03:46 +0100 Subject: [PATCH 17/24] 80 chars line limit: code --- .../python/openpmd_api/scipp/loader.py | 20 ++++++++++++++----- .../python/openpmd_api/scipp/mesh_loader.py | 11 ++++++---- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py index 6b909e9b5a..864d6ac4fb 100644 --- a/src/binding/python/openpmd_api/scipp/loader.py +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -25,7 +25,8 @@ def get_time_axis(series): :rtype: sc.DataArray """ t = [ - series.iterations[it].time * series.iterations[it].time_unit_SI for it in series.iterations + series.iterations[it].time * series.iterations[it].time_unit_SI \ + for it in series.iterations ] return sc.array(dims=["t"], values=t, unit="s", dtype="double") @@ -43,7 +44,9 @@ def get_iterations(series): return sc.Dataset( data={ "iteration_id": sc.DataArray( - data=sc.array(dims=["t"], values=list(series.iterations)), coords={"t": t} + data=sc.array(dims=["t"], + values=list(series.iterations)), + coords={"t": t} ) } ) @@ -137,7 +140,8 @@ def get_field( assert time_tolerance is None or sc.abs( iteration.coords["t"] - time ) <= time_tolerance.to(unit=time.unit), ( - f"No iteration found within time_tolerance={time_tolerance}." + f"No iteration found " + f"within time_tolerance={time_tolerance}." 
) print( "Series does not contain iteration at the exact time. " @@ -148,9 +152,15 @@ def get_field( if relay: return get_field_data_relay( - series=self.series, field=field, component=component, iteration=iteration + series=self.series, + field=field, + component=component, + iteration=iteration ) else: return get_field( - series=self.series, field=field, component=component, iteration=iteration + series=self.series, + field=field, + component=component, + iteration=iteration ) diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py index 790a13a27d..9bdcb756fa 100644 --- a/src/binding/python/openpmd_api/scipp/mesh_loader.py +++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py @@ -158,9 +158,8 @@ def get_field_data_relay(series, iteration, field, component=None): record = series.iterations[iteration].meshes[field] rc = record[component] if component else record dims = record.axis_labels - time = (series.iterations[iteration].time + record.time_offset) * series.iterations[ - iteration - ].time_unit_SI + time = (series.iterations[iteration].time + record.time_offset) \ + * series.iterations[iteration].time_unit_SI time = sc.scalar(time, unit="s", dtype="double") coords = {"t": time} for dd, dim in enumerate(dims): @@ -185,7 +184,11 @@ def get_field_data_relay(series, iteration, field, component=None): ) return DataRelay( - series=series, record=record, record_component=rc, dummy_array=dummy_array, coords=coords + series=series, + record=record, + record_component=rc, + dummy_array=dummy_array, + coords=coords ) From aa4719a8166d51325edc8accade34876cd758706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 12 Feb 2025 10:59:10 +0100 Subject: [PATCH 18/24] Compatibility with Python 3.8 --- src/binding/python/openpmd_api/scipp/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/binding/python/openpmd_api/scipp/utils.py 
b/src/binding/python/openpmd_api/scipp/utils.py index a3f300e902..c75f075f29 100644 --- a/src/binding/python/openpmd_api/scipp/utils.py +++ b/src/binding/python/openpmd_api/scipp/utils.py @@ -7,6 +7,8 @@ GPL - 3.0 license. See LICENSE file for details. """ +from sys import version_info + import numpy as np import scipp as sc @@ -53,7 +55,9 @@ def _unit_dimension_to_scipp(unit_dimension): 1.0 * sc.Unit("cd"), ) unit = 1.0 * sc.Unit("1") - for dim, base_unit in zip(unit_dimension, base_units, strict=False): + zipped = zip(unit_dimension, base_units) if version_info < ( + 3, 10) else zip(unit_dimension, base_units, strict=False) + for dim, base_unit in zipped: if dim != 0: unit *= base_unit**dim return unit.unit From 3bde4fb17d31e8876086296fa3c76902a1cb576a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Wed, 12 Feb 2025 11:05:23 +0100 Subject: [PATCH 19/24] 79 chars instead of 80.. --- src/binding/python/openpmd_api/scipp/loader.py | 7 ++++--- .../python/openpmd_api/scipp/mesh_loader.py | 13 ++++++++----- src/binding/python/openpmd_api/scipp/utils.py | 15 ++++++++------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py index 864d6ac4fb..f39a6f9e2f 100644 --- a/src/binding/python/openpmd_api/scipp/loader.py +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -55,9 +55,10 @@ def get_iterations(series): class DataLoader: """DataLoader class for loading and retrieving openPMD mesh data. - This class initializes an openPMD series from a given file path and provides - methods to retrieve mesh data fields either by iteration index or by time. - The data can be retrieved as a DataRelay object or directly as a DataArray. + This class initializes an openPMD series from a given file path and + provides methods to retrieve mesh data fields either by iteration index or + by time. 
The data can be retrieved as a DataRelay object or directly as a + DataArray. Attributes: series (openpmd_api.Series): The openPMD series initialized from diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py index 9bdcb756fa..bee54067f0 100644 --- a/src/binding/python/openpmd_api/scipp/mesh_loader.py +++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py @@ -108,9 +108,10 @@ def load_data(self): Loads a chunk based on the current data array coordinates. - Calculates the offset and extent for each dimension using the data array - coordinates. Loads the data chunk from the record component, scales it - by the unit SI, and returns a new data array with loaded values. + Calculates the offset and extent for each dimension using the data + array coordinates. Loads the data chunk from the record component, + scales it by the unit SI, and returns a new data array with loaded + values. :return: The DataArray instance with the loaded data. :rtype: DataRelay @@ -149,7 +150,8 @@ def get_field_data_relay(series, iteration, field, component=None): :type iteration: int :param field: The name of the field to retrieve. :type field: str - :param component: The component of the field to retrieve, default is SCALAR. + :param component: The component of the field to retrieve, + default is SCALAR. :type component: openpmd_api.Mesh_Record_Component, optional :return: A DataRelay instance initialized with the specified field and component data. @@ -205,7 +207,8 @@ def get_field(series, iteration, field, component=None): :type iteration: int :param field: The name of the field to retrieve. :type field: str - :param component: The component of the field to retrieve, default is SCALAR. + :param component: The component of the field to retrieve, + default is SCALAR. :type component: openpmd_api.Mesh_Record_Component, optional :return: A DataArray instance with the loaded data. 
:rtype: DataRelay diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py index c75f075f29..5d8266f3aa 100644 --- a/src/binding/python/openpmd_api/scipp/utils.py +++ b/src/binding/python/openpmd_api/scipp/utils.py @@ -43,8 +43,8 @@ def _unit_dimension_to_scipp(unit_dimension): """ # unit dimension description from the openPMD standard: # powers of the 7 base measures characterizing the record's unit in SI - # (length L, mass M, time T, electric current I, thermodynamic temperature theta, - # amount of substance N, luminous intensity J) + # (length L, mass M, time T, electric current I, thermodynamic + # temperature theta, amount of substance N, luminous intensity J) base_units = ( 1.0 * sc.Unit("m"), 1.0 * sc.Unit("kg"), @@ -67,14 +67,15 @@ def closest(data, dim, val): """Find the index of the closest value in a dataset along a specified dimension. - This function calculates the index of the element in the specified dimension - of the dataset that is closest to the given value. It ensures that the value - is converted to the same unit as the dimension's coordinate before - performing the comparison. + This function calculates the index of the element in the specified + dimension of the dataset that is closest to the given value. It ensures + that the value is converted to the same unit as the dimension's coordinate + before performing the comparison. :param data: The data array containing the dimension to search. :type data: sc.DataArray - :param dim: The name of the dimension along which to find the closest value. + :param dim: The name of the dimension along which to find + the closest value. :type dim: str :param val: The value to compare against, which will be converted to the unit of the dimension's coordinate. 
From 153d0134804fae687bb9cbe0aa0f14033f3a5690 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 12 Feb 2025 11:07:33 +0100 Subject: [PATCH 20/24] Fix unused import --- examples/15_scipp_loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py index 874a0b7b08..f0c5a75745 100644 --- a/examples/15_scipp_loader.py +++ b/examples/15_scipp_loader.py @@ -7,6 +7,7 @@ def main(): try: scipp_loader = series.to_scipp() import plopp + print("Plopp version:", plopp.__version__) except ImportError: print("Need to install scipp and plopp to run this example.") return From fa98c146950366a213936f16753ec00c8d278ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 12 Feb 2025 11:14:27 +0100 Subject: [PATCH 21/24] Style fixes f --- examples/15_scipp_loader.py | 25 ++++++++++--------- .../python/openpmd_api/ScippLazyInit.py | 6 +++-- src/binding/python/openpmd_api/__init__.py | 2 +- .../python/openpmd_api/scipp/loader.py | 1 - 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py index f0c5a75745..05d57daa02 100644 --- a/examples/15_scipp_loader.py +++ b/examples/15_scipp_loader.py @@ -2,12 +2,12 @@ def main(): - series = pmd.Series("../samples/git-sample/data%T.h5", pmd.Access.read_only) + series = pmd.Series("../samples/git-sample/data%T.h5", + pmd.Access.read_only) try: scipp_loader = series.to_scipp() - import plopp - print("Plopp version:", plopp.__version__) + import plopp # noqa except ImportError: print("Need to install scipp and plopp to run this example.") return @@ -22,7 +22,8 @@ def main(): Ex_slice = Ex["x", slicing_idx] print(Ex_slice) Ex_slice.plot().save("slice.png") - Ex_line = Ex_slice["z", pmdsc.closest(Ex_slice, "z", 1.4e-5 * sc.Unit("m"))] + Ex_line = Ex_slice["z", pmdsc.closest( + Ex_slice, "z", 1.4e-5 * sc.Unit("m"))] print(Ex_line) Ex_line.plot().save("line.png") (Ex_line * 
Ex_line).plot().save("line_squared.png") @@ -32,18 +33,17 @@ def main(): # This time we will select a range rather than a slice. # For a range there is no need for an exact match. # But, we could also select a slice just like in the previous example. - Ex = Ex["x", -2e-6 * sc.Unit("m") : 2e-6 * sc.Unit("m")] + Ex = Ex["x", -2e-6 * sc.Unit("m"): 2e-6 * sc.Unit("m")] # Only now the smaller subset wil be loaded into memory Ex = Ex.load_data() print(Ex) - Ex = sc.concat( - [ - scipp_loader.get_field("E", "x", iteration=iteration.value, time_tolerance=None) - for iteration in scipp_loader.iterations["iteration_id"] - ], - dim="t", - ) + Ex = sc.concat([scipp_loader.get_field( + "E", "x", iteration=iteration.value, + time_tolerance=None) + for iteration in scipp_loader.iterations + ["iteration_id"]], + dim="t",) print(Ex) # Let us just slice at some points to get a 2D dataset @@ -51,5 +51,6 @@ def main(): print(Ex) Ex.plot().save("moving_window.png") + if __name__ == "__main__": main() diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py index e2c3832ddf..49f7aea69a 100644 --- a/src/binding/python/openpmd_api/ScippLazyInit.py +++ b/src/binding/python/openpmd_api/ScippLazyInit.py @@ -2,14 +2,16 @@ def series_to_scipp(series): # lazy import try: - import scipp + import scipp # noqa found_scipp = True except ImportError as original_error: found_scipp = False original_error_string = f"{original_error}" if not found_scipp: - raise ImportError(f"Scipp NOT found. Install scipp for Scipp support. Original error: {original_error_string}") + raise ImportError( + f"Scipp NOT found. Install scipp for Scipp support. 
" + f"Original error: {original_error_string}") from .scipp import DataLoader diff --git a/src/binding/python/openpmd_api/__init__.py b/src/binding/python/openpmd_api/__init__.py index 4fca249e68..572feedecc 100644 --- a/src/binding/python/openpmd_api/__init__.py +++ b/src/binding/python/openpmd_api/__init__.py @@ -17,7 +17,7 @@ Record_Component.to_dask_array = record_component_to_daskarray # noqa Series.to_df = iterations_to_dataframe # noqa Series.to_cudf = iterations_to_cudf # noqa -Series.to_scipp = series_to_scipp +Series.to_scipp = series_to_scipp # noqa # TODO remove in future versions (deprecated) Access_Type = Access # noqa diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py index f39a6f9e2f..63e06a998e 100644 --- a/src/binding/python/openpmd_api/scipp/loader.py +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -9,7 +9,6 @@ GPL - 3.0 license. See LICENSE file for details. """ -import openpmd_api as pmd import scipp as sc from .mesh_loader import get_field, get_field_data_relay From 9c1b184a47a52473cef929b293ed4ee9288a3354 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Ordyna?= Date: Wed, 12 Feb 2025 11:17:27 +0100 Subject: [PATCH 22/24] Style fixes p --- src/binding/python/openpmd_api/scipp/__init__.py | 4 ++-- src/binding/python/openpmd_api/scipp/loader.py | 7 +++---- src/binding/python/openpmd_api/scipp/mesh_loader.py | 3 ++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py index f74fe90536..bb264b6bab 100644 --- a/src/binding/python/openpmd_api/scipp/__init__.py +++ b/src/binding/python/openpmd_api/scipp/__init__.py @@ -10,5 +10,5 @@ GPL - 3.0 license. See LICENSE file for details. 
""" -from .loader import DataLoader as DataLoader -from .utils import closest as closest +from .loader import DataLoader as DataLoader # noqa +from .utils import closest as closest # noqa diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py index 63e06a998e..7a74a18060 100644 --- a/src/binding/python/openpmd_api/scipp/loader.py +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -24,8 +24,8 @@ def get_time_axis(series): :rtype: sc.DataArray """ t = [ - series.iterations[it].time * series.iterations[it].time_unit_SI \ - for it in series.iterations + series.iterations[it].time * series.iterations[it].time_unit_SI + for it in series.iterations ] return sc.array(dims=["t"], values=t, unit="s", dtype="double") @@ -43,8 +43,7 @@ def get_iterations(series): return sc.Dataset( data={ "iteration_id": sc.DataArray( - data=sc.array(dims=["t"], - values=list(series.iterations)), + data=sc.array(dims=["t"], values=list(series.iterations)), coords={"t": t} ) } diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py index bee54067f0..f7bd1e6d63 100644 --- a/src/binding/python/openpmd_api/scipp/mesh_loader.py +++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py @@ -213,4 +213,5 @@ def get_field(series, iteration, field, component=None): :return: A DataArray instance with the loaded data. :rtype: DataRelay """ - return get_field_data_relay(series, iteration, field, component).load_data() + return get_field_data_relay( + series, iteration, field, component).load_data() From 12e9ad3ea0e7b24adad22e23f0487232529a5371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Wed, 12 Feb 2025 11:26:15 +0100 Subject: [PATCH 23/24] Fix plopp import again.. 
--- examples/15_scipp_loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py index 05d57daa02..699a4adcd1 100644 --- a/examples/15_scipp_loader.py +++ b/examples/15_scipp_loader.py @@ -8,6 +8,7 @@ def main(): try: scipp_loader = series.to_scipp() import plopp # noqa + print("Plopp version:", plopp.__version__) except ImportError: print("Need to install scipp and plopp to run this example.") return From 124b080a5230fe42221a3c4bf80177ba5bc9cb21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20P=C3=B6schel?= Date: Fri, 28 Feb 2025 15:13:21 +0100 Subject: [PATCH 24/24] Change/add license info License: LGPLv3+ --- examples/15_scipp_loader.py | 8 ++++++++ src/binding/python/openpmd_api/ScippLazyInit.py | 9 +++++++++ src/binding/python/openpmd_api/scipp/__init__.py | 3 +-- src/binding/python/openpmd_api/scipp/loader.py | 3 +-- src/binding/python/openpmd_api/scipp/mesh_loader.py | 3 +-- src/binding/python/openpmd_api/scipp/utils.py | 3 +-- 6 files changed, 21 insertions(+), 8 deletions(-) diff --git a/examples/15_scipp_loader.py b/examples/15_scipp_loader.py index 699a4adcd1..d68a83eb23 100644 --- a/examples/15_scipp_loader.py +++ b/examples/15_scipp_loader.py @@ -1,3 +1,11 @@ +""" +This file is part of the openPMD-api. + +Copyright 2025 openPMD contributors +Authors: Franz Poeschel, Pawel Ordyna +License: LGPLv3+ +""" + import openpmd_api as pmd diff --git a/src/binding/python/openpmd_api/ScippLazyInit.py b/src/binding/python/openpmd_api/ScippLazyInit.py index 49f7aea69a..9ec90bcc2a 100644 --- a/src/binding/python/openpmd_api/ScippLazyInit.py +++ b/src/binding/python/openpmd_api/ScippLazyInit.py @@ -1,3 +1,12 @@ +""" +This file is part of the openPMD-api. 
+ +Copyright 2025 openPMD contributors +Authors: Franz Poeschel +License: LGPLv3+ +""" + + def series_to_scipp(series): # lazy import diff --git a/src/binding/python/openpmd_api/scipp/__init__.py b/src/binding/python/openpmd_api/scipp/__init__.py index bb264b6bab..25c1314dde 100644 --- a/src/binding/python/openpmd_api/scipp/__init__.py +++ b/src/binding/python/openpmd_api/scipp/__init__.py @@ -6,8 +6,7 @@ Author: Pawel Ordyna -License: -GPL - 3.0 license. See LICENSE file for details. +License: LGPLv3+ """ from .loader import DataLoader as DataLoader # noqa diff --git a/src/binding/python/openpmd_api/scipp/loader.py b/src/binding/python/openpmd_api/scipp/loader.py index 7a74a18060..aab80feeb7 100644 --- a/src/binding/python/openpmd_api/scipp/loader.py +++ b/src/binding/python/openpmd_api/scipp/loader.py @@ -5,8 +5,7 @@ Author: Pawel Ordyna -License: -GPL - 3.0 license. See LICENSE file for details. +License: LGPLv3+ """ import scipp as sc diff --git a/src/binding/python/openpmd_api/scipp/mesh_loader.py b/src/binding/python/openpmd_api/scipp/mesh_loader.py index f7bd1e6d63..57978f4287 100644 --- a/src/binding/python/openpmd_api/scipp/mesh_loader.py +++ b/src/binding/python/openpmd_api/scipp/mesh_loader.py @@ -3,8 +3,7 @@ Author: Pawel Ordyna -License: -GPL - 3.0 license. See LICENSE file for details. +License: LGPLv3+ """ import numpy as np diff --git a/src/binding/python/openpmd_api/scipp/utils.py b/src/binding/python/openpmd_api/scipp/utils.py index 5d8266f3aa..ac66880477 100644 --- a/src/binding/python/openpmd_api/scipp/utils.py +++ b/src/binding/python/openpmd_api/scipp/utils.py @@ -3,8 +3,7 @@ Author: Pawel Ordyna -License: -GPL - 3.0 license. See LICENSE file for details. +License: LGPLv3+ """ from sys import version_info