
Commit eb1a661

added meta, some classes still missing
1 parent 1f83d65 commit eb1a661

File tree: 9 files changed, 283 additions & 163 deletions

tests/test_iota.py

Lines changed: 11 additions & 9 deletions

@@ -1,4 +1,5 @@
 from datetime import datetime
+from pathlib import Path

 import h5py
 import numpy as np
@@ -11,9 +12,9 @@
 from turn_by_turn.structures import TbtData, TransverseData


-def test_tbt_read_hdf5(_hdf5_file):
+def test_tbt_read_hdf5(_hdf5_file_v1):
     origin = _hdf5_file_content()
-    new = iota.read_tbt(_hdf5_file, hdf5_version=1)
+    new = iota.read_tbt(_hdf5_file_v1, version=1)
     compare_tbt(origin, new, no_binary=False)


@@ -23,9 +24,9 @@ def test_tbt_read_hdf5_v2(_hdf5_file_v2):
     compare_tbt(origin, new, no_binary=False)


-def test_tbt_raises_on_wrong_hdf5_version(_hdf5_file):
+def test_tbt_raises_on_wrong_hdf5_version(_hdf5_file_v1):
     with pytest.raises(HDF5VersionError):
-        iota.read_tbt(_hdf5_file, hdf5_version=2)
+        iota.read_tbt(_hdf5_file_v1, version=2)


 def _hdf5_file_content() -> TbtData:
@@ -45,15 +46,16 @@ def _hdf5_file_content() -> TbtData:
                 ),
             )
         ],
-        date=datetime.now(),
         bunch_ids=[1],
         nturns=2000,
     )


 @pytest.fixture()
-def _hdf5_file(tmp_path) -> h5py.File:
+def _hdf5_file_v1(tmp_path) -> Path:
     """IOTA File standard."""
+    content: TransverseData = _hdf5_file_content().matrices[0]
+
     with h5py.File(tmp_path / "test_file.hdf5", "w") as hd5_file:
         hd5_file.create_dataset(
             "N:IBE2RH",
@@ -80,11 +82,11 @@ def _hdf5_file(tmp_path) -> h5py.File:
             "N:IBA1CS",
             data=create_data(np.linspace(0, 20, 2000, endpoint=False), 1, np.exp).flatten(),
         )
-    yield tmp_path / "test_file.hdf5"
+    return tmp_path / "test_file.hdf5"


 @pytest.fixture()
-def _hdf5_file_v2(tmp_path) -> h5py.File:
+def _hdf5_file_v2(tmp_path) -> Path:
     """IOTA File standard."""
     with h5py.File(tmp_path / "test_file_v2.hdf5", "w") as hd5_file:
         hd5_file.create_group("A1C")
@@ -114,4 +116,4 @@ def _hdf5_file_v2(tmp_path) -> h5py.File:
             "Intensity",
             data=create_data(np.linspace(0, 20, 2000, endpoint=False), 1, np.exp).flatten(),
         )
-    yield tmp_path / "test_file_v2.hdf5"
+    return tmp_path / "test_file_v2.hdf5"
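
Note: besides the fixture rename, the keyword argument changes from hdf5_version to version. A minimal sketch of the call pattern these tests exercise (the file name is illustrative, and the import path for HDF5VersionError is an assumption, since the test's import block is not shown in these hunks):

    from turn_by_turn import iota
    from turn_by_turn.errors import HDF5VersionError  # assumed location

    # Read an IOTA HDF5 file, declaring which file-layout version it uses.
    tbt = iota.read_tbt("iota_measurement.hdf5", version=1)

    # Declaring the wrong version should raise, as the test above asserts.
    try:
        iota.read_tbt("iota_measurement.hdf5", version=2)
    except HDF5VersionError:
        pass

The fixtures also switch from yield to return: with tmp_path, pytest handles cleanup itself, so no teardown code follows the yield and a plain return of the path is sufficient (hence the corrected Path return annotation).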

turn_by_turn/ascii.py

Lines changed: 14 additions & 5 deletions

@@ -23,7 +23,13 @@
 import pandas as pd
 from dateutil import tz

-from turn_by_turn.constants import FORMAT_STRING, NUM_TO_PLANE, PLANE_TO_NUM, PLANES
+from turn_by_turn.constants import (
+    FORMAT_STRING,
+    NUM_TO_PLANE,
+    PLANE_TO_NUM,
+    PLANES,
+    MetaDict,
+)
 from turn_by_turn.structures import TbtData, TransverseData

 LOGGER = logging.getLogger(__name__)
@@ -108,7 +114,7 @@ def _write_tbt_data(tbt_data: TbtData, bunch_id: int, output_file: TextIO) -> None:
 # ----- Reader ----- #


-def read_tbt(file_path: str | Path, bunch_id: int = None) -> TbtData:
+def read_tbt(file_path: str | Path, bunch_id: int | None = None) -> TbtData:
     """
     Reads turn-by-turn data from an ASCII turn-by-turn format file, and return the date as well as
     parsed matrices for construction of a ``TbtData`` object.
@@ -125,7 +131,10 @@ def read_tbt(file_path: str | Path, bunch_id: int = None) -> TbtData:
     data_lines = Path(file_path).read_text().splitlines()
     bpm_names = {"X": [], "Y": []}
     bpm_data = {"X": [], "Y": []}
-    date = None  # will switch to TbtData.date's default if not found in file
+    meta: MetaDict = {
+        "file": file_path,
+        "source_datatype": "ascii",
+    }

     if bunch_id is None:
         bunch_id = _parse_bunch_id(file_path)
@@ -135,7 +144,7 @@ def read_tbt(file_path: str | Path, bunch_id: int = None) -> TbtData:

         if ACQ_DATE_PREFIX in line:
             LOGGER.debug("Acquiring date from file")
-            date = _parse_date(line)
+            meta["date"] = _parse_date(line)
             continue

         if line == "" or line.startswith(ASCII_COMMENT):  # empty or comment line
@@ -159,7 +168,7 @@ def read_tbt(file_path: str | Path, bunch_id: int = None) -> TbtData:
         )
     ]
     return TbtData(
-        matrices=matrices, date=date, bunch_ids=[bunch_id], nturns=matrices[0].X.shape[1]
+        matrices=matrices, meta=meta, bunch_ids=[bunch_id], nturns=matrices[0].X.shape[1]
     )
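
Note: this reader now always records "file" and "source_datatype" in meta, while "date" is only set when an acquisition-date line is found. A hedged usage sketch (file name illustrative; assumes TbtData exposes the new meta mapping, as the constructor call above implies):

    from turn_by_turn import ascii as tbt_ascii

    tbt = tbt_ascii.read_tbt("measurement.tbt.ascii", bunch_id=0)

    print(tbt.meta["source_datatype"])  # always set by this reader: "ascii"
    print(tbt.meta.get("date"))         # None unless the file had a date line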

turn_by_turn/constants.py

Lines changed: 24 additions & 0 deletions

@@ -7,10 +7,34 @@

 from __future__ import annotations

+from typing import TYPE_CHECKING, TypedDict
+
+if TYPE_CHECKING:
+    from datetime import datetime
+    from pathlib import Path
+
 PLANES: tuple[str, str] = ("X", "Y")
 NUM_TO_PLANE: dict[str, str] = {"0": "X", "1": "Y"}
 PLANE_TO_NUM: dict[str, int] = {"X": 0, "Y": 1}

+# ----- Common Meta Keys ----- #
+
+class MetaDict(TypedDict, total=False):
+    """Metadata dictionary, to type-hint known entries.
+    None of the entries are required (``total=False``).
+
+    Attributes:
+        date (datetime): Date of the measurement/creation of the data.
+        file (Path | str): Path to the file the data was loaded from (if available).
+        machine (str): Name of the machine the data was measured/simulated on.
+        source_datatype (str): The datatype this data was loaded from.
+        comment (str): Any comment on the measurement.
+    """
+    date: datetime
+    file: Path | str
+    machine: str
+    source_datatype: str
+    comment: str

 # ----- Miscellaneous ----- #

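Note: with total=False, none of the keys are required, so readers can build the dict incrementally. A minimal usage sketch (values illustrative):

    from datetime import datetime, timezone
    from pathlib import Path

    from turn_by_turn.constants import MetaDict

    # A partial dict type-checks, since no key is required.
    meta: MetaDict = {
        "file": Path("measurement.hdf5"),
        "source_datatype": "doros",
    }
    meta["date"] = datetime.now(tz=timezone.utc)  # filled in later, as the readers do

    # Optional keys are best read with .get(), to handle their absence.
    machine = meta.get("machine", "unknown")
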
turn_by_turn/doros.py

Lines changed: 14 additions & 4 deletions

@@ -38,6 +38,7 @@
 import logging
 from datetime import datetime
 from pathlib import Path
+from typing import TYPE_CHECKING

 import h5py
 import pandas as pd
@@ -46,6 +47,9 @@
 from turn_by_turn.structures import TbtData, TransverseData
 from turn_by_turn.utils import all_elements_equal

+if TYPE_CHECKING:
+    from turn_by_turn.constants import MetaDict
+
 LOGGER = logging.getLogger(__name__)

 DEFAULT_BUNCH_ID: int = 0  # bunch ID not saved in the DOROS file
@@ -132,6 +136,10 @@ def read_tbt(
     file_path = Path(file_path)
     LOGGER.debug(f"Reading DOROS {data_type} data at path: '{file_path.absolute()}'")
     data_keys = DataKeys.get_data_keys(data_type)
+    meta: MetaDict = {
+        "file": file_path,
+        "source_datatype": data_type,
+    }

     with h5py.File(file_path, "r") as hdf_file:
         # use "/" to keep track of bpm order, see https://github.com/h5py/h5py/issues/1471
@@ -141,7 +149,7 @@ def read_tbt(
         _check_data_lengths(hdf_file, data_keys, bpm_names)

         time_stamps = [hdf_file[bpm][ACQ_STAMP][0] for bpm in bpm_names]
-        date = datetime.fromtimestamp(min(time_stamps) / 1e6, tz=tz.tzutc())
+        meta["date"] = datetime.fromtimestamp(min(time_stamps) / 1e6, tz=tz.tzutc())

         nturns = hdf_file[bpm_names[0]][data_keys.n_samples][0]  # equal lengths checked before
         matrices = [
@@ -150,7 +158,7 @@ def read_tbt(
             Y=_create_dataframe(hdf_file, data_keys, bpm_names, plane="Y"),
         )
     ]
-    return TbtData(matrices, date, [bunch_id], nturns)
+    return TbtData(matrices, bunch_ids=[bunch_id], nturns=nturns, meta=meta)


 def write_tbt(
@@ -172,13 +180,15 @@ def write_tbt(
     data_keys = DataKeys.get_data_keys(data_type)
     other_keys = DataKeys.get_other_data_keys(data_type)

+    timestamp = tbt_data.meta.get("date", datetime.now(tz=tz.tzutc())).timestamp() * 1e6
+
     data = tbt_data.matrices[0]
     with h5py.File(file_path, "w", track_order=True) as hdf_file:
         hdf_file.create_group(METADATA)
         for bpm in data.X.index:
             hdf_file.create_group(bpm)
-            hdf_file[bpm].create_dataset(ACQ_STAMP, data=[tbt_data.date.timestamp() * 1e6])
-            hdf_file[bpm].create_dataset(BST_TIMESTAMP, data=[tbt_data.date.timestamp() * 1e6])
+            hdf_file[bpm].create_dataset(ACQ_STAMP, data=[timestamp])
+            hdf_file[bpm].create_dataset(BST_TIMESTAMP, data=[timestamp])

             hdf_file[bpm].create_dataset(data_keys.n_samples, data=[tbt_data.nturns])
             hdf_file[bpm].create_dataset(data_keys.data["X"], data=data.X.loc[bpm, :].values)
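
Note on the write path: tbt_data.meta may lack "date" (it is optional in MetaDict), so write_tbt now falls back to the current UTC time instead of reading the removed date attribute. A self-contained sketch of that fallback:

    from datetime import datetime
    from dateutil import tz

    meta = {}  # e.g. data converted from a source that carries no timestamps
    stamp = meta.get("date", datetime.now(tz=tz.tzutc())).timestamp() * 1e6
    # stamp is in microseconds since the epoch, matching the / 1e6 in read_tbt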

turn_by_turn/esrf.py

Lines changed: 4 additions & 1 deletion

@@ -38,7 +38,10 @@ def read_tbt(file_path: str | Path) -> TbtData:
     file_path = Path(file_path)
     LOGGER.debug(f"Reading ESRF file at path: '{file_path.absolute()}'")
     names, matrix = load_esrf_mat_file(file_path)
-    return numpy_to_tbt(names, matrix)
+    tbt_data = numpy_to_tbt(names, matrix)
+    tbt_data.meta["file"] = file_path
+    tbt_data.meta["source_datatype"] = "esrf"
+    return tbt_data


 def load_esrf_mat_file(infile: str | Path) -> tuple[np.ndarray, np.ndarray]:
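
Note: here the meta entries are attached after conversion, which assumes numpy_to_tbt initializes TbtData.meta to an empty dict; the commit message says some classes are still missing, so that structure change is not visible in this diff. A hedged sketch of the pattern (helper location and matrix shape are assumptions):

    import numpy as np

    from turn_by_turn.utils import numpy_to_tbt  # assumed location

    names = np.array(["BPM1", "BPM2"])
    matrix = np.zeros((2, 2, 10))  # assumed shape: (planes, bpms, turns)
    tbt = numpy_to_tbt(names, matrix)

    # annotate after conversion, exactly as read_tbt does above
    tbt.meta["file"] = "original.mat"
    tbt.meta["source_datatype"] = "esrf"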
