Skip to content

Commit d55ad43

Browse files
committed
ENH: Add files for C3PO-PCLR in YODA
1 parent 0fca9ca commit d55ad43

File tree

6 files changed

+115
-0
lines changed

6 files changed

+115
-0
lines changed

model_zoo/PCLR/deployment/C3PO_PCLR/v1/__init__.py

Whitespace-only changes.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"inputs": [
3+
{
4+
"name": "ecg",
5+
"shape": [2500, 12],
6+
"dtype": "FP32"
7+
}
8+
],
9+
"outputs": [
10+
{
11+
"name": "output_0",
12+
"shape": [320],
13+
"dtype": "FP32"
14+
}
15+
]
16+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Image that bundles the prepare.py / finalize.py helper scripts.
FROM python:3.9-slim
WORKDIR /app

# Install dependencies before copying the scripts so this layer can be reused
# from the build cache when only the scripts change.
COPY requirements.txt /app/
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY prepare.py /app/
COPY finalize.py /app/

# Container arguments are appended to "python", e.g. "prepare.py --input ...".
ENTRYPOINT ["python"]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import argparse
2+
import json
3+
import pandas as pd
4+
5+
# Number of pclr_<i> columns appended to every row of the output CSV.
latent_dimensions = 320


def finalize(input_csv, predictions_json, output_csv):
    """Append the predicted embeddings to the input CSV and write the result.

    Args:
        input_csv: Path to the input CSV; its ``file_id`` column is read as
            a string.
        predictions_json: Path to a JSON file whose ``"output_0"`` entry holds
            one embedding per row of the input CSV.
        output_csv: Path of the CSV to write; it contains the input columns
            followed by ``pclr_0`` .. ``pclr_<latent_dimensions - 1>``.

    Raises:
        ValueError: If the number of predictions differs from the number of
            rows in the input CSV.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding when decoding the predictions file.
    with open(predictions_json, "r", encoding="utf-8") as f:
        prediction_data = json.load(f)

    df = pd.read_csv(input_csv, dtype={"file_id": str})

    embedding = prediction_data["output_0"]

    if len(embedding) != len(df):
        raise ValueError(f"Mismatch: {len(embedding)} predictions but {len(df)} rows in input CSV!")

    column_names = [f'pclr_{i}' for i in range(latent_dimensions)]
    new_frame = pd.DataFrame(embedding, columns=column_names)
    # Both frames carry a default 0..n-1 index, so the column-wise concat
    # pairs row k of the CSV with prediction k.
    df = pd.concat([df, new_frame], axis=1)

    df.to_csv(output_csv, index=False)
    print(f"✅ Predictions written to {output_csv} ({len(df)} rows).")
23+
24+
25+
if __name__ == "__main__":
    # Command-line entry point: all three paths are mandatory.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--input", "Path to input CSV"),
        ("--output", "Path to final CSV with predictions"),
        ("--predictions", "Path to predictions JSON"),
    ):
        cli.add_argument(flag, required=True, help=description)
    options = cli.parse_args()

    finalize(options.input, options.predictions, options.output)
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import argparse
2+
3+
import h5py
4+
import numpy as np
5+
import pandas as pd
6+
import smart_open
7+
8+
# Lead names; their position here is the column index used in the ECG tensor.
leads = [
    'I', 'II', 'III',
    'aVR', 'aVL', 'aVF',
    'V1', 'V2', 'V3',
    'V4', 'V5', 'V6',
]

# Number of samples each lead is resampled to.
ECG_LENGTH = 2500
# Tensor layout: (samples, leads).
ECG_SHAPE = (ECG_LENGTH, len(leads))
# Top-level HDF5 group under which the per-lead strips are looked up.
ECG_HD5_PATH = 'ukb_ecg_rest'
16+
17+
def ecg_as_tensor(ecg_file):
    """Load one ECG file into a float32 array of shape ``ECG_SHAPE``.

    For every name in ``leads`` the dataset
    ``<ECG_HD5_PATH>/strip_<lead>/instance_0`` is read, linearly interpolated
    onto ``ECG_LENGTH`` evenly spaced points and divided by 1000.
    NOTE(review): the division looks like a unit conversion (e.g. uV -> mV);
    confirm against the data source.

    Args:
        ecg_file: Path or URL of the HDF5 file, opened through ``smart_open``.

    Returns:
        Array with one column per lead, in the order given by ``leads``.
    """
    # The target sample positions are the same for every lead.
    target_grid = np.linspace(0, 1, ECG_LENGTH)

    with smart_open.open(ecg_file, 'rb') as stream, h5py.File(stream, 'r') as hd5:
        tensor = np.zeros(ECG_SHAPE, dtype=np.float32)
        for column, lead_name in enumerate(leads):
            raw = np.array(hd5[f'{ECG_HD5_PATH}/strip_{lead_name}/instance_0'])
            source_grid = np.linspace(0, 1, raw.shape[0])
            tensor[:, column] = np.interp(target_grid, source_grid, raw) / 1000

    return tensor
32+
33+
def prepare(input_csv, output_h5):
    """Convert every ECG listed in a CSV into a tensor stored in one HDF5 file.

    Rows without a ``file`` value are skipped. Each remaining row is loaded
    with ``ecg_as_tensor`` and stored as dataset ``tensors/<file_id>``.

    Args:
        input_csv: Path to a CSV with ``file_id`` and ``file`` columns.
        output_h5: Path of the HDF5 file to write (opened in ``"w"`` mode).
    """
    df = pd.read_csv(input_csv, dtype={"file": str})
    df = df.dropna(subset=["file"])
    df["file"] = df["file"].astype(str)

    # The context manager closes the output file even when loading one of the
    # ECGs raises, so the handle is never leaked.
    with h5py.File(output_h5, "w") as h5_file:
        tensors_group = h5_file.create_group("tensors")
        for sample_id, file_path in zip(df["file_id"], df["file"]):
            print(f"Processing: sample_id={sample_id}, file_path={file_path}, type={type(file_path)}")
            tensor = ecg_as_tensor(file_path)
            tensors_group.create_dataset(str(sample_id), data=tensor)

    print(f"Processed ECG tensors saved to {output_h5}")
48+
49+
50+
if __name__ == "__main__":
    # Command-line entry point: both paths are mandatory.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--input", "Path to input CSV"),
        ("--output", "Path to output HDF5 file"),
    ):
        cli.add_argument(flag, required=True, help=description)
    options = cli.parse_args()

    prepare(options.input, options.output)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
pandas
2+
numpy
3+
h5py
4+
smart-open[gcs]

0 commit comments

Comments
 (0)