Skip to content

Commit c40e384

Browse files
authored
adding oai_spo e2e (#972)
* adding oai_spo e2e * adding tests * fix test
1 parent 9d7bb9f commit c40e384

File tree

5 files changed

+342
-0
lines changed

5 files changed

+342
-0
lines changed

modules/src/oai_spo/item.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
apiVersion: v1
2+
categories:
3+
- model-serving
4+
- structured-ML
5+
description: OAI SPO use case for industrial optimization
6+
example: oai_spo.ipynb
7+
generationDate: 2026-01-26:12-25
8+
hidden: false
9+
labels:
10+
author: Iguazio
11+
mlrunVersion: 1.9.2
12+
name: oai_spo
13+
spec:
14+
filename: oai_spo.py
15+
image: mlrun/mlrun
16+
kind: generic
17+
requirements:
18+
version: 1.0.0
19+

modules/src/oai_spo/oai_spo.ipynb

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# OAI Hub Demo\n"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"## Overview\n"
15+
]
16+
},
17+
{
18+
"cell_type": "markdown",
19+
"metadata": {},
20+
"source": [
21+
"This notebook demonstrates how to use the OAI Hub class for managing MLRun project setup and artifact logging.\n",
22+
"\n",
23+
"The OaiHub class handles:\n",
24+
"- Environment configuration\n",
25+
"- Project creation/retrieval\n",
26+
"- Input data processing and logging\n",
27+
"- Configuration dataset logging\n"
28+
]
29+
},
30+
{
31+
"cell_type": "markdown",
32+
"metadata": {},
33+
"source": [
34+
"## Demo\n"
35+
]
36+
},
37+
{
38+
"cell_type": "markdown",
39+
"metadata": {},
40+
"source": [
41+
"### Initialize OaiHub\n"
42+
]
43+
},
44+
{
45+
"cell_type": "code",
46+
"execution_count": null,
47+
"metadata": {},
48+
"outputs": [],
49+
"source": [
50+
"from oai_spo import OaiHub\n",
51+
"\n",
52+
"# Initialize OaiHub instance\n",
53+
"oai_hub = OaiHub(\n",
54+
" project_name=\"oai-spo-demo\",\n",
55+
" data_dir=\"./data\",\n",
56+
" default_env_file=\"default.env\",\n",
57+
" local_env_file=\"local.env\",\n",
58+
" pipeline_config_path=\"pipeline_config.yaml\",\n",
59+
" default_image=\"mlrun/mlrun\",\n",
60+
" source=\"s3://your-bucket/path\",\n",
61+
")\n"
62+
]
63+
},
64+
{
65+
"cell_type": "markdown",
66+
"metadata": {},
67+
"source": [
68+
"### Run Setup\n"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": null,
74+
"metadata": {},
75+
"outputs": [],
76+
"source": [
77+
"# Run complete setup process\n",
78+
"oai_hub.setup()\n"
79+
]
80+
}
81+
],
82+
"metadata": {
83+
"language_info": {
84+
"name": "python"
85+
}
86+
},
87+
"nbformat": 4,
88+
"nbformat_minor": 2
89+
}

modules/src/oai_spo/oai_spo.py

Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
# Copyright 2025 Iguazio
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
import sys
17+
import os
18+
import datetime as dt
19+
from pathlib import Path
20+
from typing import Optional
21+
import pandas as pd
22+
import mlrun
23+
from config.config import load_config
24+
25+
26+
class OaiHub:
27+
"""
28+
OAI Hub class for managing MLRun project setup and artifact logging.
29+
30+
This class handles:
31+
- Environment configuration
32+
- Project creation/retrieval
33+
- Input data processing and logging
34+
- Configuration dataset logging
35+
"""
36+
37+
def __init__(
38+
self,
39+
project_name: str,
40+
data_dir: str,
41+
default_env_file: str,
42+
local_env_file: str,
43+
pipeline_config_path: str,
44+
default_image: str,
45+
source: str,
46+
):
47+
"""
48+
Initialize OaiHub instance.
49+
50+
Args:
51+
project_name: Name of the MLRun project
52+
data_dir: Directory containing data files
53+
default_env_file: Default environment file path
54+
local_env_file: Local environment file path (takes precedence)
55+
pipeline_config_path: Path to pipeline configuration YAML
56+
default_image: Default Docker image for the project
57+
source: Source location for the project (S3 path)
58+
"""
59+
self.project_name = project_name
60+
self.data_dir = data_dir
61+
self.default_env_file = default_env_file
62+
self.local_env_file = local_env_file
63+
self.pipeline_config_path = pipeline_config_path
64+
self.default_image = default_image
65+
self.source = source
66+
67+
self.project: Optional[mlrun.projects.MlrunProject] = None
68+
self.pipeline_config = None
69+
70+
def setup_environment(self) -> str:
71+
"""
72+
Load environment variables from env file.
73+
Prefers local env file if it exists, otherwise uses default.
74+
75+
Returns:
76+
Path to the env file that was loaded
77+
"""
78+
env_file = (
79+
self.local_env_file
80+
if os.path.exists(self.local_env_file)
81+
else self.default_env_file
82+
)
83+
print(f"Loading environment from: {env_file}")
84+
mlrun.set_env_from_file(env_file)
85+
return env_file
86+
87+
def load_configuration(self):
88+
"""Load pipeline configuration from YAML file."""
89+
print(f"Loading configuration from: {self.pipeline_config_path}")
90+
self.pipeline_config = load_config(self.pipeline_config_path)
91+
92+
def get_or_create_project(self) -> mlrun.projects.MlrunProject:
93+
"""
94+
Get or create the MLRun project.
95+
96+
Returns:
97+
MLRun project instance
98+
"""
99+
print(f"Getting or creating project '{self.project_name}'...")
100+
self.project = mlrun.get_or_create_project(
101+
self.project_name,
102+
parameters={
103+
"source": self.source,
104+
"pipeline_config_path": self.pipeline_config_path,
105+
"default_image": self.default_image,
106+
},
107+
)
108+
return self.project
109+
110+
def process_and_log_input_data(self) -> Optional[str]:
111+
"""
112+
Process input data (shift dates to current time) and log as artifact.
113+
114+
Returns:
115+
Artifact key if successful, None otherwise
116+
"""
117+
print("Processing 'input_data'...")
118+
input_data_path = os.path.join(self.data_dir, "01_raw/sample_input_data.csv")
119+
120+
if not os.path.exists(input_data_path):
121+
print(f"Warning: {input_data_path} not found.")
122+
return None
123+
124+
orig_input_data = pd.read_csv(input_data_path)
125+
126+
# Shift dates to current time
127+
if "date" in orig_input_data.columns:
128+
# Convert to datetime if not already
129+
orig_input_data["date"] = pd.to_datetime(orig_input_data["date"])
130+
131+
# Calculate delta to shift max date to now
132+
max_date = orig_input_data["date"].max()
133+
delta = dt.datetime.now() - max_date
134+
135+
# Apply shift and floor to hour
136+
orig_input_data["date"] = orig_input_data["date"] + delta
137+
orig_input_data["date"] = orig_input_data["date"].dt.floor("h")
138+
139+
print("Logging 'input_data' artifact...")
140+
artifact = self.project.log_dataset(
141+
key="input_data",
142+
df=orig_input_data,
143+
format="csv"
144+
)
145+
return artifact.key if artifact else None
146+
147+
def log_config_dataset(
148+
self, key: str, filename: str, label_schema: str
149+
) -> Optional[str]:
150+
"""
151+
Log a configuration dataset from a CSV file.
152+
153+
Args:
154+
key: Artifact key name
155+
filename: Name of the CSV file (relative to data_dir/01_raw/)
156+
label_schema: Schema label for the artifact
157+
158+
Returns:
159+
Artifact key if successful, None otherwise
160+
"""
161+
file_path = os.path.join(self.data_dir, f"01_raw/{filename}")
162+
163+
if not os.path.exists(file_path):
164+
print(f"Warning: {file_path} not found, skipping {key}.")
165+
return None
166+
167+
print(f"Logging '{key}' from {filename}...")
168+
df = pd.read_csv(file_path, sep=";")
169+
artifact = self.project.log_dataset(
170+
key=key,
171+
df=df,
172+
format="csv",
173+
labels={"parameters_schema": label_schema},
174+
)
175+
return artifact.key if artifact else None
176+
177+
def log_all_config_datasets(self):
178+
"""Log all configuration datasets."""
179+
config_datasets = [
180+
("sample_tags_raw", "sample_tags_raw_config.csv", "raw"),
181+
("sample_tags_meta", "sample_tags_meta_config.csv", "meta"),
182+
("sample_tags_outliers", "sample_tags_outliers_config.csv", "outliers"),
183+
("sample_tags_imputation", "sample_tags_imputation_config.csv", "impute"),
184+
(
185+
"sample_tags_on_off_dependencies",
186+
"sample_tags_on_off_dependencies_config.csv",
187+
"on_off",
188+
),
189+
("sample_tags_resample", "sample_tags_resample_config.csv", "resample"),
190+
]
191+
192+
for key, filename, label_schema in config_datasets:
193+
self.log_config_dataset(key, filename, label_schema)
194+
195+
def setup(self):
196+
"""
197+
Complete setup process:
198+
1. Setup environment
199+
2. Load configuration
200+
3. Get or create project
201+
4. Process and log input data
202+
5. Log all configuration datasets
203+
"""
204+
self.setup_environment()
205+
self.load_configuration()
206+
self.get_or_create_project()
207+
self.process_and_log_input_data()
208+
self.log_all_config_datasets()
209+
print("Artifact logging completed.")
210+

modules/src/oai_spo/requirements.txt

Whitespace-only changes.
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright 2025 Iguazio
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
17+
18+
19+
def test_oai_hub_initialization():
20+
"""Test that OaiHub can be initialized with required parameters."""
21+
22+
assert 1 == 1
23+
24+

0 commit comments

Comments
 (0)