Skip to content

Commit c20a619

Browse files
committed
Removed clutter and added MinMax scaling to the pipeline
1 parent abb9739 commit c20a619

File tree

5 files changed

+7
-9
lines changed

5 files changed

+7
-9
lines changed

src/model/data_ingestion.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,4 +3,4 @@
33
def load_raw_data(data_path='data/data.csv'):
44
"""Loads the raw dataset from the specified path."""
55
df = pd.read_csv(data_path)
6-
return df
6+
return df

src/model/data_preprocessing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,4 +13,4 @@ def prepare_features_and_target(df):
1313
"""Prepares features (X) and target (y) from the preprocessed DataFrame."""
1414
X = df.drop('diagnosis', axis=1)
1515
y = df['diagnosis']
16-
return X, y
16+
return X, y

src/model/model_inference.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,6 @@
22
import pandas as pd
33
import os
44

5-
# Ensure the data_preprocessing module is accessible for FunctionTransformer if it was pickled
6-
# This might not be strictly necessary if FunctionTransformer only relies on the function definition itself
7-
# but good practice to have the context
85
from .data_preprocessing import drop_unnecessary_columns
96

107
def load_pipeline(model_path='models/model.joblib'):
@@ -38,4 +35,4 @@ def predict(raw_data, model_path='models/model.joblib'):
3835
prediction = predict(sample_new_data)
3936
print(f"Prediction for sample data: {prediction[0]} (0: Benign, 1: Malignant)")
4037
except FileNotFoundError as e:
41-
print(e)
38+
print(e)

src/model/model_training.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,4 +39,4 @@ def train_and_save_pipeline(data_path='data/data.csv', model_path='models/model.
3939
print(f"Trained pipeline saved to {model_path}")
4040

4141
if __name__ == "__main__":
42-
train_and_save_pipeline()
42+
train_and_save_pipeline()

src/model/pipeline_utils.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from sklearn.pipeline import Pipeline
22
from sklearn.preprocessing import FunctionTransformer
3+
from sklearn.preprocessing import MinMaxScaler
34
from sklearn.ensemble import RandomForestClassifier
45
import pandas as pd
56

@@ -11,12 +12,12 @@ def create_breast_cancer_pipeline():
1112
# Define preprocessing steps
1213
preprocessing_pipeline = Pipeline([
1314
('drop_cols', FunctionTransformer(drop_unnecessary_columns, validate=False)),
14-
# Add other preprocessing steps here if needed, e.g., scaling
15+
('scaler', MinMaxScaler()),
1516
])
1617

1718
# Combine preprocessing and model into a full pipeline
1819
full_pipeline = Pipeline([
1920
('preprocessor', preprocessing_pipeline),
2021
('classifier', RandomForestClassifier(random_state=42))
2122
])
22-
return full_pipeline
23+
return full_pipeline

0 commit comments

Comments
 (0)