Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions AFL/double_agent/TreePipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from AFL.double_agent import *
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib
#import tune_all_decisions as tad
import itertools
import joblib
from io import BytesIO
import xarray as xr
import json
import TreeHierarchy as te
from sklearn.metrics import classification_report as cr
from sklearn.metrics import root_mean_squared_error as RMSE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_absolute_percentage_error as MAPE

#PipelineOp constructor for classification tree
#The tree itself is defined in TreeHierarchy
#This constructor follows the expected PipelineOp syntax
# input_variable: the name of the input feature in the xarray
# output_variable: the name of the variable to add/modify in the xarray dataset
# model_definition: A dictionary containing an encoding of a TreeHierarchy object. The encoder is contained in treeHierarchy.
class ClassificationPipeline(PipelineOp):
    """PipelineOp that applies a TreeHierarchy classification tree to SAS data.

    Parameters
    ----------
    input_variable : str
        Name of the input feature variable in the xarray dataset.
    output_variable : str
        Name of the variable to add/modify in the xarray dataset.
    model_definition : dict
        JSON encoding of a TreeHierarchy object (encoder lives in TreeHierarchy).
    name : str
        Name of this PipelineOp instance.
    """

    def __init__(self, input_variable, output_variable, model_definition, name="Classifier"):
        super().__init__(
            input_variable=input_variable,
            output_variable=output_variable,
            name=name
        )
        # Rebuild the TreeHierarchy classifier from its JSON encoding
        self.classifier = te.json_decoder(model_definition)

    def set_classifier(self, classifier_instance):
        # Swap in an already-constructed classifier, bypassing JSON decoding
        self.classifier = classifier_instance

    def calculate(self, dataset):
        """Predict one class label per sample and store it in output_variable."""
        features = self._get_variable(dataset)
        # The tree was trained on log10-scaled intensities, so transform first
        labels = self.classifier.predict(np.log10(features))
        dataset[self.output_variable] = ('sample', labels)
        return self

#PipelineOp constructor for a regressor
#This constructor follows the expected PipelineOp syntax, with some important considerations
# input_variable: the name of the input feature in the xarray
# output_variable: the name of the variable to add/modify in the xarray dataset
# key_variable: the name of the variable that contains morphology information in the xarray, could be ground_truth_labels, predicted_labels, etc.
# morphology: the morphology that this model is trained on
# model_definition: a dictionary containing a complete definition of a trained model; the encoder in TreeHierarchy also works for this
#NOTE: Each regressor only works for one parameter for one morphology; if multiple morphologies share a parameter (i.e., radius is common to many morphologies), then they should each operate on the SAME output_variable.
#Each RegressionPipeline will only modify output_variable where key_variable==morphology; place multiple PipelineOps in the same pipeline to perform regression over all parameters and morphologies
class RegressionPipeline(PipelineOp):
    """PipelineOp that applies a morphology-specific regression model.

    Only samples where ``key_variable == morphology`` are written into
    ``output_variable``; all other entries are preserved (or initialized to
    NaN when the variable does not yet exist). This lets multiple
    RegressionPipeline ops for different morphologies share one output
    variable when placed in the same pipeline.

    Parameters
    ----------
    input_variable : str
        Name of the input feature variable in the xarray dataset.
    output_variable : str
        Name of the variable to add/modify in the xarray dataset.
    key_variable : str
        Name of the variable holding morphology labels (e.g.
        ground_truth_labels, predicted_labels).
    morphology : str
        The morphology this regressor was trained on.
    model_definition : dict
        JSON encoding of the trained model; the TreeHierarchy encoder works
        for this as well.
    name : str
        Name of this PipelineOp instance.
        NOTE(review): default "Classifier" looks like a copy-paste from
        ClassificationPipeline; kept for backward compatibility with
        previously saved pipelines.
    """

    def __init__(self, input_variable, output_variable, key_variable, morphology, model_definition, name="Classifier"):
        super().__init__(
            input_variable=input_variable,
            output_variable=output_variable,
            name=name
        )
        self.key_variable = key_variable
        self.morphology = morphology
        self.regression = te.json_decoder(model_definition)

    def calculate(self, dataset):
        """Run regression on the samples matching this op's morphology."""
        data = self._get_variable(dataset)
        key = dataset[self.key_variable].data
        # Select only the samples whose morphology label matches this regressor
        inds = np.where(np.equal(key, self.morphology))
        # The model was trained on log10-scaled intensities
        predictions = self.regression.predict(np.log10(data[inds]))
        if self.output_variable in dataset.data_vars:
            # Preserve values already written by other morphologies' regressors
            output = dataset[self.output_variable].data
        else:
            # Initialize with NaN so samples not covered by any regressor are obvious
            output = np.full(data.shape[0], np.nan)
        output[inds] = predictions
        dataset[self.output_variable] = ('sample', output)
        return self

827 changes: 827 additions & 0 deletions AFL/double_agent/data/classification_pipeline.json

Large diffs are not rendered by default.

Binary file not shown.
401 changes: 401 additions & 0 deletions AFL/double_agent/data/example_tree_structure.json

Large diffs are not rendered by default.

Binary file added AFL/double_agent/data/reference_predictions.nc
Binary file not shown.
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ automation = [
"requests",
]

mlmodels = [
"TreeHierarchy @ git+https://github.com/grahamRobertsW/TreeHierarchy"
]

dev = [
"black",
"mypy",
Expand Down
86 changes: 86 additions & 0 deletions tests/test_classifier_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Unit tests for the AFL.double_agent.PipelineOp module.
"""

import pytest
import numpy as np
import xarray as xr
import json
import os

from tests.utils import MockPipelineOp
from AFL.double_agent import TreePipeline as tp
from AFL.double_agent import (Pipeline, LogLogTransform)
from sklearn.svm import SVC
from AFL.double_agent.data import (
get_data_dir,
list_datasets,
load_dataset,
example_dataset1,
)
from TreeHierarchy import (
TreeHierarchy,
json_decoder
)


@pytest.mark.unit
class TestClassificationPipeline:
    """Tests constructing a ClassificationPipeline from a JSON tree definition."""

    def test_classifier_creation(self):
        data = load_dataset("example_classification_data")
        # Load the tree definition with a context manager so the file handle
        # is closed deterministically (the old open(...).read() leaked it).
        tree_path = os.path.join(get_data_dir(), "example_tree_structure.json")
        with open(tree_path, 'r') as f:
            classification_def = json.load(f)
        with Pipeline() as P:
            LogLogTransform("SAS_curves", "log_sas_curves")
            pipe = tp.ClassificationPipeline("SAS_curves", "predicted_labels", classification_def)
        assert isinstance(pipe, tp.ClassificationPipeline)
        # The decoded model should be a full two-level TreeHierarchy
        assert isinstance(pipe.classifier, TreeHierarchy)
        for node in (
            pipe.classifier.left,
            pipe.classifier.right,
            pipe.classifier.left.left,
            pipe.classifier.left.right,
            pipe.classifier.right.left,
            pipe.classifier.right.right,
        ):
            assert isinstance(node, TreeHierarchy)
        # Each decision node should wrap a trained SVC
        assert isinstance(pipe.classifier.entity, SVC)
        assert isinstance(pipe.classifier.left.entity, SVC)
        assert isinstance(pipe.classifier.right.entity, SVC)

@pytest.mark.unit
class TestClassificationPipelineLoaded:
    """Tests that a ClassificationPipeline survives Pipeline JSON round-tripping."""

    def test_classifier_load(self):
        save_path = os.path.join(get_data_dir(), "classification_pipeline.json")
        with Pipeline.read_json(str(save_path)) as P:
            op = P[1]
            assert isinstance(op, tp.ClassificationPipeline)
            # The deserialized model should be a full two-level TreeHierarchy
            assert isinstance(op.classifier, TreeHierarchy)
            for node in (
                op.classifier.left,
                op.classifier.right,
                op.classifier.left.left,
                op.classifier.left.right,
                op.classifier.right.left,
                op.classifier.right.right,
            ):
                assert isinstance(node, TreeHierarchy)
            # Each decision node should wrap a trained SVC
            assert isinstance(op.classifier.entity, SVC)
            assert isinstance(op.classifier.left.entity, SVC)
            assert isinstance(op.classifier.right.entity, SVC)

@pytest.mark.unit
class TestClassificationPipelinePerformance:
    """Tests that a deserialized pipeline reproduces the reference predictions."""

    def test_classifier_predictions_match_reference(self):
        # Method renamed from the copy-pasted "test_classifier_load" to
        # describe what is actually verified: prediction output equality.
        save_path = os.path.join(get_data_dir(), "classification_pipeline.json")
        data = load_dataset("example_classification_data")
        ref = load_dataset("reference_predictions")
        with Pipeline.read_json(str(save_path)) as P:
            out = P.calculate(data)
        np.testing.assert_array_equal(
            out["predicted_test_labels"].data,
            ref["reference_predictions"].data,
        )




Loading