Sketch out loading the model from MLflow for inference

naglepuff · naglepuff · commit b25cf0e403a8 · 2025-05-14T12:13:59.000-04:00
diff --git a/bats_ai/core/management/commands/registeronnxmodel.py b/bats_ai/core/management/commands/registeronnxmodel.py
@@ -22,6 +22,7 @@ def command():
         mlflow.onnx.log_model(
             onnx_model=onnx_model,
             artifact_path='onnx_model',
+            # save_as_external_data=True,
         )
         model_uri = f'runs:/{run_id}/onnx_model'
         result = mlflow.register_model(model_uri=model_uri, name='prototype')
diff --git a/bats_ai/tasks/tasks.py b/bats_ai/tasks/tasks.py
@@ -1,5 +1,6 @@
 import io
 import math
+import os
 import tempfile
 
 from PIL import Image
@@ -431,36 +432,57 @@ def predict(compressed_spectrogram_id: int):
     return label, score, confs
 
 
-def predict_compressed(image_file):
+def _fully_local_inference(image_file, use_mlflow_model):
     import json
-    import os
 
     import onnx
     import onnxruntime as ort
     import tqdm
 
     img = Image.open(image_file)
 
-    relative = ('..',) * 3
-    asset_path = os.path.abspath(os.path.join(__file__, *relative, 'assets'))
-
-    onnx_filename = os.path.join(asset_path, 'model.mobilenet.onnx')
-    assert os.path.exists(onnx_filename)
-
-    session = ort.InferenceSession(
-        onnx_filename,
-        providers=[
-            (
-                'CUDAExecutionProvider',
-                {
-                    'cudnn_conv_use_max_workspace': '1',
-                    'device_id': 0,
-                    'cudnn_conv_algo_search': 'HEURISTIC',
-                },
-            ),
-            'CPUExecutionProvider',
-        ],
-    )
+    if not use_mlflow_model:
+        relative = ('..',) * 3
+        asset_path = os.path.abspath(os.path.join(__file__, *relative, 'assets'))
+
+        onnx_filename = os.path.join(asset_path, 'model.mobilenet.onnx')
+        assert os.path.exists(onnx_filename)
+
+        session = ort.InferenceSession(
+            onnx_filename,
+            providers=[
+                (
+                    'CUDAExecutionProvider',
+                    {
+                        'cudnn_conv_use_max_workspace': '1',
+                        'device_id': 0,
+                        'cudnn_conv_algo_search': 'HEURISTIC',
+                    },
+                ),
+                'CPUExecutionProvider',
+            ],
+        )
+    else:
+        import mlflow
+        import mlflow.onnx
+
+        MODEL_URI = 'models:/prototype/1'
+        mlflow.set_tracking_uri(settings.MLFLOW_ENDPOINT)
+        model = mlflow.onnx.load_model(model_uri=MODEL_URI)
+        session = ort.InferenceSession(
+            model.SerializeToString(),
+            providers=[
+                (
+                    'CUDAExecutionProvider',
+                    {
+                        'cudnn_conv_use_max_workspace': '1',
+                        'device_id': 0,
+                        'cudnn_conv_algo_search': 'HEURISTIC',
+                    },
+                ),
+                'CPUExecutionProvider',
+            ],
+        )
 
     img = np.array(img)
 
@@ -507,6 +529,19 @@ def predict_compressed(image_file):
     return label, score, confs
 
 
+def predict_compressed(image_file):
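+    # 0 (default, or any other integer): run inference with the local ONNX asset file
+    # 1: load the model from MLflow, run inference locally (not implemented; returns None)
+    # 2: run inference against a deployed MLflow model (not implemented; returns None)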
+    inference_mode = int(os.getenv('INFERENCE_MODE', 0))
+    if inference_mode == 1:
+        pass
+    elif inference_mode == 2:
+        pass
+    else:
+        return _fully_local_inference(image_file, False)
+
+
 def train_body(experiment_name: str):
     import mlflow
     from mlflow.models import infer_signature

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@ def command():`
`22`	`22`	`mlflow.onnx.log_model(`
`23`	`23`	`onnx_model=onnx_model,`
`24`	`24`	`artifact_path='onnx_model',`
	`25`	`+ # save_as_external_data=True,`
`25`	`26`	`)`
`26`	`27`	`model_uri = f'runs:/{run_id}/onnx_model'`
`27`	`28`	`result = mlflow.register_model(model_uri=model_uri, name='prototype')`