mlrun
diff --git a/‎README.md‎
Lines changed: 49 additions & 0 deletions b/‎README.md‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎catalog.json‎
Lines changed: 1 addition & 1 deletion b/‎catalog.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎functions/master/catalog.json‎
Lines changed: 1 addition & 1 deletion b/‎functions/master/catalog.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎functions/master/mlflow_utils/1.2.0/src/function.yaml‎
Lines changed: 30 additions & 0 deletions b/‎functions/master/mlflow_utils/1.2.0/src/function.yaml‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎functions/master/mlflow_utils/1.2.0/src/item.yaml‎
Lines changed: 28 additions & 0 deletions b/‎functions/master/mlflow_utils/1.2.0/src/item.yaml‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎functions/master/mlflow_utils/1.2.0/src/mlflow_utils.ipynb‎
Lines changed: 1353 additions & 0 deletions b/‎functions/master/mlflow_utils/1.2.0/src/mlflow_utils.ipynb‎
Lines changed: 1353 additions & 0 deletions
diff --git a/‎functions/master/mlflow_utils/1.2.0/src/mlflow_utils.py‎
Lines changed: 45 additions & 0 deletions b/‎functions/master/mlflow_utils/1.2.0/src/mlflow_utils.py‎
Lines changed: 45 additions & 0 deletions
diff --git a/‎functions/master/mlflow_utils/1.2.0/src/requirements.txt‎
Lines changed: 3 additions & 0 deletions b/‎functions/master/mlflow_utils/1.2.0/src/requirements.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎functions/master/mlflow_utils/1.2.0/src/test_mlflow_utils.py‎
Lines changed: 207 additions & 0 deletions b/‎functions/master/mlflow_utils/1.2.0/src/test_mlflow_utils.py‎
Lines changed: 207 additions & 0 deletions
@@ -1,3 +1,52 @@
+### Change log [2026-01-20 18:09:09]
+1. Item Updated: `verify_schema` (from version: `1.0.0` to `1.0.0`)
+
+### Change log [2026-01-20 18:09:03]
+1. Item Updated: `agent_deployer` (from version: `1.0.0` to `1.0.0`)
+2. Item Updated: `histogram_data_drift` (from version: `1.0.0` to `1.0.0`)
+3. Item Updated: `openai_proxy_app` (from version: `1.0.0` to `1.0.0`)
+4. Item Updated: `vllm_module` (from version: `1.0.0` to `1.0.0`)
+5. Item Updated: `count_events` (from version: `1.0.0` to `1.0.0`)
+6. Item Updated: `evidently_iris` (from version: `1.0.0` to `1.0.0`)
+
+### Change log [2026-01-20 18:08:55]
+1. Item Updated: `test_classifier` (from version: `1.1.0` to `1.1.0`)
+2. Item Updated: `sklearn_classifier` (from version: `1.2.0` to `1.2.0`)
+3. Item Updated: `model_server_tester` (from version: `1.1.0` to `1.1.0`)
+4. Item Updated: `azureml_serving` (from version: `1.1.0` to `1.1.0`)
+5. Item Updated: `describe_dask` (from version: `1.2.0` to `1.2.0`)
+6. Item Updated: `batch_inference` (from version: `1.8.0` to `1.8.0`)
+7. Item Updated: `v2_model_server` (from version: `1.2.0` to `1.2.0`)
+8. Item Updated: `gen_class_data` (from version: `1.3.0` to `1.3.0`)
+9. Item Updated: `send_email` (from version: `1.2.0` to `1.2.0`)
+10. Item Updated: `tf2_serving` (from version: `1.1.0` to `1.1.0`)
+11. Item Updated: `aggregate` (from version: `1.4.0` to `1.4.0`)
+12. Item Updated: `open_archive` (from version: `1.2.0` to `1.2.0`)
+13. Item Updated: `describe` (from version: `1.4.0` to `1.4.0`)
+14. Item Updated: `v2_model_tester` (from version: `1.1.0` to `1.1.0`)
+15. Item Updated: `text_to_audio_generator` (from version: `1.3.0` to `1.3.0`)
+16. Item Updated: `pii_recognizer` (from version: `0.4.0` to `0.4.0`)
+17. Item Updated: `github_utils` (from version: `1.1.0` to `1.1.0`)
+18. Item Updated: `sklearn_classifier_dask` (from version: `1.1.1` to `1.1.1`)
+19. Item Updated: `azureml_utils` (from version: `1.4.0` to `1.4.0`)
+20. Item Updated: `question_answering` (from version: `0.5.0` to `0.5.0`)
+21. Item Updated: `structured_data_generator` (from version: `1.6.0` to `1.6.0`)
+22. Item Updated: `arc_to_parquet` (from version: `1.5.0` to `1.5.0`)
+23. Item Updated: `silero_vad` (from version: `1.4.0` to `1.4.0`)
+24. Item Updated: `load_dataset` (from version: `1.2.0` to `1.2.0`)
+25. Item Updated: `auto_trainer` (from version: `1.8.0` to `1.8.0`)
+26. Item Updated: `feature_selection` (from version: `1.6.0` to `1.6.0`)
+27. Item Updated: `translate` (from version: `0.3.0` to `0.3.0`)
+28. Item Updated: `describe_spark` (from version: `1.1.0` to `1.1.0`)
+29. Item Updated: `pyannote_audio` (from version: `1.3.0` to `1.3.0`)
+30. Item Updated: `onnx_utils` (from version: `1.3.0` to `1.3.0`)
+31. Item Updated: `batch_inference_v2` (from version: `2.6.0` to `2.6.0`)
+32. Item Updated: `transcribe` (from version: `1.2.0` to `1.2.0`)
+33. Item Updated: `model_server` (from version: `1.2.0` to `1.2.0`)
+34. Item Updated: `mlflow_utils` (from version: `1.2.0` to `1.2.0`)
+35. Item Updated: `noise_reduction` (from version: `1.1.0` to `1.1.0`)
+36. Item Updated: `hugging_face_serving` (from version: `1.1.0` to `1.1.0`)
+
 ### Change log [2026-01-11 09:32:46]
 1. Item Updated: `verify_schema` (from version: `1.0.0` to `1.0.0`)
 
 
@@ -0,0 +1,30 @@
+kind: serving
+verbose: false
+metadata:
+  categories:
+  - model-serving
+  - utilities
+  name: mlflow-utils
+  tag: ''
+spec:
+  image: mlrun/mlrun
+  function_kind: serving_v2
+  disable_auto_mount: false
+  max_replicas: 4
+  min_replicas: 1
+  function_handler: mlflow-utils-nuclio:handler
+  build:
+    functionSourceCode: aW1wb3J0IHppcGZpbGUKZnJvbSB0eXBpbmcgaW1wb3J0IEFueSwgRGljdAppbXBvcnQgbWxmbG93CmZyb20gbWxydW4uc2VydmluZy52Ml9zZXJ2aW5nIGltcG9ydCBWMk1vZGVsU2VydmVyCmltcG9ydCBwYW5kYXMgYXMgcGQKCgpjbGFzcyBNTEZsb3dNb2RlbFNlcnZlcihWMk1vZGVsU2VydmVyKToKICAgICIiIgogICAgTUxGbG93IHRyYWNrZXIgTW9kZWwgc2VydmluZyBjbGFzcywgaW5oZXJpdGluZyB0aGUgVjJNb2RlbFNlcnZlciBjbGFzcyBmb3IgYmVpbmcgaW5pdGlhbGl6ZWQgYXV0b21hdGljYWxseSBieSB0aGUgbW9kZWwKICAgIHNlcnZlciBhbmQgYmUgYWJsZSB0byBydW4gbG9jYWxseSBhcyBwYXJ0IG9mIGEgbnVjbGlvIHNlcnZlcmxlc3MgZnVuY3Rpb24sIG9yIGFzIHBhcnQgb2YgYSByZWFsLXRpbWUgcGlwZWxpbmUuCiAgICAiIiIKCiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkcyBhIG1vZGVsIHRoYXQgd2FzIGxvZ2dlZCBieSB0aGUgTUxGbG93IHRyYWNrZXIgbW9kZWwKICAgICAgICAiIiIKICAgICAgICAjIFVuemlwIHRoZSBtb2RlbCBkaXIgYW5kIHRoZW4gdXNlIG1sZmxvdydzIGxvYWQgZnVuY3Rpb24KICAgICAgICBtb2RlbF9maWxlLCBfID0gc2VsZi5nZXRfbW9kZWwoIi56aXAiKQogICAgICAgIG1vZGVsX3BhdGhfdW56aXAgPSBtb2RlbF9maWxlLnJlcGxhY2UoIi56aXAiLCAiIikKCiAgICAgICAgd2l0aCB6aXBmaWxlLlppcEZpbGUobW9kZWxfZmlsZSwgInIiKSBhcyB6aXBfcmVmOgogICAgICAgICAgICB6aXBfcmVmLmV4dHJhY3RhbGwobW9kZWxfcGF0aF91bnppcCkKCiAgICAgICAgc2VsZi5tb2RlbCA9IG1sZmxvdy5weWZ1bmMubG9hZF9tb2RlbChtb2RlbF9wYXRoX3VuemlwKQoKICAgIGRlZiBwcmVkaWN0KHNlbGYsIHJlcXVlc3Q6IERpY3Rbc3RyLCBBbnldKSAtPiBsaXN0OgogICAgICAgICIiIgogICAgICAgIEluZmVyIHRoZSBpbnB1dHMgdGhyb3VnaCB0aGUgbW9kZWwuIFRoZSBpbmZlcnJlZCBkYXRhIHdpbGwKICAgICAgICBiZSByZWFkIGZyb20gdGhlICJpbnB1dHMiIGtleSBvZiB0aGUgcmVxdWVzdC4KCiAgICAgICAgOnBhcmFtIHJlcXVlc3Q6IFRoZSByZXF1ZXN0IHRvIHRoZSBtb2RlbCB1c2luZyB4Z2Jvb3N0J3MgcHJlZGljdC4KICAgICAgICAgICAgICAgIFRoZSBpbnB1dCB0byB0aGUgbW9kZWwgd2lsbCBiZSByZWFkIGZyb20gdGhlICJpbnB1dHMiIGtleS4KCiAgICAgICAgOnJldHVybjogVGhlIG1vZGVsJ3MgcHJlZGljdGlvbiBvbiB0aGUgZ2l2ZW4gaW5wdXQuCiAgICAgICAgIiIiCgogICAgICAgICMgR2V0IHRoZSBpbnB1dHMgYW5kIHNldCB0byBhY2NlcHRlZCB0eXBlOgogICAgICAgIGlucHV0cyA9IHBkLkRhdGFGcmFtZShyZXF1ZXN0WyJpbnB1dHMiXSkKCiAgICAgICAgIyBQcmVkaWN0IHVzaW5nIHRoZSBtb2RlbCdzIHByZWRpY3QgZnVuY3Rpb246CiAgICAgICAgcHJlZGljdGlvbnMgPSBzZWxmLm1vZGV
sLnByZWRpY3QoaW5wdXRzKQoKICAgICAgICAjIFJldHVybiBhcyBsaXN0OgogICAgICAgIHJldHVybiBwcmVkaWN0aW9ucy50b2xpc3QoKQoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICdzZXJ2aW5nX3YyJykKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCg==
+    requirements:
+    - mlflow~=3.5
+    origin_filename: ''
+    code_origin: ''
+  description: Mlflow model server, and additional utils.
+  command: ''
+  base_image_pull: false
+  default_class: MLFlowModelServer
+  source: ''
+  default_handler: ''
+  env:
+  - name: MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK
+    value: enabled
@@ -0,0 +1,28 @@
+apiVersion: v1
+categories:
+- model-serving
+- utilities
+description: Mlflow model server, and additional utils.
+doc: ''
+example: mlflow_utils.ipynb
+generationDate: 2024-05-23:12-00
+hidden: false
+icon: ''
+labels:
+  author: Iguazio
+maintainers: []
+marketplaceType: ''
+mlrunVersion: 1.10.0
+name: mlflow_utils
+platformVersion: ''
+spec:
+  customFields:
+    default_class: MLFlowModelServer
+  filename: mlflow_utils.py
+  handler: handler
+  image: mlrun/mlrun
+  kind: serving
+  requirements:
+  - mlflow~=3.5
+url: ''
+version: 1.2.0
@@ -0,0 +1,45 @@
+import zipfile
+from typing import Any, Dict
+import mlflow
+from mlrun.serving.v2_serving import V2ModelServer
+import pandas as pd
+
+
+class MLFlowModelServer(V2ModelServer):
+    """
+    MLFlow tracker model serving class. It inherits the V2ModelServer class so it is initialized automatically by the
+    model server and is able to run locally as part of a nuclio serverless function, or as part of a real-time pipeline.
+    """
+
+    def load(self):
+        """
+        Load a zipped model that was logged by the MLFlow tracker.
+
+        The ".zip" model file is fetched with ``get_model``, extracted into a directory next to it,
+        and the extracted directory is loaded with ``mlflow.pyfunc.load_model`` into ``self.model``.
+        """
+        # Unzip the model dir and then use mlflow's load function
+        model_file, _ = self.get_model(".zip")
+        # NOTE(review): str.replace removes every ".zip" occurrence in the path, not only the
+        # trailing suffix - confirm the local path can never contain ".zip" elsewhere.
+        model_path_unzip = model_file.replace(".zip", "")
+
+        with zipfile.ZipFile(model_file, "r") as zip_ref:
+            zip_ref.extractall(model_path_unzip)
+
+        self.model = mlflow.pyfunc.load_model(model_path_unzip)
+
+    def predict(self, request: Dict[str, Any]) -> list:
+        """
+        Infer the inputs through the model. The inferred data will
+        be read from the "inputs" key of the request.
+
+        :param request: The request to the model. The input to the model is read from the
+                "inputs" key and converted to a pandas DataFrame before prediction.
+
+        :return: The model's prediction on the given input, converted to a list.
+        """
+
+        # Get the inputs and set to accepted type:
+        inputs = pd.DataFrame(request["inputs"])
+
+        # Predict using the model's predict function:
+        predictions = self.model.predict(inputs)
+
+        # Return as list:
+        return predictions.tolist()
@@ -0,0 +1,3 @@
+mlflow~=3.5
+lightgbm
+xgboost
@@ -0,0 +1,207 @@
+# Copyright 2018 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import tempfile
+import shutil
+import lightgbm as lgb
+import mlflow
+import mlflow.environment_variables
+import mlflow.xgboost
+import pytest
+import xgboost as xgb
+from sklearn import datasets
+from sklearn.metrics import accuracy_score, log_loss
+from sklearn.model_selection import train_test_split
+
+import os
+# os.environ["MLRUN_IGNORE_ENV_FILE"] = "True"  #TODO remove before push
+
+import mlrun
+import mlrun.launcher.local
+#  Important:
+#  Unlike mlconf, which resets back to its defaults after each test run, the mlflow configurations
+#  and env vars do not, so at the end of each test we need to undo anything we set in that test.
+#  What these tests cover: logging "regular" runs with an experiment name, a run id and a context
+#  name (the last two using mlconf), a run failing mid-way, and a run with no handler.
+#  We also test importing runs, artifacts and models from a previous run.
+
+# simple mlflow example of lgb logging
+def lgb_run():
+    """
+    Train a small LightGBM multiclass model on the iris dataset inside an mlflow run.
+
+    Used as an MLRun handler by the tests in this file. Auto logging is enabled before
+    training, and the log loss and accuracy on the held-out split are logged as mlflow metrics.
+    """
+    # prepare train and test data
+    iris = datasets.load_iris()
+    X = iris.data
+    y = iris.target
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # enable auto logging
+    # NOTE(review): only mlflow.xgboost is imported explicitly at the top of this file;
+    # presumably mlflow resolves mlflow.lightgbm lazily - confirm.
+    mlflow.lightgbm.autolog()
+
+    train_set = lgb.Dataset(X_train, label=y_train)
+
+    with mlflow.start_run():
+        # train model
+        params = {
+            "objective": "multiclass",
+            "num_class": 3,
+            "learning_rate": 0.1,
+            "metric": "multi_logloss",
+            "colsample_bytree": 1.0,
+            "subsample": 1.0,
+            "seed": 42,
+        }
+        # model and training data are being logged automatically
+        model = lgb.train(
+            params,
+            train_set,
+            num_boost_round=10,
+            valid_sets=[train_set],
+            valid_names=["train"],
+        )
+
+        # evaluate model: predict returns per-class probabilities, argmax picks the class
+        y_proba = model.predict(X_test)
+        y_pred = y_proba.argmax(axis=1)
+        loss = log_loss(y_test, y_proba)
+        acc = accuracy_score(y_test, y_pred)
+
+        # log metrics
+        mlflow.log_metrics({"log_loss": loss, "accuracy": acc})
+
+
+# simple mlflow example of xgb logging
+def xgb_run():
+    """
+    Train a small XGBoost multiclass model on the iris dataset inside an mlflow run.
+
+    Used as an MLRun handler by the tests in this file. Auto logging is enabled before
+    training, and the log loss and accuracy on the held-out split are logged as mlflow metrics.
+    """
+    # prepare train and test data
+    iris = datasets.load_iris()
+    x = iris.data
+    y = iris.target
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42
+    )
+
+    # enable auto logging
+    mlflow.xgboost.autolog()
+
+    dtrain = xgb.DMatrix(x_train, label=y_train)
+    dtest = xgb.DMatrix(x_test, label=y_test)
+
+    with mlflow.start_run():
+        # train model
+        params = {
+            "objective": "multi:softprob",
+            "num_class": 3,
+            "learning_rate": 0.3,
+            "eval_metric": "mlogloss",
+            "colsample_bytree": 1.0,
+            "subsample": 1.0,
+            "seed": 42,
+        }
+        # model and training data are being logged automatically
+        model = xgb.train(params, dtrain, evals=[(dtrain, "train")])
+        # evaluate model: predict returns per-class probabilities, argmax picks the class
+        y_proba = model.predict(dtest)
+        y_pred = y_proba.argmax(axis=1)
+        loss = log_loss(y_test, y_proba)
+        acc = accuracy_score(y_test, y_pred)
+        # log metrics
+        mlflow.log_metrics({"log_loss": loss, "accuracy": acc})
+
+
+@pytest.mark.parametrize("handler", ["xgb_run", "lgb_run"])
+def test_track_run_with_experiment_name(handler):
+    """
+    Track a run logged by mlflow into mlrun while it is running, using the experiment name,
+    and then serve the logged model through MLFlowModelServer on a mock server.
+
+    Steps: activate the tracking option in mlconf, name the mlflow experiment, run code that
+    is logged by mlflow through mlrun, locate the logged model, zip it, and send one
+    prediction request to a mock server built from function.yaml.
+
+    :param handler: Name of the training function in this file to run ("xgb_run" or "lgb_run").
+    """
+    experiment_name = f"{handler}_test_track"
+    # Enable general tracking
+    mlrun.mlconf.external_platform_tracking.enabled = True
+    # Set the mlflow experiment name
+    mlflow.environment_variables.MLFLOW_EXPERIMENT_NAME.set(experiment_name)
+    try:
+        with tempfile.TemporaryDirectory() as test_directory:
+            # Use SQLite backend instead of filesystem (filesystem will be deprecated in Feb 2026)
+            db_uri = f"sqlite:///{os.path.join(test_directory, 'mlflow.db')}"
+            mlflow.set_tracking_uri(db_uri)  # Tell mlflow where to save logged data
+
+            # Create a project for this tester:
+            project = mlrun.get_or_create_project(name="default", context=test_directory)
+
+            # Create a MLRun function using the tester source file (all the functions must be located in it):
+            func = project.set_function(
+                func=__file__,
+                name=f"{handler}-test",
+                kind="job",
+                image="mlrun/mlrun",
+                requirements=["mlflow"],
+            )
+            # mlflow creates a dir to log the run, this makes it in the tmpdir we create
+            trainer_run = func.run(
+                local=True,
+                handler=handler,
+                output_path=test_directory,
+            )
+
+            # Find the MLflow logged model and prepare it for serving
+            # Note: In MLflow 2.24+, we must dynamically discover model paths since MLflow changed
+            # its directory structure from predictable paths (e.g., experiment_name/0/model/) to
+            # UUID-based paths (e.g., experiment_id/run_uuid/artifacts/model/).
+
+            # Create MLflow client to query the tracking server
+            mlflow_client = mlflow.tracking.MlflowClient(tracking_uri=db_uri)
+
+            # Get the experiment by name to obtain its ID
+            experiment = mlflow_client.get_experiment_by_name(experiment_name)
+            assert experiment is not None, f"mlflow experiment '{experiment_name}' was not created"
+
+            # Search for runs in this experiment and get the run ID
+            # (There should only be one run from our training above)
+            runs = mlflow_client.search_runs(experiment_ids=[experiment.experiment_id])
+            assert runs, f"no mlflow runs were found in experiment '{experiment_name}'"
+            run_id = runs[0].info.run_id
+
+            # Find all models logged in this run
+            logged_models = mlflow.search_logged_models(filter_string=f"source_run_id = '{run_id}'")
+            artifact_locations = logged_models["artifact_location"].tolist()
+            assert artifact_locations, f"no logged models were found for mlflow run '{run_id}'"
+
+            # Extract the artifact location and remove the "file://" prefix
+            model_artifacts_dir = artifact_locations[0].replace("file://", "")
+
+            # Package the model artifacts as a zip file for MLFlowModelServer
+            # Note: MLFlowModelServer requires models to be packaged as zip archives
+            # rather than loose directories for deployment
+            model_path = os.path.join(test_directory, f"{handler}-model-serving")
+            os.makedirs(model_path, exist_ok=True)
+            shutil.make_archive(os.path.join(model_path, "model"), 'zip', model_artifacts_dir)
+
+            serving_func = project.set_function(
+                func=os.path.abspath("function.yaml"),
+                name=f"{handler}-server",
+            )
+            model_name = f"{handler}-model"
+            # Add the model
+            serving_func.add_model(
+                model_name,
+                class_name="MLFlowModelServer",
+                model_path=model_path,
+            )
+
+            # Create a mock server
+            server = serving_func.to_mock_server()
+
+            # An example taken randomly
+            result = server.test(f"/v2/models/{model_name}/predict", {"inputs": [[5.1, 3.5, 1.4, 0.2]]})
+    finally:
+        # mlflow env vars are not reset between tests, so always unset the experiment name,
+        # even when the run or one of the assertions above fails
+        mlflow.environment_variables.MLFLOW_EXPERIMENT_NAME.unset()
+    print(result)
+    assert result
+
+
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+mlflow~=3.5`
	`2`	`+lightgbm`
	`3`	`+xgboost`