Skip to content

Commit 87b4e46

Browse files
authored
adding warning if pat detects in git url while creation of job (#40337)
* adding warning if pat detects in git url while creation of job * adding test cases * applying black formatter
1 parent 5d250cd commit 87b4e46

File tree

5 files changed

+107
-15
lines changed

5 files changed

+107
-15
lines changed

Diff for: sdk/ml/azure-ai-ml/CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
### Bugs Fixed
66
- Fix for compute instance: the disableLocalAuth property should depend on whether SSH public access is enabled.
7+
- Removing Git-related properties from job properties if a PAT token is detected in the repository URL.
78

89
## 1.26.1 (2025-03-27)
910

Diff for: sdk/ml/azure-ai-ml/azure/ai/ml/operations/_job_operations.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
WorkspaceDiscoveryUrlKey,
6666
)
6767
from azure.ai.ml.constants._compute import ComputeType
68+
from azure.ai.ml.constants._common import GitProperties
6869
from azure.ai.ml.constants._job.pipeline import PipelineConstants
6970
from azure.ai.ml.entities import Compute, Job, PipelineJob, ServiceInstance, ValidationResult
7071
from azure.ai.ml.entities._assets._artifacts.code import Code
@@ -105,7 +106,12 @@
105106
from ._component_operations import ComponentOperations
106107
from ._compute_operations import ComputeOperations
107108
from ._dataset_dataplane_operations import DatasetDataplaneOperations
108-
from ._job_ops_helper import get_git_properties, get_job_output_uris_from_dataplane, stream_logs_until_completion
109+
from ._job_ops_helper import (
110+
get_git_properties,
111+
get_job_output_uris_from_dataplane,
112+
has_pat_token,
113+
stream_logs_until_completion,
114+
)
109115
from ._local_job_invoker import is_local_run, start_run_if_local
110116
from ._model_dataplane_operations import ModelDataplaneOperations
111117
from ._operation_orchestrator import (
@@ -681,7 +687,18 @@ def create_or_update(
681687
# their job, the request will fail since the git props will be repopulated.
682688
# MFE does not allow existing properties to be updated, only for new props to be added
683689
if not any(prop_name in job.properties for prop_name in git_props):
684-
job.properties = {**job.properties, **git_props}
690+
repo_url = git_props.get(GitProperties.PROP_MLFLOW_GIT_REPO_URL)
691+
692+
if has_pat_token(repo_url):
693+
git_props.pop(GitProperties.PROP_MLFLOW_GIT_REPO_URL)
694+
git_props.pop(GitProperties.PROP_MLFLOW_GIT_BRANCH)
695+
git_props.pop(GitProperties.PROP_MLFLOW_GIT_COMMIT)
696+
git_props.pop(GitProperties.PROP_DIRTY)
697+
module_logger.warning("Git properties are removed because the repository URL contains a secret.")
698+
699+
if git_props:
700+
job.properties = {**job.properties, **git_props}
701+
685702
rest_job_resource = to_rest_job_object(job)
686703

687704
# Make a copy of self._kwargs instead of contaminate the original one

Diff for: sdk/ml/azure-ai-ml/azure/ai/ml/operations/_job_ops_helper.py

+16
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,22 @@ def _run_git_cmd(args: Iterable[str]) -> Optional[str]:
435435
return properties
436436

437437

438+
def has_pat_token(url: Optional[str]) -> bool:
    """Check if the given repository URL contains a PAT token.

    A URL is flagged when an ``@`` appears either in the authority
    (``https://<token>@host/...``) or in the first path segment
    (``https://host/<token>@...``). The scheme may be ``http`` or ``https``
    in any letter case, since credentials embedded in either form are
    equally sensitive.

    :param url: Repository URL to check.
    :type url: Optional[str]
    :return: True if PAT token is detected, False otherwise.
    :rtype: bool
    """
    # None and "" carry no credentials; bail out before touching the regex.
    if not url:
        return False

    # Matches both "https://dev.azure.com/<token>@..." and "https://<token>@dev.azure.com/...".
    # "[^/@]+/" optionally consumes the host segment; the following "[^/@]+@" is the
    # credential-looking part. Nothing is extracted, so the groups are non-capturing.
    pat_regex = r"https?://(?:[^/@]+/)?(?:[^/@]+)@"
    # URI schemes are case-insensitive, so "HTTPS://..." must be caught as well.
    return re.search(pat_regex, url, flags=re.IGNORECASE) is not None
452+
453+
438454
def get_job_output_uris_from_dataplane(
439455
job_name: Optional[str],
440456
run_operations: RunOperations,

Diff for: sdk/ml/azure-ai-ml/tests/job_common/unittests/test_job_operations.py

+60-13
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,25 @@
11
import json
22
import os
3-
from typing import Callable
43
from unittest.mock import Mock, patch
54

65
import jwt
76
import pytest
8-
import yaml
9-
from msrest import Deserializer
107
from pytest_mock import MockFixture
118

12-
from azure.ai.ml import MLClient, load_job
9+
from azure.ai.ml import load_job
1310
from azure.ai.ml._azure_environments import _get_aml_resource_id_from_metadata, _resource_to_scopes
1411
from azure.ai.ml._restclient.v2023_04_01_preview import models
1512
from azure.ai.ml._scope_dependent_operations import OperationConfig, OperationScope
16-
from azure.ai.ml.constants._common import AZUREML_PRIVATE_FEATURES_ENV_VAR, AzureMLResourceType
13+
from azure.ai.ml.constants._common import AZUREML_PRIVATE_FEATURES_ENV_VAR, AzureMLResourceType, GitProperties
1714
from azure.ai.ml.entities._builders import Command
18-
from azure.ai.ml.entities._job.automl.automl_job import AutoMLJob
19-
from azure.ai.ml.entities._job.automl.training_settings import TrainingSettings
2015
from azure.ai.ml.entities._job.job import Job
21-
from azure.ai.ml.entities._job.sweep.sweep_job import SweepJob
22-
from azure.ai.ml.exceptions import ValidationException
2316
from azure.ai.ml.operations import DatastoreOperations, EnvironmentOperations, JobOperations, WorkspaceOperations
2417
from azure.ai.ml.operations._code_operations import CodeOperations
2518
from azure.ai.ml.operations._job_ops_helper import get_git_properties
26-
from azure.ai.ml.operations._run_history_constants import RunHistoryConstants
2719
from azure.ai.ml.operations._run_operations import RunOperations
2820
from azure.core.credentials import AccessToken
29-
from azure.core.exceptions import HttpResponseError
3021
from azure.identity import DefaultAzureCredential
3122

32-
from .test_vcr_utils import before_record_cb, vcr_header_filters
33-
3423

3524
@pytest.fixture
3625
def mock_datastore_operation(
@@ -298,6 +287,64 @@ def test_job_create_skip_validation(self, mock_method, mock_job_operation: JobOp
298287
mock_job_operation.create_or_update(job=job)
299288
mock_thing.assert_called_once()
300289

290+
@patch("azure.ai.ml.operations._job_operations.get_git_properties")
@patch.object(Job, "_from_rest_object")
def test_create_or_update_removes_git_props_if_pat_in_repo_url(
    self, mock_method, mock_get_git_properties, mock_job_operation: JobOperations
) -> None:
    """Submitting a job whose detected repo URL has an ``@`` user-info part must not store git properties."""
    # (property key, failure message) pairs, asserted in this order after submission.
    absent_expectations = (
        (GitProperties.PROP_MLFLOW_GIT_REPO_URL, "repoURL key should not exist in job.properties"),
        (GitProperties.PROP_MLFLOW_GIT_BRANCH, "branch key should not exist in job.properties"),
        (GitProperties.PROP_MLFLOW_GIT_COMMIT, "commit key should not exist in job.properties"),
        (GitProperties.PROP_DIRTY, "dirty key should not exist in job.properties"),
    )

    mock_method.return_value = Command(component=None)

    # The "example@" prefix is what makes the repository URL look like it carries a token.
    mock_get_git_properties.return_value = {
        GitProperties.PROP_MLFLOW_GIT_REPO_URL: "https://example@mock-repo-url",
        GitProperties.PROP_MLFLOW_GIT_BRANCH: "mock-branch",
        GitProperties.PROP_MLFLOW_GIT_COMMIT: "mock-commit",
        GitProperties.PROP_DIRTY: "True",
    }

    job = load_job("./tests/test_configs/command_job/simple_train_test.yml")

    # Validation and dependency resolution are patched out for the duration of the call.
    with patch.object(JobOperations, "_validate"):
        with patch.object(JobOperations, "_resolve_arm_id_or_upload_dependencies"):
            mock_job_operation.create_or_update(job=job)
            mock_get_git_properties.assert_called_once()
            for prop_key, failure_message in absent_expectations:
                assert prop_key not in job.properties, failure_message
320+
321+
@patch("azure.ai.ml.operations._job_operations.get_git_properties")
@patch.object(Job, "_from_rest_object")
def test_create_or_update_includes_git_props_if_no_pat_in_repo_url(
    self, mock_method, mock_get_git_properties, mock_job_operation: JobOperations
) -> None:
    """Submitting a job whose detected repo URL has no user-info part must keep all git properties."""
    # (property key, failure message) pairs, asserted in this order after submission.
    present_expectations = (
        (GitProperties.PROP_MLFLOW_GIT_REPO_URL, "repoURL key should exist in job.properties"),
        (GitProperties.PROP_MLFLOW_GIT_BRANCH, "branch key should exist in job.properties"),
        (GitProperties.PROP_MLFLOW_GIT_COMMIT, "commit key should exist in job.properties"),
        (GitProperties.PROP_DIRTY, "dirty key should exist in job.properties"),
    )

    mock_method.return_value = Command(component=None)

    # Plain URL without any "@": nothing here should be treated as a secret.
    mock_get_git_properties.return_value = {
        GitProperties.PROP_MLFLOW_GIT_REPO_URL: "https://mock-repo-url",
        GitProperties.PROP_MLFLOW_GIT_BRANCH: "mock-branch",
        GitProperties.PROP_MLFLOW_GIT_COMMIT: "mock-commit",
        GitProperties.PROP_DIRTY: "True",
    }

    job = load_job("./tests/test_configs/command_job/simple_train_test.yml")

    # Validation and dependency resolution are patched out for the duration of the call.
    with patch.object(JobOperations, "_validate"):
        with patch.object(JobOperations, "_resolve_arm_id_or_upload_dependencies"):
            mock_job_operation.create_or_update(job=job)
            mock_get_git_properties.assert_called_once()
            for prop_key, failure_message in present_expectations:
                assert prop_key in job.properties, failure_message
347+
301348
def test_download_with_none(self, mock_job_operation: JobOperations) -> None:
302349
with pytest.raises(Exception) as ex:
303350
mock_job_operation.download(None)

Diff for: sdk/ml/azure-ai-ml/tests/job_common/unittests/test_job_ops_helper.py

+11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from azure.ai.ml._scope_dependent_operations import OperationScope
1313
from azure.ai.ml.operations._job_ops_helper import (
1414
_get_sorted_filtered_logs,
15+
has_pat_token,
1516
_incremental_print,
1617
list_logs,
1718
stream_logs_until_completion,
@@ -64,6 +65,16 @@ def mock_run_operations(mock_workspace_scope: OperationScope, mock_aml_services_
6465
yield RunOperations(mock_workspace_scope, mock_aml_services_run_history)
6566

6667

68+
@pytest.mark.unittest
@pytest.mark.training_experiences_test
class TestJobOpsHelper:
    """Unit tests for standalone helpers in ``_job_ops_helper``."""

    def test_has_pat_token(self) -> None:
        """``has_pat_token`` flags URLs with an ``@`` credential part and ignores the rest."""
        # Token placed in the first path segment.
        assert has_pat_token("https://dev.azure.com/mypattoken@company_machineName/project-name/_git/repo_name")
        # Token placed in the authority (user-info) part, in front of the host.
        # NOTE(review): the literal token was lost to e-mail obfuscation in the page this
        # was recovered from; "mypattoken" is a stand-in - confirm against the repository.
        assert has_pat_token("https://mypattoken@dev.azure.com/<organization>/<project>/_git/<repo>")

        # Ordinary URLs without any "@" must not be flagged.
        assert not has_pat_token("https://dev.azure.com/organization/project/_apis/pipelines/1/runs")
        assert not has_pat_token("https://learn.microsoft.com/en-us/ai/?tabs=developer")

        # Degenerate inputs: no URL at all.
        assert not has_pat_token(None)
        assert not has_pat_token("")
76+
77+
6778
@pytest.mark.skip("TODO 1907352: Relies on a missing VCR.py recording + test suite needs to be reworked")
6879
@pytest.mark.unittest
6980
@pytest.mark.training_experiences_test

0 commit comments

Comments
 (0)