Skip to content

Commit bbc16d1

Browse files
Submit more files to Log Detective for analysis (#3106)
Submit more files to Log Detective for analysis References to multiple artifacts are now used when asking Log Detective for analysis. Since we can not be completely sure about their availability, all URLs are checked using get request. If no artifacts pass the check, the trigger fails and the information is logged. In order to avoid references to files that can't be possibly analyzed by Log Detective, only URLs pointing to files of the text/plain type are allowed, all others are dropped from the final request. This check should see more use as we interact with other build backends, and submit artifacts other than just logs. New tests for the verify_artifact were introduced and existing tests adjusted. Merge after fedora-copr/logdetective-packit#36 fedora-copr/logdetective#492 RELEASE NOTES BEGIN Packit now sends build.log, root.log and mock_output.log from failed Koji builds to Log Detective. RELEASE NOTES END Reviewed-by: gemini-code-assist[bot] Reviewed-by: Nikola Forró Reviewed-by: Jiří Podivín Reviewed-by: Laura Barcziová
2 parents c6d0c90 + fdd4c30 commit bbc16d1

5 files changed

Lines changed: 165 additions & 12 deletions

File tree

packit_service/utils.py

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
from logging import StreamHandler
1313
from pathlib import Path
1414
from re import search
15-
from typing import Optional
15+
from typing import Optional, Union
16+
from urllib.parse import urlparse
1617

1718
import requests
1819
from cachetools.func import ttl_cache
@@ -428,6 +429,84 @@ def get_user_agent() -> str:
428429
)
429430

430431

432+
def check_url(url: str) -> bool:
433+
"""
434+
Verify that URL is valid.
435+
436+
Args:
437+
url: URL to be validated
438+
439+
Returns:
440+
True if the URL can be parsed, contains both scheme and netloc
441+
"""
442+
443+
try:
444+
result = urlparse(url)
445+
return all([result.scheme, result.netloc])
446+
except ValueError:
447+
return False
448+
449+
450+
def check_content_type(content_type: Union[str, None], url: str) -> bool:
451+
"""
452+
Verify that content_type is text/plain and log failures.
453+
454+
Args:
455+
content_type: Value of Content-Type header field
456+
url: URL of the checked artifact
457+
458+
Returns:
459+
True if content type is text/plain
460+
"""
461+
if not content_type:
462+
logger.warning("No content-type reported for artifact at %s", url)
463+
return False
464+
465+
if "text/plain" not in content_type.lower():
466+
logger.warning("Artifact: %s is not of allowed text/plain type, but %s", url, content_type)
467+
return False
468+
469+
return True
470+
471+
472+
def verify_artifact(url: str) -> bool:
473+
"""
474+
Verify that URL points to an text/plain artifact that can be downloaded.
475+
476+
Checks Content-Type in headers using GET request.
477+
478+
Args:
479+
url: URL of the artifact to be verified
480+
481+
Returns:
482+
True if the artifact satisfies all requirements
483+
"""
484+
485+
if not check_url(url=url):
486+
logger.warning("Invalid URL for build artifact: %s", url)
487+
return False
488+
try:
489+
with requests.get(
490+
url=url,
491+
headers={"User-Agent": get_user_agent()},
492+
timeout=(10, 30),
493+
stream=True,
494+
allow_redirects=True,
495+
) as response:
496+
if response.status_code != 200:
497+
logger.warning(
498+
"Request for artifact: %s failed with: %s", url, response.status_code
499+
)
500+
return False
501+
return check_content_type(response.headers.get("Content-Type"), url)
502+
503+
except requests.exceptions.RequestException as exc:
504+
logger.warning(
505+
"Verification of artifact %s failed with error: %s", url, exc, stack_info=True
506+
)
507+
return False
508+
509+
431510
def download_file(url: str, path: Path):
432511
"""
433512
Download a file from given url to the given path.

packit_service/worker/helpers/logdetective.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
LogDetectiveRunGroupModel,
1919
LogDetectiveRunModel,
2020
)
21+
from packit_service.utils import verify_artifact
2122
from packit_service.worker.monitoring import Pushgateway
2223

2324
logger = logging.getLogger(__name__)
@@ -84,14 +85,30 @@ def trigger_log_detective_analysis_for_arch(self, arch: str) -> bool:
8485
Instead of returning TaskResults() object, as job handlers do,
8586
we just return a boolean signaling whether or not the trigger succeeded.
8687
"""
87-
88+
artifacts = {}
8889
build_arch_task_id = self.koji_event.rpm_build_task_ids[arch]
89-
artifacts = {
90-
"mock_output.log": koji.result.KojiEvent.get_koji_build_logs_url(
91-
build_arch_task_id,
92-
self.koji_logs_url,
90+
91+
# Following artifacts are the most likely to contain valuable information
92+
# Including too many could impact latency and response quality
93+
possible_koji_artifacts = [
94+
"root.log",
95+
"mock_output.log",
96+
"build.log",
97+
]
98+
99+
# Use only artifacts with valid URLs
100+
for artifact in possible_koji_artifacts:
101+
url = koji.result.KojiEvent.get_koji_build_logs_url(
102+
rpm_build_task_id=build_arch_task_id,
103+
koji_logs_url=self.koji_logs_url,
104+
log_file=artifact,
93105
)
94-
}
106+
if verify_artifact(url):
107+
artifacts[artifact] = url
108+
109+
if len(artifacts) == 0:
110+
logger.warning("No artifact URLs passed verification")
111+
return False
95112

96113
endpoint_url = f"{self.url}/analyze"
97114
request_json = {

tests/integration/test_logdetective_koji.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from flexmock import Mock, flexmock
1111
from packit.config.common_package_config import Deployment
1212

13+
import packit_service.worker.helpers.logdetective as logdetective_module
1314
from packit_service.config import FedoraCISettings, ServiceConfig
1415
from packit_service.constants import LOGDETECTIVE_PACKIT_SERVER_URL
1516
from packit_service.events import koji
@@ -101,6 +102,7 @@ def test_logdetective_koji_build_scratch_downstream(
101102
},
102103
).one_by_one()
103104
flexmock(requests).should_receive("post").and_return(mock_ld_response)
105+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
104106

105107
mock_group_run = flexmock(id=1)
106108
flexmock(LogDetectiveRunGroupModel).should_receive("create").times(

tests/unit/test_logdetective_koji_helper.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import requests
1010
from flexmock import flexmock
1111

12+
import packit_service.worker.helpers.logdetective as logdetective_module
1213
from packit_service.constants import LOGDETECTIVE_PACKIT_SERVER_URL, KojiTaskState
1314
from packit_service.models import (
1415
LogDetectiveBuildSystem,
@@ -81,15 +82,17 @@ def test_logdetective_koji_set_payload(mock_koji_task_failed_event, mock_event_d
8182

8283
request_json = {
8384
"artifacts": {
85+
"root.log": "https://kojipkgs.fedoraproject.org//work/tasks/2345/12345/root.log",
8486
"mock_output.log": "https://kojipkgs.fedoraproject.org//work/tasks/2345/12345/mock_output.log",
87+
"build.log": "https://kojipkgs.fedoraproject.org//work/tasks/2345/12345/build.log",
8588
},
8689
"target_build": "12345",
8790
"build_system": "koji",
8891
"commit_sha": "abc123",
8992
"project_url": "https://github.com/test/repo",
9093
"pr_id": 42,
9194
}
92-
95+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
9396
flexmock(requests).should_receive("post").with_args(
9497
"https://logdetective01.fedorainfracloud.org/analyze",
9598
json=request_json,
@@ -118,12 +121,14 @@ def test_logdetective_koji_success(
118121
"creation_time": "2026-01-01T12:00:00",
119122
}
120123
)
121-
124+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
122125
flexmock(requests).should_receive("post").with_args(
123126
f"{LOGDETECTIVE_PACKIT_SERVER_URL}/analyze",
124127
json={
125128
"artifacts": {
126-
"mock_output.log": "https://kojipkgs.fedoraproject.org//work/tasks/2345/12345/mock_output.log"
129+
"root.log": "https://kojipkgs.fedoraproject.org//work/tasks/2345/12345/root.log",
130+
"mock_output.log": "https://kojipkgs.fedoraproject.org//work/tasks/2345/12345/mock_output.log",
131+
"build.log": "https://kojipkgs.fedoraproject.org//work/tasks/2345/12345/build.log",
127132
},
128133
"target_build": "12345",
129134
"build_system": LogDetectiveBuildSystem.koji.value,
@@ -175,7 +180,7 @@ def test_logdetective_koji_http_error(
175180
):
176181
mock_response = flexmock(status_code=500)
177182
mock_response.should_receive("raise_for_status")
178-
183+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
179184
flexmock(requests).should_receive("post").and_raise(
180185
requests.exceptions.HTTPError("500 Server Error")
181186
)
@@ -203,6 +208,7 @@ def test_logdetective_koji_connection_error(
203208
):
204209
mock_response = flexmock()
205210
mock_response.should_receive("raise_for_status")
211+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
206212
flexmock(requests).should_receive("post").and_raise(requests.exceptions.ConnectionError)
207213

208214
flexmock(LogDetectiveRunGroupModel).should_receive("create").never()
@@ -230,7 +236,7 @@ def test_logdetective_koji_json_decode_error(
230236
mock_response.should_receive("json").and_raise(
231237
requests.exceptions.JSONDecodeError("Invalid JSON", "", 0)
232238
)
233-
239+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
234240
flexmock(requests).should_receive("post").and_return(mock_response)
235241
flexmock(LogDetectiveRunGroupModel).should_receive("create").never()
236242
flexmock(LogDetectiveRunModel).should_receive("create").never()
@@ -252,6 +258,7 @@ def test_logdetective_koji_json_decode_error(
252258
def test_logdetective_koji_timeout(
253259
mock_koji_task_failed_event, mock_event_data, mock_pushgateway_log_detective_no_inc
254260
):
261+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
255262
flexmock(requests).should_receive("post").and_raise(
256263
requests.exceptions.Timeout("Request timed out")
257264
)
@@ -283,6 +290,7 @@ def test_logdetective_koji_missing_id(
283290
"creation_time": "2026-01-01T12:00:00",
284291
}
285292
)
293+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
286294
flexmock(requests).should_receive("post").and_return(mock_response)
287295
flexmock(logger).should_receive("warning").with_args(
288296
"Log Detective response is missing log_detective_analysis_id",
@@ -311,6 +319,7 @@ def test_logdetective_koji_missing_time(
311319
"log_detective_analysis_id": "test-uuid-123",
312320
}
313321
)
322+
flexmock(logdetective_module).should_receive("verify_artifact").and_return(True)
314323
flexmock(requests).should_receive("post").and_return(mock_response)
315324
flexmock(logger).should_receive("warning").with_args(
316325
"Log Detective response is missing creation_time",

tests/unit/test_utils.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Copyright Contributors to the Packit project.
22
# SPDX-License-Identifier: MIT
33
import pytest
4+
import requests
45
from flexmock import flexmock
56
from packit.config.aliases import Distro
67

@@ -9,6 +10,7 @@
910
get_default_tf_mapping,
1011
only_once,
1112
pr_labels_match_configuration,
13+
verify_artifact,
1214
)
1315

1416

@@ -190,3 +192,47 @@ def test_get_default_tf_mapping(internal, target, compose):
190192
)
191193
mapping = get_default_tf_mapping(internal)
192194
assert mapping[target] == compose
195+
196+
197+
def test_verify_artifact_valid():
198+
url = "https://example.com/valid.txt"
199+
mock_response = flexmock(status_code=200, headers={"Content-Type": "text/plain; charset=utf-8"})
200+
201+
mock_response.should_receive("__enter__").and_return(mock_response)
202+
mock_response.should_receive("__exit__").and_return(None)
203+
204+
flexmock(requests).should_receive("get").once().and_return(mock_response)
205+
206+
assert verify_artifact(url) is True
207+
208+
209+
def test_verify_artifact_404_not_found():
210+
url = "https://example.com/missing.txt"
211+
212+
mock_response = flexmock(status_code=404, headers={})
213+
mock_response.should_receive("__enter__").and_return(mock_response)
214+
mock_response.should_receive("__exit__").and_return(None)
215+
216+
flexmock(requests).should_receive("get").once().and_return(mock_response)
217+
218+
assert verify_artifact(url) is False
219+
220+
221+
def test_verify_artifact_wrong_type():
222+
url = "https://example.com/api/data.json"
223+
224+
mock_response = flexmock(status_code=200, headers={"Content-Type": "application/json"})
225+
mock_response.should_receive("__enter__").and_return(mock_response)
226+
mock_response.should_receive("__exit__").and_return(None)
227+
228+
flexmock(requests).should_receive("get").once().and_return(mock_response)
229+
230+
assert verify_artifact(url) is False
231+
232+
233+
def test_verify_artifact_network_failure():
234+
url = "https://broken-link.com"
235+
236+
flexmock(requests).should_receive("get").and_raise(requests.exceptions.ConnectionError)
237+
238+
assert verify_artifact(url) is False

0 commit comments

Comments
 (0)