konflux-ci · jajreidy · Mar 26, 2026 · Mar 26, 2026
@@ -33,6 +33,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `pull`: use each artifact's ``url`` from pulp_results.json when present instead of synthesizing download URLs from distribution entries
 
 ### Fixed
+- Generic `/api/v3/content/` responses that are a bare JSON array (not `{"results": [...]}`) no longer crash `gather_content_data` (build-ID and href lookups) or `_find_artifact_content` with `TypeError: list indices must be integers or slices, not str`
 - Results JSON RPM URLs with `--signed-by`: use the `rpms-signed` distribution base (`distributions.rpms_signed` / correct artifact `url`) instead of the unsigned `rpms` path
 - RPM distribution URLs: ``Packages/<letter>/`` uses the lowercase first character of the RPM **basename** only (correct for paths like ``Packages/W/foo.rpm``, ``arch/pkg.rpm``, or plain ``foo.rpm``)
 - Clear error when no auth credentials provided (client_id/client_secret or username/password)

@@ -41,6 +41,7 @@
 
 # Local imports
 from ..utils import create_session_with_retry
+from ..utils.response_utils import content_find_results_from_json
 from ..utils.artifact_detection import rpm_packages_letter_and_basename
 from ..utils.constants import DEFAULT_CHUNK_SIZE, SUPPORTED_ARCHITECTURES
 from ..utils.validation import sanitize_build_id_for_repository, validate_build_id
@@ -1689,8 +1690,7 @@ def gather_content_data(
 
         try:
             resp = self.find_content("build_id", build_id)
-            resp_json = resp.json()
-            raw_results = resp_json["results"]
+            raw_results = content_find_results_from_json(resp.json())
         except Exception:
             logging.error("Failed to get content by build ID", exc_info=True)
             raise
@@ -1708,8 +1708,7 @@ def gather_content_data(
                 if href_list:
                     href_query = ",".join(href_list)
                     resp = self.find_content("href", href_query)
-                    resp_json = resp.json()
-                    raw_results = resp_json["results"]
+                    raw_results = content_find_results_from_json(resp.json())
                     logging.info("Found %d content items by href query", len(raw_results))
             except Exception:
                 logging.error("Failed to get content by href", exc_info=True)

@@ -29,6 +29,7 @@
     from ..api.pulp_client import PulpClient
 
 from ..utils import PulpHelper, validate_file_path, create_labels
+from ..utils.response_utils import content_find_results_from_json
 from ..utils.constants import (
     SBOM_EXTENSIONS,
     SUPPORTED_ARCHITECTURES,
@@ -815,7 +816,7 @@ def _find_artifact_content(client: "PulpClient", task_response: TaskResponse) ->
         logging.error("No content artifact found in task response")
         return None
 
-    content_resp = client.find_content("href", artifact_href).json()["results"]
+    content_resp = content_find_results_from_json(client.find_content("href", artifact_href).json())
     if not content_resp:
         logging.error("No content found for href: %s", artifact_href)
         return None

@@ -7,7 +7,7 @@
 
 import logging
 import traceback
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 
 import httpx
 
@@ -130,6 +130,23 @@ def extract_results_list(response: httpx.Response, operation: str, *, allow_empt
     return results
 
 
+def content_find_results_from_json(data: Any) -> List[Dict[str, Any]]:
+    """
+    Normalize JSON from Pulp's generic ``/api/v3/content/`` list endpoint.
+
+    Accepts the paginated object ``{"results": [...], "count": ...}`` or a bare
+    JSON array of content objects and returns only the ``dict`` entries. Any
+    other shape, including a non-list ``results`` value, yields an empty list.
+    """
+    if isinstance(data, list):  # bare-array body
+        return [item for item in data if isinstance(item, dict)]  # drop non-dict entries
+    if isinstance(data, dict):  # paginated-object body
+        raw = data.get("results")  # None when the key is absent
+        if isinstance(raw, list):
+            return [item for item in raw if isinstance(item, dict)]  # drop non-dict entries
+    return []  # unrecognized shape: nothing to iterate
+
+
 def extract_single_result(response: httpx.Response, operation: str) -> Dict[str, Any]:
     """
     Extract single result from response results list.
@@ -178,6 +195,7 @@ def get_response_field(
     "extract_created_resources",
     "check_task_success",
     "extract_results_list",
+    "content_find_results_from_json",
     "extract_single_result",
     "get_response_field",
 ]
@@ -1293,6 +1293,28 @@ def test_gather_content_data_with_extra_artifacts(self, mock_pulp_client, mock_c
         assert len(content_data.content_results) == 1  # From API response
         assert len(content_data.artifacts) == 1  # Extracted from content_results
 
+    def test_gather_content_data_href_fallback_bare_list_json(self, mock_pulp_client, mock_content_data, httpx_mock):
+        """When build_id finds nothing, href query may return a bare JSON array instead of {"results": ...}."""
+        httpx_mock.get(  # build_id label query: paginated body with no results
+            re.compile(
+                r"https://pulp\.example\.com/pulp/api/v3/test-domain/api/v3/content/"
+                r"\?pulp_label_select=build_id~test-bare-list"
+            )
+        ).mock(return_value=httpx.Response(200, json={"results": []}))
+
+        row = mock_content_data["results"][0]  # reuse the fixture's first content row
+        httpx_mock.get(re.compile(r".*api/v3/content/\?pulp_href__in=.*")).mock(
+            return_value=httpx.Response(200, json=[row])  # href query answers with a bare JSON array
+        )
+
+        href = row["pulp_href"]
+        extra = [ExtraArtifactRef.model_validate({"pulp_href": href})]  # extra artifact pointing at that row
+        content_data = mock_pulp_client.gather_content_data("test-bare-list", extra)
+
+        assert len(content_data.content_results) == 1  # the single row from the bare array
+        assert content_data.content_results[0].pulp_href == href
+        assert len(content_data.artifacts) >= 1
+
     def test_build_results_structure(self, mock_pulp_client, mock_content_data, mock_file_locations, httpx_mock):
         """Test build_results_structure method."""
         from pulp_tool.models import PulpResultsModel, RepositoryRefs, FileInfoModel

@@ -918,6 +918,29 @@ def test_find_artifact_content_success(self, mock_pulp_client, httpx_mock):
         assert result[0] == "test.txt@sha256:abc123"
         assert result[1] == "abc123"
 
+    def test_find_artifact_content_bare_list_json(self, mock_pulp_client, httpx_mock):
+        """find_content JSON may be a list of content objects instead of paginated dict."""
+        from pulp_tool.services.upload_service import _find_artifact_content
+        from pulp_tool.models.pulp_api import TaskResponse
+
+        task_response = TaskResponse(
+            pulp_href="/api/v3/tasks/123/",
+            state="completed",
+            created_resources=["/api/v3/content/file/files/12345/"],
+        )
+
+        mock_content_response = Mock(spec=httpx.Response)
+        mock_content_response.json.return_value = [{"artifacts": {"test.txt": "/api/v3/artifacts/12345/"}}]  # bare list
+        mock_pulp_client.find_content = Mock(return_value=mock_content_response)
+
+        mock_artifact_response = Mock(spec=httpx.Response)
+        mock_artifact_response.json.return_value = {"results": [{"file": "test.txt@sha256:abc123", "sha256": "abc123"}]}  # still paginated
+        mock_pulp_client.get_file_locations = Mock(return_value=mock_artifact_response)
+
+        result = _find_artifact_content(mock_pulp_client, task_response)
+
+        assert result == ("test.txt@sha256:abc123", "abc123")  # taken from the mocked file-locations entry
+
 
 class TestParseOciReference:
     """Test _parse_oci_reference function."""

@@ -9,6 +9,7 @@
     extract_created_resources,
     check_task_success,
     extract_results_list,
+    content_find_results_from_json,
     extract_single_result,
     get_response_field,
 )
@@ -166,6 +167,33 @@ def test_extract_results_list_missing_results_key(self):
         assert result == []
 
 
+class TestContentFindResultsFromJson:
+    """Test content_find_results_from_json utility."""
+
+    def test_paginated_dict(self):
+        """Standard Pulp paginated list body."""
+        data = {"results": [{"pulp_href": "/c/1/"}], "count": 1}
+        assert content_find_results_from_json(data) == [{"pulp_href": "/c/1/"}]
+
+    def test_bare_list(self):
+        """Bare JSON array as returned by some content list responses."""
+        data = [{"pulp_href": "/c/1/"}]
+        assert content_find_results_from_json(data) == [{"pulp_href": "/c/1/"}]
+
+    def test_filters_non_dict_entries_in_list(self):
+        """Non-dict list entries are skipped."""
+        assert content_find_results_from_json([{"a": 1}, "skip", None]) == [{"a": 1}]
+
+    def test_dict_missing_results(self):  # dict without a "results" key -> empty list
+        assert content_find_results_from_json({"count": 0}) == []
+
+    def test_dict_results_not_list(self):  # "results" present but not a list -> empty list
+        assert content_find_results_from_json({"results": None}) == []
+
+    def test_non_collection_returns_empty(self):  # neither list nor dict -> empty list
+        assert content_find_results_from_json(123) == []
+
+
 class TestExtractSingleResult:
     """Test extract_single_result utility."""