Skip to content

Commit 8d14408

Browse files
authored
Merge pull request #58 from arewm/fix-batch-signer-indentation
fix: correct indentation bug in MsgBatchSigner._prepare_messages
2 parents df19a66 + d0a90d0 commit 8d14408

2 files changed

Lines changed: 86 additions & 2 deletions

File tree

src/pubtools/sign/signers/msgsigner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -759,8 +759,8 @@ def _prepare_messages(self: Self, operation: ContainerSignOperation) -> List[Lis
759759
)
760760
batch_data.append(fdata)
761761

762-
ret = run_in_parallel(self._create_msg_batch_message, batch_data)
763-
messages.extend(list(ret.values()))
762+
ret = run_in_parallel(self._create_msg_batch_message, batch_data)
763+
messages.extend(list(ret.values()))
764764
return messages
765765

766766
def load_config(self: Self, config_data: Dict[str, Any]) -> None:

tests/test_msg_batch_signer.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,3 +828,87 @@ def test_msgsigresult_doc_arguments():
828828
"sample": {"status": "ok", "error_message": ""},
829829
}
830830
}
831+
832+
833+
def test_prepare_messages_multi_repo_no_duplication(f_config_msg_batch_signer_ok):
834+
"""Test that _prepare_messages doesn't duplicate batches across multiple repos.
835+
836+
This test verifies the fix for a bug where `run_in_parallel` and `messages.extend`
837+
were incorrectly indented inside the repo loop, causing batch_data to be processed
838+
multiple times (once per repo iteration), leading to O(N²) message duplication.
839+
840+
With chunk_size=2:
841+
- repo-a (3 digests) -> 2 batches (2 + 1)
842+
- repo-b (2 digests) -> 1 batch
843+
- repo-c (1 digest) -> 1 batch
844+
Total expected: 4 batches, NOT 10 (2 + 4 + 4 from the buggy code)
845+
"""
846+
signer = MsgBatchSigner()
847+
signer.load_config(load_config(f_config_msg_batch_signer_ok))
848+
849+
# Create operation with references spanning 3 different repos
850+
container_sign_operation = ContainerSignOperation(
851+
task_id="1",
852+
digests=[
853+
# repo-a: 3 digests
854+
"sha256:aaa1",
855+
"sha256:aaa2",
856+
"sha256:aaa3",
857+
# repo-b: 2 digests
858+
"sha256:bbb1",
859+
"sha256:bbb2",
860+
# repo-c: 1 digest
861+
"sha256:ccc1",
862+
],
863+
references=[
864+
# repo-a: 3 references
865+
"registry.example.com/namespace/repo-a:tag1",
866+
"registry.example.com/namespace/repo-a:tag2",
867+
"registry.example.com/namespace/repo-a:tag3",
868+
# repo-b: 2 references
869+
"registry.example.com/namespace/repo-b:tag1",
870+
"registry.example.com/namespace/repo-b:tag2",
871+
# repo-c: 1 reference
872+
"registry.example.com/namespace/repo-c:tag1",
873+
],
874+
signing_keys=["test-signing-key"],
875+
)
876+
877+
# Call _prepare_messages directly to test the batching logic
878+
with patch("pubtools.sign.signers.msgsigner.run_in_parallel") as mock_run_parallel:
879+
# Mock run_in_parallel to return message placeholders
880+
mock_run_parallel.return_value = {i: [f"msg_{i}"] for i in range(10)}
881+
882+
signer._prepare_messages(container_sign_operation)
883+
884+
# run_in_parallel should be called exactly ONCE with all batch_data
885+
assert mock_run_parallel.call_count == 1, (
886+
f"run_in_parallel should be called once, but was called "
887+
f"{mock_run_parallel.call_count} times. This indicates the bug where "
888+
f"run_in_parallel was incorrectly inside the repo loop."
889+
)
890+
891+
# Verify the batch_data passed to run_in_parallel
892+
call_args = mock_run_parallel.call_args
893+
batch_data = list(call_args[0][1]) # Second positional arg is the data iterable
894+
895+
# With chunk_size=2:
896+
# - repo-a (3 digests): 2 batches (chunk of 2, chunk of 1)
897+
# - repo-b (2 digests): 1 batch (chunk of 2)
898+
# - repo-c (1 digest): 1 batch (chunk of 1)
899+
# Total: 4 batches
900+
expected_batch_count = 4
901+
assert len(batch_data) == expected_batch_count, (
902+
f"Expected {expected_batch_count} batches, got {len(batch_data)}. "
903+
f"Batch data: {[fd.args[1] for fd in batch_data]}" # Show repo names
904+
)
905+
906+
# Verify each repo appears the correct number of times
907+
repo_counts = {}
908+
for fdata in batch_data:
909+
repo = fdata.args[1] # repo is the second arg
910+
repo_counts[repo] = repo_counts.get(repo, 0) + 1
911+
912+
assert repo_counts.get("namespace/repo-a") == 2, "repo-a should have 2 batches"
913+
assert repo_counts.get("namespace/repo-b") == 1, "repo-b should have 1 batch"
914+
assert repo_counts.get("namespace/repo-c") == 1, "repo-c should have 1 batch"

0 commit comments

Comments
 (0)