@@ -828,3 +828,87 @@ def test_msgsigresult_doc_arguments():
828828 "sample" : {"status" : "ok" , "error_message" : "" },
829829 }
830830 }
831+
832+
833+ def test_prepare_messages_multi_repo_no_duplication (f_config_msg_batch_signer_ok ):
834+ """Test that _prepare_messages doesn't duplicate batches across multiple repos.
835+
836+ This test verifies the fix for a bug where `run_in_parallel` and `messages.extend`
837+ were incorrectly indented inside the repo loop, causing batch_data to be processed
838+ multiple times (once per repo iteration), leading to O(N²) message duplication.
839+
840+ With chunk_size=2:
841+ - repo-a (3 digests) -> 2 batches (2 + 1)
842+ - repo-b (2 digests) -> 1 batch
843+ - repo-c (1 digest) -> 1 batch
844+ Total expected: 4 batches, NOT 10 (2 + 4 + 4 from the buggy code)
845+ """
846+ signer = MsgBatchSigner ()
847+ signer .load_config (load_config (f_config_msg_batch_signer_ok ))
848+
849+ # Create operation with references spanning 3 different repos
850+ container_sign_operation = ContainerSignOperation (
851+ task_id = "1" ,
852+ digests = [
853+ # repo-a: 3 digests
854+ "sha256:aaa1" ,
855+ "sha256:aaa2" ,
856+ "sha256:aaa3" ,
857+ # repo-b: 2 digests
858+ "sha256:bbb1" ,
859+ "sha256:bbb2" ,
860+ # repo-c: 1 digest
861+ "sha256:ccc1" ,
862+ ],
863+ references = [
864+ # repo-a: 3 references
865+ "registry.example.com/namespace/repo-a:tag1" ,
866+ "registry.example.com/namespace/repo-a:tag2" ,
867+ "registry.example.com/namespace/repo-a:tag3" ,
868+ # repo-b: 2 references
869+ "registry.example.com/namespace/repo-b:tag1" ,
870+ "registry.example.com/namespace/repo-b:tag2" ,
871+ # repo-c: 1 reference
872+ "registry.example.com/namespace/repo-c:tag1" ,
873+ ],
874+ signing_keys = ["test-signing-key" ],
875+ )
876+
877+ # Call _prepare_messages directly to test the batching logic
878+ with patch ("pubtools.sign.signers.msgsigner.run_in_parallel" ) as mock_run_parallel :
879+ # Mock run_in_parallel to return message placeholders
880+ mock_run_parallel .return_value = {i : [f"msg_{ i } " ] for i in range (10 )}
881+
882+ signer ._prepare_messages (container_sign_operation )
883+
884+ # run_in_parallel should be called exactly ONCE with all batch_data
885+ assert mock_run_parallel .call_count == 1 , (
886+ f"run_in_parallel should be called once, but was called "
887+ f"{ mock_run_parallel .call_count } times. This indicates the bug where "
888+ f"run_in_parallel was incorrectly inside the repo loop."
889+ )
890+
891+ # Verify the batch_data passed to run_in_parallel
892+ call_args = mock_run_parallel .call_args
893+ batch_data = list (call_args [0 ][1 ]) # Second positional arg is the data iterable
894+
895+ # With chunk_size=2:
896+ # - repo-a (3 digests): 2 batches (chunk of 2, chunk of 1)
897+ # - repo-b (2 digests): 1 batch (chunk of 2)
898+ # - repo-c (1 digest): 1 batch (chunk of 1)
899+ # Total: 4 batches
900+ expected_batch_count = 4
901+ assert len (batch_data ) == expected_batch_count , (
902+ f"Expected { expected_batch_count } batches, got { len (batch_data )} . "
903+ f"Batch data: { [fd .args [1 ] for fd in batch_data ]} " # Show repo names
904+ )
905+
906+ # Verify each repo appears the correct number of times
907+ repo_counts = {}
908+ for fdata in batch_data :
909+ repo = fdata .args [1 ] # repo is the second arg
910+ repo_counts [repo ] = repo_counts .get (repo , 0 ) + 1
911+
912+ assert repo_counts .get ("namespace/repo-a" ) == 2 , "repo-a should have 2 batches"
913+ assert repo_counts .get ("namespace/repo-b" ) == 1 , "repo-b should have 1 batch"
914+ assert repo_counts .get ("namespace/repo-c" ) == 1 , "repo-c should have 1 batch"
0 commit comments