@@ -1678,3 +1678,167 @@ def test_send_continues_after_single_failure(
16781678 assert len (failures ) == 1
16791679 assert failures [0 ] == "2"
16801680 assert mocks ["new_account" ].call_count == 3
1681+
1682+
class TestFormatDataForRedcapOperations:
    """Checks for ``format_data_for_redcap_operations`` on small input frames.

    Every test builds a frame with the columns ``record``, ``field_name``,
    ``value``, ``redcap_event_name``, ``redcap_repeat_instrument`` and
    ``redcap_repeat_instance``, runs it through the function, and inspects
    the rows that come back for a given ``field_name``.
    """

    class TestDeduplication:
        """Step 8 coverage: deduplication across repeat instances."""

        def test_keeps_all_values_from_latest_repeat_instance(self):
            """Multi-valued checkbox rows of the newest instance all survive."""
            # Instances 1 and 2 share one field layout; only the email differs.
            fields_per_instance = ["parent_involvement", "parent_involvement", "email"]
            columns = {
                "record": ["1465"] * 6,
                "field_name": fields_per_instance * 2,
                "value": [
                    "1",
                    "2",
                    "old@example.com",
                    "1",
                    "2",
                    "new@example.com",
                ],
                "redcap_event_name": ["event_1"] * 6,
                "redcap_repeat_instrument": ["adult_consent"] * 6,
                "redcap_repeat_instance": [1] * 3 + [2] * 3,
            }
            frame = pd.DataFrame(columns)

            result = format_data_for_redcap_operations(frame)

            # Both checkbox rows belonging to instance 2 are expected back.
            is_checkbox = result["field_name"] == "parent_involvement"
            checkbox_rows = result[is_checkbox]
            assert len(checkbox_rows) == 2
            assert set(checkbox_rows["value"]) == {"1", "2"}

            # Exactly one email row remains, and it is the instance-2 one.
            is_email = result["field_name"] == "email"
            email_rows = result[is_email]
            assert len(email_rows) == 1
            assert email_rows.iloc[0]["value"] == "new@example.com"

        def test_discards_older_repeat_instances(self):
            """Rows from any but the highest repeat instance are dropped."""
            row_count = 5
            columns = {
                "record": ["100"] * row_count,
                "field_name": ["parent_involvement"] * row_count,
                # Instance 1 holds "0", "1", "2"; instance 2 holds "1", "3".
                "value": ["0", "1", "2", "1", "3"],
                "redcap_event_name": ["event_1"] * row_count,
                "redcap_repeat_instrument": ["adult_consent"] * row_count,
                "redcap_repeat_instance": [1] * 3 + [2] * 2,
            }
            frame = pd.DataFrame(columns)

            result = format_data_for_redcap_operations(frame)

            is_checkbox = result["field_name"] == "parent_involvement"
            checkbox_rows = result[is_checkbox]
            assert len(checkbox_rows) == 2
            assert set(checkbox_rows["value"]) == {"1", "3"}

        def test_non_repeated_rows_deduplicate_by_record_and_field(self):
            """Two non-repeated rows for one record + field collapse to one."""
            columns = {
                "record": ["200"] * 2,
                "field_name": ["email"] * 2,
                "value": ["first@example.com", "second@example.com"],
                "redcap_event_name": ["event_1"] * 2,
                # No repeat instrument / instance: these rows are not repeated.
                "redcap_repeat_instrument": [None] * 2,
                "redcap_repeat_instance": [None] * 2,
            }
            frame = pd.DataFrame(columns)

            result = format_data_for_redcap_operations(frame)

            is_email = result["field_name"] == "email"
            assert len(result[is_email]) == 1

        def test_mixed_repeated_and_non_repeated(self):
            """A record mixing repeated and non-repeated fields is handled."""
            columns = {
                "record": ["300"] * 5,
                "field_name": ["dob"] + ["parent_involvement"] * 3 + ["email"],
                "value": ["2010-01-01", "0", "1", "2", "test@example.com"],
                "redcap_event_name": ["event_1"] * 5,
                "redcap_repeat_instrument": [None] + ["adult_consent"] * 3 + [None],
                "redcap_repeat_instance": [None] + [1] * 3 + [None],
            }
            frame = pd.DataFrame(columns)

            result = format_data_for_redcap_operations(frame)

            # Only one repeat instance exists, so all three checkbox rows stay.
            is_checkbox = result["field_name"] == "parent_involvement"
            assert len(result[is_checkbox]) == 3

            # Each non-repeated field comes back as exactly one row.
            for single_field in ("dob", "email"):
                assert len(result[result["field_name"] == single_field]) == 1

    class TestFieldRenaming:
        """Coverage for the field-name rename and fan-out logic."""

        def test_permission_audiovideo_1821_renamed(self):
            """`permission_audiovideo_1821` is replaced by the participant name."""
            single_row = {
                "record": ["400"],
                "field_name": ["permission_audiovideo_1821"],
                "value": ["1"],
                "redcap_event_name": ["event_1"],
                "redcap_repeat_instrument": [None],
                "redcap_repeat_instance": [None],
            }
            frame = pd.DataFrame(single_row)

            result = format_data_for_redcap_operations(frame)

            assert "permission_audiovideo_1821" not in result["field_name"].values
            assert "permission_audiovideo_participant" in result["field_name"].values

        def test_permission_collab_1821_renamed_and_decremented(self):
            """`permission_collab_1821` becomes `permission_collab`, value minus 1."""
            single_row = {
                "record": ["500"],
                "field_name": ["permission_collab_1821"],
                "value": ["1"],
                "redcap_event_name": ["event_1"],
                "redcap_repeat_instrument": [None],
                "redcap_repeat_instance": [None],
            }
            frame = pd.DataFrame(single_row)

            result = format_data_for_redcap_operations(frame)

            is_collab = result["field_name"] == "permission_collab"
            collab_rows = result[is_collab]
            assert len(collab_rows) == 1
            # The input value "1" is expected back as "0".
            assert collab_rows.iloc[0]["value"] == "0"
0 commit comments