1- """
2- MIT License
3-
4- Copyright (c) 2019 Andrew Riha
5-
6- Permission is hereby granted, free of charge, to any person obtaining a copy
7- of this software and associated documentation files (the "Software"), to deal
8- in the Software without restriction, including without limitation the rights
9- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10- copies of the Software, and to permit persons to whom the Software is
11- furnished to do so, subject to the following conditions:
12-
13- The above copyright notice and this permission notice shall be included in all
14- copies or substantial portions of the Software.
15-
16- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22- SOFTWARE.
23-
24- """
25-
26- import os
271from unittest import TestCase
282
293import numpy as np
30- import pandas as pd
31- from pandas .api .types import is_object_dtype , is_string_dtype
32- from snps .io .reader import NORMALIZED_DTYPES
4+ from snps .testing import SNPsTestMixin , create_simulated_snp_df
335
346from lineage import Lineage
357
368
def get_complement(base):
    """Return the complementary DNA base for ``base``.

    ``A`` pairs with ``T`` and ``C`` pairs with ``G``. Any value that is not
    one of these four upper-case letters is returned unchanged.
    """
    pairing = {"A": "T", "T": "A", "G": "C", "C": "G"}
    return pairing.get(base, base)
49-
50-
def complement_one_chrom(genotype):
    """Complement the first base of ``genotype`` and keep the second as-is.

    Null genotypes (as judged by ``pd.isnull``) yield ``np.nan``. An empty
    genotype yields ``None``. Otherwise the result is the complement of the
    first base followed by the unmodified second base.
    """
    if pd.isnull(genotype):
        return np.nan

    if not genotype:
        # No bases to process, so there is no result to build.
        return None

    # Only the first base is complemented; the second is copied through.
    return get_complement(genotype[0]) + genotype[1]
61-
62-
def complement_two_chroms(genotype):
    """Complement every base of ``genotype``.

    Null genotypes (as judged by ``pd.isnull``) yield ``np.nan``; otherwise
    each base is passed through ``get_complement`` and the results are
    concatenated in order.
    """
    if pd.isnull(genotype):
        return np.nan

    return "".join(get_complement(allele) for allele in genotype)
72-
73-
def simulate_snps(
    ind,
    chrom="1",
    pos_start=1,
    pos_max=111700002,
    pos_step=10000,
    genotype="AA",
    insert_nulls=True,
    null_snp_step=101,
    complement_genotype_one_chrom=False,
    complement_genotype_two_chroms=False,
    complement_snp_step=50,
):
    """Attach a simulated SNP table to ``ind`` and return ``ind``.

    Parameters
    ----------
    ind : object
        Object that receives the ``_build`` and ``_snps`` attributes.
    chrom : str
        Value stored in the ``chrom`` column of every row.
    pos_start, pos_max, pos_step : int
        Arguments to ``np.arange`` used to generate the ``pos`` column
        (``pos_max`` is exclusive).
    genotype : str
        Genotype assigned to every row before nulls/complements are applied.
    insert_nulls : bool
        When true, every ``null_snp_step``-th row (starting with the first)
        has its genotype set to ``np.nan``.
    null_snp_step : int
        Row stride used when inserting nulls.
    complement_genotype_one_chrom : bool
        When true (and ``complement_genotype_two_chroms`` is false), apply
        ``complement_one_chrom`` to the selected rows.
    complement_genotype_two_chroms : bool
        When true, apply ``complement_two_chroms`` to the selected rows; takes
        precedence over ``complement_genotype_one_chrom``.
    complement_snp_step : int
        Row stride selecting which rows are complemented.

    Returns
    -------
    object
        The same ``ind`` that was passed in, with ``_build`` set to 37 and
        ``_snps`` set to the generated DataFrame (indexed by ``rsid``, with
        columns ``chrom``, ``pos`` and ``genotype``).
    """
    ind._build = 37

    pos = np.arange(pos_start, pos_max, pos_step, dtype=np.int64)
    rsids = pd.Index([f"rs{n}" for n in range(1, len(pos) + 1)], name="rsid")

    table = pd.DataFrame({"chrom": chrom}, index=rsids)
    table["pos"] = pos
    table["genotype"] = genotype

    if insert_nulls:
        table.loc[table.index[0::null_snp_step], "genotype"] = np.nan

    # The target rows are selected regardless of whether a complement is
    # requested, matching the order of evaluation callers already rely on.
    targets = table.index[0::complement_snp_step]

    if complement_genotype_two_chroms:
        transform = complement_two_chroms
    elif complement_genotype_one_chrom:
        transform = complement_one_chrom
    else:
        transform = None

    if transform is not None:
        table.loc[targets, "genotype"] = table.loc[targets, "genotype"].apply(
            transform
        )

    ind._snps = table
    return ind
114-
115-
116- class BaseLineageTestCase (TestCase ):
9+ class BaseLineageTestCase (SNPsTestMixin , TestCase ):
def setUp(self):
    """Create the ``Lineage`` instance stored on ``self.l`` before each test."""
    self.l = Lineage()
11912
@@ -131,120 +24,19 @@ def simulate_snps(
13124 complement_genotype_two_chroms = False ,
13225 complement_snp_step = 50 ,
13326 ):
134- """Simulate SNP data for an individual (wrapper for standalone simulate_snps)."""
135- return simulate_snps (
136- ind ,
137- chrom ,
138- pos_start ,
139- pos_max ,
140- pos_step ,
141- genotype ,
142- insert_nulls ,
143- null_snp_step ,
144- complement_genotype_one_chrom ,
145- complement_genotype_two_chroms ,
146- complement_snp_step ,
147- )
148-
149- @staticmethod
150- def create_snp_df (rsid , chrom , pos , genotype ):
151- df = pd .DataFrame (
152- {"rsid" : rsid , "chrom" : chrom , "pos" : pos , "genotype" : genotype },
153- columns = ["rsid" , "chrom" , "pos" , "genotype" ],
27+ """Simulate SNP data for an individual."""
28+ ind ._build = 37
29+ ind ._snps = create_simulated_snp_df (
30+ chrom = chrom ,
31+ pos_start = pos_start ,
32+ pos_max = pos_max ,
33+ pos_step = pos_step ,
34+ pos_dtype = np .int64 ,
35+ genotype = genotype ,
36+ insert_nulls = insert_nulls ,
37+ null_snp_step = null_snp_step ,
38+ complement_genotype_one_allele = complement_genotype_one_chrom ,
39+ complement_genotype_two_alleles = complement_genotype_two_chroms ,
40+ complement_snp_step = complement_snp_step ,
15441 )
155- df = df .astype (NORMALIZED_DTYPES )
156- df = df .set_index ("rsid" )
157- return df
158-
def generic_snps(self):
    """Return a fixed eight-row SNP table built with ``create_snp_df``.

    Rows are ``rs1`` through ``rs8`` on chromosome ``"1"`` at positions 101
    through 108; the fifth genotype is ``np.nan``.
    """
    row_count = 8
    first_pos = 101
    return self.create_snp_df(
        rsid=[f"rs{n}" for n in range(1, row_count + 1)],
        chrom=["1"] * row_count,
        pos=list(range(first_pos, first_pos + row_count)),
        genotype=["AA", "CC", "GG", "TT", np.nan, "GC", "TC", "AT"],
    )
166-
def assert_series_equal_with_string_dtype(self, left, right, **kwargs):
    """Assert two Series are equal without requiring identical string dtypes.

    If ``left`` has a string or object dtype, ``right`` must also have a
    string or object dtype. The values are then compared with
    ``check_dtype=False``, so an object-dtype Series and a string-dtype
    Series holding the same data compare equal.

    Parameters
    ----------
    left : pd.Series
        First Series to compare
    right : pd.Series
        Second Series to compare
    **kwargs : dict
        Additional arguments passed to pd.testing.assert_series_equal
    """

    def _is_text(dtype):
        return is_string_dtype(dtype) or is_object_dtype(dtype)

    # Only constrain the right-hand dtype when the left side is text-like.
    if _is_text(left.dtype):
        self.assertTrue(
            _is_text(right.dtype),
            f"Right series dtype {right.dtype} should be string/object type",
        )

    pd.testing.assert_series_equal(left, right, check_dtype=False, **kwargs)
190-
def assert_frame_equal_with_string_index(self, left, right, **kwargs):
    """Assert two DataFrames are equal without requiring identical string dtypes.

    Before comparing, this checks that any index named ``rsid`` has a string
    dtype and that the ``chrom`` and ``genotype`` columns, where present,
    have a string or object dtype. The frames are then compared with
    ``check_index_type=False`` and ``check_dtype=False``.

    Parameters
    ----------
    left : pd.DataFrame
        First DataFrame to compare
    right : pd.DataFrame
        Second DataFrame to compare
    **kwargs : dict
        Additional arguments passed to pd.testing.assert_frame_equal
    """
    # Left is always checked before right so the first failure reported is
    # the same one a sequential left/right check would hit.
    sides = (("Left", left), ("Right", right))

    for label, frame in sides:
        if frame.index.name == "rsid":
            index_dtype = frame.index.dtype
            self.assertTrue(
                is_string_dtype(index_dtype),
                f"{label} index dtype {index_dtype} is not a string type",
            )

    for col in ("chrom", "genotype"):
        for label, frame in sides:
            if col not in frame.columns:
                continue
            col_dtype = frame[col].dtype
            self.assertTrue(
                is_string_dtype(col_dtype) or is_object_dtype(col_dtype),
                f"{label} column '{col}' dtype {col_dtype} is not a string/object type",
            )

    pd.testing.assert_frame_equal(
        left, right, check_index_type=False, check_dtype=False, **kwargs
    )
238-
@property
def downloads_enabled(self):
    """Property indicating if downloads are enabled.

    Only download from external resources when an environment variable named
    "DOWNLOADS_ENABLED" is set to "true". The comparison is exact, so an
    unset variable or any other value (including "True") gives ``False``.

    Returns
    -------
    bool
    """
    # The comparison already yields a bool; no conditional expression needed.
    return os.getenv("DOWNLOADS_ENABLED") == "true"
42+ return ind
0 commit comments