9
9
from unittest import TestCase , main
10
10
import os
11
11
import pandas as pd
12
- import numpy as np
13
12
from q2_qemistree import get_classyfire_taxonomy
14
13
15
14
16
15
class TestClassyfire (TestCase ):
17
16
def setUp(self):
    """Build the in-memory feature-data fixtures shared by the tests.

    Every fixture is a small ``pandas.DataFrame`` constructed inline, so
    the tests do not read any data files from disk.
    """
    # Table with no `csi_smiles` / `ms2_smiles` columns at all.
    self.no_smiles = pd.DataFrame(
        index=['a', 'b', 'c'],
        data=[1, 2, 3],
        columns=['#featureID'],
    )

    # Table mixing real SMILES strings with the 'missing' placeholder.
    # NOTE(review): the leading space in the first SMILES literal is
    # reproduced exactly as it appears in the source -- confirm that it
    # is intentional.
    self.smiles = pd.DataFrame(
        index=['a', 'b', 'c'],
        data=[
            ['missing', 'missing'],
            [' O=C(O)[C@@H](N)Cc1ccccc1', 'missing'],
            ['missing', 'CC(=NC(=O)CC(=NC(=O)C)OOC(=O)C)O'],
        ],
        columns=['csi_smiles', 'ms2_smiles'],
    )

    # Table in which every structure cell holds the 'missing' placeholder.
    self.nan_smiles = pd.DataFrame(
        index=['a', 'b', 'c'],
        data=[
            ['missing', 'missing'],
            ['missing', 'missing'],
            ['missing', 'missing'],
        ],
        columns=['csi_smiles', 'ms2_smiles'],
    )

    # Table whose non-placeholder cells ('foo', 'bar') are presumably
    # meant as malformed SMILES -- verify against the tests that use it.
    self.mal_smiles = pd.DataFrame(
        index=['a', 'b'],
        data=[
            ['missing', 'foo'],
            ['bar', 'missing'],
        ],
        columns=['csi_smiles', 'ms2_smiles'],
    )

    # Column names the classified output is checked against.
    self.levels = {
        'kingdom', 'superclass', 'class', 'subclass',
        'direct_parent', 'structure_source',
    }
36
35
37
36
def test_no_smiles (self ):
38
37
msg = ('Feature data table must contain the columns `csi_smiles` '
@@ -55,7 +54,7 @@ def test_classyfire_output(self):
55
54
classified = get_classyfire_taxonomy (self .smiles )
56
55
classified_mols = classified [classified ['kingdom' ] != 'unclassified' ]
57
56
self .assertTrue (pd .isna (classified_mols ).shape , 0 )
58
- self .assertTrue (classified_mols .loc [1 ,
57
+ self .assertTrue (classified_mols .loc ['b' ,
59
58
'kingdom' ] == 'Organic compounds' )
60
59
self .assertTrue ((self .levels .issubset (set (classified .columns ))))
61
60
0 commit comments