11import numpy as np
22from _props import aa_props
33
4- from pyaptamer .utils import is_valid_aa
4+ from pyaptamer .utils import AMINO_ACIDS , is_valid_aa
55
66
77class PSeAAC :
@@ -32,28 +32,6 @@ def __init__(self):
3232 """
3333 Initialize PSeAAC; the protein sequence is passed to ``transform``.
3434 """
35- self .amino_acid = [
36- "A" ,
37- "C" ,
38- "D" ,
39- "E" ,
40- "F" ,
41- "G" ,
42- "H" ,
43- "I" ,
44- "K" ,
45- "L" ,
46- "M" ,
47- "N" ,
48- "P" ,
49- "Q" ,
50- "R" ,
51- "S" ,
52- "T" ,
53- "V" ,
54- "W" ,
55- "Y" ,
56- ]
5735
5836 # Load normalized property matrix (20x21, rows=AA, cols=NP1-NP21)
5937 self .np_matrix = aa_props (type = "numpy" , normalize = True )
@@ -86,10 +64,8 @@ def _average_aa(self, seq):
8664 from collections import Counter
8765
8866 counts = Counter (seq )
89- total = len (self .amino_acid )
90- return {
91- aa : counts .get (aa , 0 ) / total if total > 0 else 0 for aa in self .amino_acid
92- }
67+ total = len (AMINO_ACIDS )
68+ return {aa : counts .get (aa , 0 ) / total if total > 0 else 0 for aa in AMINO_ACIDS }
9369
9470 def _theta_rirj (self , ri , rj , prop_group ):
9571 """
@@ -109,8 +85,8 @@ def _theta_rirj(self, ri, rj, prop_group):
10985 float
11086 Theta value.
11187 """
112- idx_ri = self . amino_acid .index (ri )
113- idx_rj = self . amino_acid .index (rj )
88+ idx_ri = AMINO_ACIDS .index (ri )
89+ idx_rj = AMINO_ACIDS .index (rj )
11490 diffs = (
11591 self .np_matrix [idx_rj , list (prop_group )]
11692 - self .np_matrix [idx_ri , list (prop_group )]
@@ -151,7 +127,7 @@ def transform(self, protein_sequence):
151127 if not is_valid_aa (protein_sequence ):
152128 raise ValueError (
153129 "Invalid amino acid found in protein_sequence. Only "
154- f"{ '' .join (self . amino_acid )} are allowed."
130+ f"{ '' .join (AMINO_ACIDS )} are allowed."
155131 )
156132
157133 lambda_val = 30
@@ -181,7 +157,7 @@ def transform(self, protein_sequence):
181157 denominator_val = sum_all_aa_freq + (weight * sum_all_theta_val )
182158
183159 # First 20 features: normalized amino acid composition
184- aa_composition = np .array ([aa_freq [aa ] for aa in self . amino_acid ])
160+ aa_composition = np .array ([aa_freq [aa ] for aa in AMINO_ACIDS ])
185161 all_pseaac .extend (np .round (aa_composition / denominator_val , 3 ))
186162
187163 # Next 30 features: theta values
0 commit comments