Skip to content

Commit c347988

Browse files
committed
Refactored code
1 parent dc78e44 commit c347988

2 files changed

Lines changed: 7 additions & 49 deletions

File tree

pyaptamer/AptaNet/pseaac.py

Lines changed: 7 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
from _props import aa_props
33

4-
from pyaptamer.utils import is_valid_aa
4+
from pyaptamer.utils import AMINO_ACIDS, is_valid_aa
55

66

77
class PSeAAC:
@@ -32,28 +32,6 @@ def __init__(self):
3232
"""
3333
Initialize PSeAAC and load the normalized amino acid property matrix.
3434
"""
35-
self.amino_acid = [
36-
"A",
37-
"C",
38-
"D",
39-
"E",
40-
"F",
41-
"G",
42-
"H",
43-
"I",
44-
"K",
45-
"L",
46-
"M",
47-
"N",
48-
"P",
49-
"Q",
50-
"R",
51-
"S",
52-
"T",
53-
"V",
54-
"W",
55-
"Y",
56-
]
5735

5836
# Load normalized property matrix (20x21, rows=AA, cols=NP1-NP21)
5937
self.np_matrix = aa_props(type="numpy", normalize=True)
@@ -86,10 +64,8 @@ def _average_aa(self, seq):
8664
from collections import Counter
8765

8866
counts = Counter(seq)
89-
total = len(self.amino_acid)
90-
return {
91-
aa: counts.get(aa, 0) / total if total > 0 else 0 for aa in self.amino_acid
92-
}
67+
total = len(AMINO_ACIDS)
68+
return {aa: counts.get(aa, 0) / total if total > 0 else 0 for aa in AMINO_ACIDS}
9369

9470
def _theta_rirj(self, ri, rj, prop_group):
9571
"""
@@ -109,8 +85,8 @@ def _theta_rirj(self, ri, rj, prop_group):
10985
float
11086
Theta value.
11187
"""
112-
idx_ri = self.amino_acid.index(ri)
113-
idx_rj = self.amino_acid.index(rj)
88+
idx_ri = AMINO_ACIDS.index(ri)
89+
idx_rj = AMINO_ACIDS.index(rj)
11490
diffs = (
11591
self.np_matrix[idx_rj, list(prop_group)]
11692
- self.np_matrix[idx_ri, list(prop_group)]
@@ -151,7 +127,7 @@ def transform(self, protein_sequence):
151127
if not is_valid_aa(protein_sequence):
152128
raise ValueError(
153129
"Invalid amino acid found in protein_sequence. Only "
154-
f"{''.join(self.amino_acid)} are allowed."
130+
f"{''.join(AMINO_ACIDS)} are allowed."
155131
)
156132

157133
lambda_val = 30
@@ -181,7 +157,7 @@ def transform(self, protein_sequence):
181157
denominator_val = sum_all_aa_freq + (weight * sum_all_theta_val)
182158

183159
# First 20 features: normalized amino acid composition
184-
aa_composition = np.array([aa_freq[aa] for aa in self.amino_acid])
160+
aa_composition = np.array([aa_freq[aa] for aa in AMINO_ACIDS])
185161
all_pseaac.extend(np.round(aa_composition / denominator_val, 3))
186162

187163
# Next 30 features: theta values

pyaptamer/AptaNet/test_pseaac.py

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,24 +4,6 @@
44
from pseaac import PSeAAC
55

66

7-
def _normalize_properties(property_dicts):
8-
"""
9-
Takes multiple amino acid property dictionaries and returns their
10-
normalized versions.
11-
Normalization: (value - mean) / std deviation
12-
Returns a list of normalized dictionaries in the same order.
13-
"""
14-
normalized = []
15-
for prop in property_dicts:
16-
values = list(prop.values())
17-
mean_val = sum(values) / len(values)
18-
std_val = (sum((v - mean_val) ** 2 for v in values) / len(values)) ** 0.5
19-
normalized.append(
20-
{aa: round((val - mean_val) / std_val, 3) for aa, val in prop.items()}
21-
)
22-
return normalized
23-
24-
257
def test_normalized_values():
268
"""
279
Test that normalized property matrix matches expected normalized values.

0 commit comments

Comments
 (0)