Skip to content

Commit 6edde74

Browse files
committed
add cpg related index to genome
1 parent d2e3e49 commit 6edde74

2 files changed

Lines changed: 43 additions & 2 deletions

File tree

scprinter/genome.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import annotations
33

44
import os.path
5+
import pickle
56
from pathlib import Path
67

78
import gffutils
@@ -278,6 +279,41 @@ def fetch_bias_bw(self, verify=False):
278279

279280
return bias_bw
280281

282+
def fetch_cpg_index(self):
    """Return the CpG index of this genome, building and caching it on first use.

    The index is cached as a pickle stored next to the FASTA file, at
    ``<fasta path>_cpg_index.pkl``. When that file exists it is loaded and
    returned; otherwise the index is built with ``build_CpG_index(self)``
    and written to the cache before being returned.

    Returns
    -------
    tuple
        ``(cpg_index_all, cpg_num, cpg_coord)`` exactly as returned by
        ``build_CpG_index``.
    """
    fa_path = str(self.fetch_fa())
    cpg_index = fa_path + "_cpg_index.pkl"

    if os.path.exists(cpg_index):
        # NOTE: unpickling can execute arbitrary code. This is only safe
        # because the cache file is written by this method; do not point it
        # at files from an untrusted source.
        with open(cpg_index, "rb") as f:
            cpg_index_all, cpg_num, cpg_coord = pickle.load(f)
        return cpg_index_all, cpg_num, cpg_coord

    print("Building CpG index...")
    cpg_index_all, cpg_num, cpg_coord = build_CpG_index(self)

    # Write to a temporary file and rename it into place so that an
    # interrupted or failed dump never leaves a truncated pickle at the
    # final path (which every later call would then fail to load).
    tmp_path = f"{cpg_index}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump((cpg_index_all, cpg_num, cpg_coord), f)
        os.replace(tmp_path, cpg_index)
    finally:
        # Only still present when the dump or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return cpg_index_all, cpg_num, cpg_coord
294+
295+
296+
def build_CpG_index(genome):
    """Locate every ``CG`` dinucleotide in the genome's FASTA file.

    Each chromosome named in ``genome.chrom_sizes`` is read from the FASTA
    returned by ``genome.fetch_fa()``, upper-cased, and scanned for ``"CG"``.
    For a match whose ``C`` is at 0-based string index ``i`` the recorded
    position is ``i + 1``.

    Parameters
    ----------
    genome
        Object providing ``fetch_fa()`` (path of the FASTA file) and
        ``chrom_sizes`` (mapping keyed by chromosome name).

    Returns
    -------
    cpg_index_all : dict
        ``{chrom: {position: rank}}`` where ``rank`` is the 0-based ordinal
        of that CpG along the chromosome.
    cpg_num : dict
        ``{chrom: number of CpGs found}``.
    cpg_coord : dict
        ``{chrom: [position, ...]}`` in ascending order.
    """
    fa_path = Path(genome.fetch_fa())
    # Follow ``chrom_sizes`` order rather than a set: set iteration order of
    # strings varies between interpreter runs, which made the key order of
    # the result (and of any pickle written from it) non-deterministic.
    chroms_all = list(genome.chrom_sizes.keys())

    cpg_index_all = {}
    cpg_num = {}
    cpg_coord = {}

    fasta = Fasta(fa_path, as_raw=True)
    try:
        # Progress is reported per chromosome; the scan below no longer
        # visits every base in a Python-level loop.
        for k in trange(len(chroms_all)):
            chrom = chroms_all[k]
            seq = fasta[chrom][:].upper()

            # Hop from match to match with str.find instead of slicing and
            # comparing at every base.
            coords = []
            hit = seq.find("CG")
            while hit != -1:
                coords.append(hit + 1)
                # "CG" cannot overlap itself, so resume two characters on.
                hit = seq.find("CG", hit + 2)

            cpg_coord[chrom] = coords
            cpg_index_all[chrom] = {pos: rank for rank, pos in enumerate(coords)}
            cpg_num[chrom] = len(coords)
    finally:
        # Release the FASTA handle even if a chromosome lookup fails.
        fasta.close()
    return cpg_index_all, cpg_num, cpg_coord
316+
281317

282318
def predict_genome_tn5_bias(
283319
fa_file,

scprinter/seq/scripts/evaluation_model.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ def main(
254254
unfinished_id_strs = []
255255
for id, id_str in zip(ids, id_strs):
256256
# id_str = "-".join([str(x) for x in id])
257+
257258
if os.path.exists(
258259
os.path.join(
259260
save_dir,
@@ -426,11 +427,15 @@ def main(
426427
id_str = id_strs[index]
427428
# id_str = "-".join([str(i) for i in id]) if id[0] is not None else None
428429
bar.set_description(f"working on {id_str}")
430+
filename = f"model_{id_str}." if id[0] is not None else ""
431+
filename += f"hypo.{wrapper}.{method}{extra}.{decay}."
432+
if (id[0] is None) and ((start != 0) or (end != dataset.summits.shape[0])):
433+
filename += f"{start}-{end}."
434+
filename += "npz"
429435
if os.path.exists(
430436
os.path.join(
431437
save_dir,
432-
(f"model_{id_str}." if id[0] is not None else "")
433-
+ f"hypo.{wrapper}.{method}{extra}.{decay}.npz",
438+
filename,
434439
)
435440
) and (not overwrite):
436441
if verbose:

0 commit comments

Comments
 (0)