# s5_vis.py
# Original listing: 138 lines (114 loc), 10.3 KB.
# (Web-page navigation text and the line-number gutter captured along with this file were dropped; the script starts below.)
import sys

import scanpy as sc

# The helper module lives outside the package path, so extend sys.path
# before importing from it.
sys.path.insert(0, '/lustre/home/jydeng/single-cell/codes')
from util import *

# Load the input AnnData object.
adata = sc.read_h5ad('d6_GSE202051_celltype.h5ad')

sc.set_figure_params(dpi=200, figsize=(10, 10), facecolor="white")

# get_marker_gene_dict() is not defined in this file (presumably it comes from
# util). It is used here as a mapping of cell-type name -> marker gene list.
marker_dict = get_marker_gene_dict()

# Store one score column per cell type in adata.obs as 'score_<cell type>'.
for ct, genes in marker_dict.items():
    sc.tl.score_genes(adata, genes, score_name='score_' + ct)

# One UMAP panel per score column, three panels per row; the figure is then
# written out through the save_fig_to_path() helper instead of being shown.
sc.pl.umap(adata, color=['score_' + ct for ct in marker_dict], cmap='Reds', ncols=3, show=False)
save_fig_to_path('scores_for_clusters.png')
# adata.obs.filter()
'''
Does not seem to be a good marker:
GCG
Seems to be a good marker:
B cell: MS4A1, BLK
'''
# Hand-made cluster -> cell-type table. It is passed to annotate() (defined
# outside this file section) and afterwards used as the mapping for leiden_r2.
manual_ann = {}

# (label, cluster id or ids) pairs, applied in exactly this order.
# Trailing comments list the marker genes noted for each label.
_first_pass_labels = (
    ('Acinar', ['18', '24']),  # PRSS1, REG3A, AMY2B, REG1B, CTRB1, CTRB2
    ('B cell', 37),  # MS4A1, BANK1, ABCB4 (part)
    ('T cell', 9),  # CD69, PTPRC, CD2, IL7R, CD247
    ('Endothelial', [2, 31, 34, 36]),
    # ELF3, SLC15A1, KRT19, ABCA14, MACC1, EPCAM, CDH1 -- hard to make out!
    ('Epithelial', [0, 16, 20, 22, 11, 17, 21, 25, 28, 54, 38, 42]),
    ('Smooth muscle', [12, 19, 27]),  # RGS5
    # Not sure how to separate out: ductal
    ('Myeloid', 3),  # ITGAX, ITGAM, CD163
    ('Schwann', 35),  # CDH19, PMP22
)
for _label, _clusters in _first_pass_labels:
    annotate(manual_ann, _label, _clusters)

# Attach the labels to every cell via its leiden_r2 cluster and plot them.
adata.obs["manual_ann"] = adata.obs['leiden_r2'].map(manual_ann)
sc.pl.umap(
    adata,
    color=["manual_ann"],
    legend_fontsize="small",
    legend_loc="on data",
    show=False,
    save='manual_ann.png',
)
# leiden_r2 clusters that have no entry in manual_ann yet, in the order in
# which they first appear in adata.obs['leiden_r2'].
unassigned = [
    cluster
    for cluster in adata.obs['leiden_r2'].unique()
    if cluster not in manual_ann
]

# Plot the UMAP coloured by cluster, restricted to the unlabelled clusters.
sc.pl.umap(adata, color='leiden_r2', groups=unassigned, show=False, save='Unassigned.png')  #!!!!
# Cluster ids noted at this point (presumably the printed value of `unassigned`):
# ['5', '14', '1', '32', '8', '7', '30', '6', '10', '4', '26', '15', '23', '41', '13', '39', '29', '40', '33']
# Cluster 5 overlaps completely with the epithelial clusters; re-plot the markers.
# The lower-right corner overlaps as well; it could be redone.

# One UMAP per unassigned cluster, each highlighting only that cluster.
for ct in unassigned:
    sc.pl.umap(
        adata,
        color='leiden_r2',
        show=False,
        legend_loc='on data',
        legend_fontsize='small',
        groups=[ct],
        save=f'_{ct}.png',
    )
# Clusters carried into the second annotation pass.
_SECOND_PASS_CLUSTERS = ['5', '14', '1', '32', '8', '7', '30', '6', '10', '4', '26', '15', '23', '41', '13', '39', '29', '40', '33']

# (figure tag, output file stem, leiden_r2 clusters, scatter point size).
# The obs counts in the trailing comments are the original author's notes.
# NOTE(review): the 'right' list used to contain ' 15' (leading space), which
# cannot match any cluster label; it is '15' now, so the 50261 figure was
# presumably recorded without cluster 15 -- re-check after rerunning.
_REGION_SUBSETS = [
    ('upper', 'subset1_upper', ['26', '30', '32', '33'], 5),  # 9k? obs
    ('right', 'subset2_right', ['1', '4', '7', '8', '15', '23', '39'], 5),  # 50261 obs
    ('left', 'subset3_left', ['40', '29', '41', '14', '15', '6', '13', '10', '5'], 6),  # 48561 obs
]


def _recluster_and_save(parent, clusters, tag, file_stem, point_size):
    """Re-embed the cells of the given clusters and write them to disk.

    Args:
        parent: AnnData object to take the cells from.
        clusters: leiden_r2 labels (strings) to keep.
        tag: suffix for the UMAP figure, saved with ``save='_<tag>.png'``.
        file_stem: the subset is written to ``<file_stem>.h5ad``.
        point_size: value passed to ``sc.pl.umap`` as ``s``.

    Returns:
        The subset as an independent AnnData object.
    """
    # Copy explicitly: neighbors/umap write results into the object, and the
    # slice alone is only a view of `parent`.
    region = parent[parent.obs['leiden_r2'].isin(clusters)].copy()
    sc.pp.neighbors(region)
    sc.tl.umap(region)
    sc.pl.umap(region, show=False, color='leiden_r2', save=f'_{tag}.png', legend_loc='on data', s=point_size)
    region.write_h5ad(f'{file_stem}.h5ad')
    return region


# Keep only the second-pass clusters, then process each region in turn.
adata = adata[adata.obs['leiden_r2'].isin(_SECOND_PASS_CLUSTERS)]
for _tag, _file_stem, _clusters, _point_size in _REGION_SUBSETS:
    # `subset` ends up holding the last region ('left'), as in the original script.
    subset = _recluster_and_save(adata, _clusters, _tag, _file_stem, _point_size)
# The upper part should be endocrine.
# Let's look at the marker genes.
# Cell-type name -> marker gene symbols used for the endocrine panel.
marker_dict = {
    # 'Pericyte': ['CSPG4', 'ACTA2', 'CD146', 'NES', 'PDGFRb'],  # CD146 and PDGFRb are not present
    'Beta cell': [
        'ADCYAP1', 'NPTX2', 'NPY', 'MAFA', 'LIFR', 'CDKN1C', 'NKX6-1',
        'BMPR1A', 'ITGA1', 'MNX1', 'IAPP', 'TFCP2L1', 'PDX1', 'PIR',
        'SIX3', 'TGFBR3', 'INS', 'DLK1', 'SYT13', 'BMP5', 'SMAD9',
    ],
    'Beta-like cell': ['CD9'],
    'Endocrine cell': [
        'CHGA', 'GCG', 'CHGB', 'PPY', 'INS', 'ISL1', 'SST', 'IAPP',
    ],
    'Alpha cell': ['GCG', 'ARX'],
    'Delta cell': ['SST', 'PPY'],
    'Non-beta endocrine cell': [
        'GCG', 'PPY', 'PDK4', 'SOD2', 'SST',
    ],
    'Pancreatic polypeptide cell': ['SPP1', 'FOLR1', 'PPA1'],
}
# Label the upper part.
import scanpy as sc

# Reload the 'upper' subset from the file written earlier in this script.
adata = sc.read_h5ad('subset1_upper.h5ad')

# Store one score column per cell type in marker_dict as 'score_<cell type>'.
for ct, genes in marker_dict.items():
    sc.tl.score_genes(adata, genes, score_name='score_' + ct)

# Overview figure: one panel per score column, three panels per row.
sc.pl.umap(adata, color=['score_' + ct for ct in marker_dict], cmap='Reds', ncols=3, show=False, save='_upper_endocrine.png')

annotate(manual_ann, 'Endocrine', ['26', '30', '32', '33', '40'])

# One figure per cell type, with one panel per individual marker gene.
# The return value of sc.pl.umap is not needed, so it is not bound.
for ct, genes in marker_dict.items():
    sc.pl.umap(
        adata, color=genes, vmin=0, vmax="p99", sort_order=False, frameon=False, cmap="Reds", show=False, save=f'_genes_{ct}.png'
    )
# Second, finer annotation table for the upper subset, kept separate from
# manual_ann and passed to annotate() in the order listed.
layer2_annotation = {}
for _label, _cluster in (
    ('Alpha cell', 30),
    ('Beta cell', 26),
    ('Delta cell', 32),  # 32 could be split further into two
):
    annotate(layer2_annotation, _label, _cluster)

# Genes noted for cluster 33 -- no idea what this is.
cluster33 = [
    'LINC00907',
    'GRM7-AS3',
    'USH2A',
    'KCNB2',
    'RIMBP2',
    'TMEM132D',
]
# Reload the 'right' subset from the file written earlier in this script.
adata=sc.read_h5ad('subset2_right.h5ad')
# Broad reference table: cell-type name -> list of marker gene names.
# NOTE(review): marker_dict is reassigned to a much smaller fibroblast-focused
# table right after this literal and nothing reads it in between, so this
# table is effectively kept for reference only.
marker_dict = {
'Acinar cell': ['CTRB1', 'CTRB2', 'GCG', 'PPY', 'AMY2B', 'REG1B', 'PDK4', 'SOD2', 'PRSS1', 'SST', 'REG3A'], 'Alpha cell': ['GCG', 'ARX'], 'Antigen presentation cancer-associated fibroblast': ['SAA3P', 'CD74'], 'B cell': ['ABCB4', 'REG1A', 'AIDA', 'CD52', 'CD79B', 'TTR', 'AFF4', 'ABCB9', 'CD19', 'IAPP', 'PPY', 'INS', 'SST', 'IRF8', 'BLK', 'MS4A1', 'AIM2', 'GCG', 'CD79A', 'BANK1'], 'Basal cell': ['ARL13B', 'COL4A1'], 'Beta cell': ['ADCYAP1', 'NPTX2', 'NPY', 'MAFA', 'LIFR', 'CDKN1C', 'NKX6-1', 'BMPR1A', 'ITGA1', 'MNX1', 'IAPP', 'TFCP2L1', 'PDX1', 'PIR', 'SIX3', 'TGFBR3', 'INS', 'DLK1', 'SYT13', 'BMP5', 'SMAD9'], 'Beta-like cell': ['CD9'], 'Cancer cell': ['MYC', 'KRAS', 'TP53', 'KARS1'], 'Delta cell': ['SST', 'PPY'], 'Dendritic cell': ['CD1C', 'FCER1A', 'CD1A', 'CD34'], 'Ductal cell': ['MUC6', 'ANXA4', 'PDX1', 'AMBP', 'SCTR', 'BICC1', 'KRT8', 'CFTR', 'SOX9', 'HNF1B', 'BMPR1A', 'MMP7', 'KRT19', 'KRT7', 'SLC4A4'], 'Ductal epithelial cell': ['CLDN1', 'S100A14'], 'Emt-like cell': ['ZEB1', 'SNAI2', 'CDH2'], 'Endocrine cell': ['CHGA', 'GCG', 'CHGB', 'PPY', 'INS', 'ISL1', 'SST', 'IAPP'], 'Endothelial cell': ['SELE', 'CDH5', 'VWF', 'CD36', 'FLT1', 'ADGRL2', 'ICAM1', 'CLDN5', 'ECSCR', 'VEGFA', 'PLVAP', 'PECAM1', 'ACYP1', 'B2M', 'SLCO2A1', 'KDR'], 'Epithelial cell': ['SLC15A1', 'KRT19', 'DSC2', 'ANKRD30A', 'CA2', 'ADGB', 'CDH1', 'ELF3', 'EPCAM', 'BACE2', 'TJP2', 'MUC1', 'MACC1', 'SFTPB', 'SFTPC', 'ABCB10', 'MYLK', 'ABCA13'], 'Epithelial ductal cell': ['KRT19', 'CDH1', 'AQP5', 'SOX9'], 'Fibroblast': ['NECTIN1', 'SPARC', 'THY1', 'FN1', 'ACTA2', 'LRP1', 'COL1A1', 'LUM', 'C5AR2', 'DCN', 'FAP', 'COL3A1', 'PTPN13'], 'Follicular b cell': ['CR2', 'FCER2', 'CD19', 'CD22'], 'Gamma cell': ['PPY', 'SST'], 'Hematopoietic stem cell': ['CD34'], 'Immune cell': ['CD8A', 'PTPRC', 'ITGAM'], 'Inflammatory cancer-associated fibroblast': ['ACTA2', 'CXCL12', 'IL6'], 'Lymphatic endothelial cell': ['FLT4', 'PROX1', 'LYVE1', 'PDPN'], 'Lymphocyte': ['CD3D', 'CD3G', 'IL7R'], 'Macrophage': ['FCGR1A', 
'CD86', 'CSF1R', 'HLA-DRB1', 'AIF1', 'CD68', 'HLA-DPA1', 'ZEB2', 'CSF2RA', 'CD74', 'HLA-DRA', 'CD14', 'PTPRC'], 'Mast cell': ['CMA1', 'CAVIN2', 'GCSAML', 'KIT', 'CAPN3', 'ASIC4', 'ADIRF', 'ENPP3', 'MAOB', 'SLC18A2', 'BACE2', 'MAML1', 'CADPS', 'CDK15'], 'Mesenchymal cell': ['INHBA', 'VIM', 'FAP', 'KDELR3', 'SERPINE2', 'GPC6'], 'Monocyte': ['VCAN', 'S100A9', 'CD68', 'FCN1', 'S100A8'], 'Monocyte derived dendritic cell': ['CLEC10A', 'CD1C', 'FCER1A', 'CD1E'], 'Monocyte-derived macrophage': ['CXCL8', 'CD68', 'APOE', 'CCL3', 'CD63', 'CCL2', 'MARCO'], 'Myeloid cell': ['FCER1G', 'TYROBP', 'CD68', 'ITGAX', 'AIF1', 'ITGAM', 'CD14', 'PTPRC', 'CD163'], 'Myofibroblastic cancer-associated fibroblast': ['ACTA2', 'TAGLN'], 'Natural killer t(nkt) cell': ['NCAM1'], 'Non-beta endocrine cell': ['GCG', 'PPY', 'PDK4', 'SOD2', 'SST'], 'Pancreatic polypeptide cell': ['SPP1', 'FOLR1', 'PPA1'], 'Pancreatic progenitor cell': ['NKX6-1'], 'Pancreatic stellate cell': ['COL1A2'], 'Pro-inflammatory macrophage': ['IL1B'], 'Progenitor cell': ['GP2', 'PDX1', 'SOX9', 'HNF1B', 'TTYH1', 'PTF1A'], 'Proliferative cell': ['MKI67'], 'Proliferative ductal cell': ['CDK1', 'AURKA', 'PLK1'], 'Schwann cell': ['PMP22', 'S100B', 'SCN7A', 'CDH19', 'CRYAB'], 'Smooth muscle cell': ['NOTCH3', 'CSRP2', 'RGS5'], 'Stellate cell': ['ADIRF', 'ACTA2', 'RGS5', 'PDGFRB', 'COL1A2'], 'T cell': ['CD8A', 'CD4', 'CD69', 'CD247', 'IL7R', 'HHLA2', 'CD2', 'CD3G', 'CXCR4', 'CD3D', 'PTPRC', 'CD3E'], 'Tissue resident macrophage': ['AMY2A', 'PRSS1', 'CELA3A', 'INS'], 'Undifferentiated pancreatic progenitor cell': ['SPP1'], 'White blood cell': ['PTPRC'], 'Astrocyte': ['ALDH1L1', 'ALDOC', 'SLC1A3', 'GFAP', 'AGT', 'AGXT2L1'], 'Erythrocyte': ['GYPA', 'ALAS2', 'HBB', 'HBE1', 'HBA1', 'HBG1', 'CA1'], 'Gmp': ['AREG', 'ANXA1', 'ASPM', 'CDKN3', 'KIT', 'CD38', 'CLSPN', 'MCM10', 'SDC4', 'RMI2', 'ADK', 'APPL1', 'AP3S1', 'CD123', 'ALDH4A1', 'MUCB2', 'APLP2', 'DEPDC7'], 'Hsc': ['ABCG2', 'PROM1', 'KIT', 'THY1', 'VCAM1', 'CD41', 'CMAH', 'ITGA5', 'ACE', 
'ALDH1A1', 'BMI1', 'CD34', 'CD164'], 'Macro/mono/dc': ['BHLHE40', 'CREM', 'CSF1R', 'ADGRE2', 'CD1A', 'CD68', 'MRC1', 'CD93', 'CD209', 'ICAM4', 'CD14', 'ACPP', 'CCL18', 'ADGRE3', 'CD83', 'ACSL3'], 'Malignant cell': ['KRT8', 'KLK3', 'KRT18', 'EPCAM', 'FOLH1', 'KRT19'], 'Neuron': ['ENO2', 'MAP2', 'STMN2', 'DLG4', 'RBFOX3', 'TUBB3', 'CSF3'], 'Neutrophil': ['CXCL8', 'LGALS13', 'ADGRG3', 'AQP9', 'MNDA', 'FCGR3B', 'NFE4', 'IL5RA', 'CSF3R', 'G0S2', 'BTNL8', 'ANXA3', 'USP10'], 'Nk cell': ['KLRD1', 'KLRB1', 'NKG7', 'CD247', 'KLRK1', 'XCL1', 'XCL2', 'KLRC1', 'FCGR3A', 'NCR1', 'GZMB', 'NCR3'], 'Oligodendrocyte': ['MBP', 'OLIG2', 'MOG', 'MAG', 'PLP1', 'OLIG1', 'SOX10', 'PDGFRA'], 'Plasma cell': ['BRSK1', 'AC026202.3', 'PARM1', 'JSRP1', 'TAS1R3', 'LINC00582', 'MZB1'], 'Progenitor': ['FLT3', 'CD38', 'CD90', 'MME', 'CAR', 'CD123', 'ALDH', 'CASR', 'KDR'], 'Cdc1': ['CLEC9A'], 'Cdc2': ['CLEC10A']
}
# pancreatic stellate, mesenchymal, myofibroblast, CAF
# Fibroblast-focused marker table: cell-type name -> marker gene names.
marker_dict = {
    'Fibroblast': ['MFAP5', 'DCN', 'WNT5A', 'COL1A1', 'COL1A2', 'LUM', 'ACTA2', 'SPARC'],
    'CAF': ['PDGFRA', 'COL1A1', 'THY1', 'FAP', 'COL8A1', 'THBS2', 'CTHRC1', 'ENG', 'VIM', 'CDH11'],
    'Myofibroblast': ['ACTA2', 'TAGLN', 'FAP', 'MMP11', 'POSTN', 'CTHRC1'],
}

# Store one score column per cell type as 'score_<cell type>'.
for ct, genes in marker_dict.items():
    sc.tl.score_genes(adata, genes, score_name='score_' + ct)

# One panel per score column; show= is left at its default here, as in the original.
sc.pl.umap(adata, color=['score_' + ct for ct in marker_dict], cmap='Reds', ncols=3, save='_right_fibro.png')

# The cluster list previously contained ' 15' (with a leading space), which
# cannot correspond to any cluster id; it is '15' now.
annotate(manual_ann, 'Fibroblast', ['1', '4', '7', '8', '15', '23', '39'])
adata.obs["manual_ann"] = adata.obs['leiden_r2'].map(manual_ann)
# Additional cluster -> label assignments, merged into manual_ann below.
# Built in two steps; key order is 40, 41, 15, then the epithelial clusters.
extra_ann = {'40': 'Endocrine', '41': 'Schwann', '15': 'Fibroblast'}
extra_ann.update(dict.fromkeys(['5', '6', '10', '13', '14', '29'], 'Epithelial'))

# Entries in extra_ann overwrite any existing manual_ann entry with the same key.
manual_ann.update(extra_ann)

# Refresh the per-cell label column from the merged table.
adata.obs['manual_ann'] = adata.obs['leiden_r2'].map(manual_ann)