-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGSEA.py
113 lines (86 loc) · 3.58 KB
/
GSEA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import requests
import json
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import csv
import io
from pandas.core.frame import DataFrame
def perform_enrichment_analysis(gene_names, timeout=60):
    """Upload a gene list to the Enrichr ``addList`` endpoint.

    Args:
        gene_names: Iterable of gene name strings. Surrounding whitespace is
            stripped and blank entries are dropped before the upload.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        ValueError: If no non-blank gene name remains after cleaning.
        Exception: If the server answers with a non-success status code.
    """
    genes = [name.strip() for name in gene_names if name and name.strip()]
    if not genes:
        # Fail before any network traffic: an empty list cannot be enriched.
        raise ValueError("gene_names must contain at least one non-empty gene name")

    url = 'https://maayanlab.cloud/Enrichr/addList'
    # Each field is a (filename, value) tuple with the filename slot set to
    # None, so the values go out as multipart form fields.
    payload = {
        'list': (None, '\n'.join(genes)),
        'description': (None, 'Gene List'),
    }
    # Without an explicit timeout requests waits indefinitely on a stalled server.
    response = requests.post(url, files=payload, timeout=timeout)
    if not response.ok:
        raise Exception(f"Error occurred while performing enrichment analysis. Status code: {response.status_code} Error message: {response.text}")
    return response.json()
def get_enrichment_results(data, timeout=60):
    """Fetch KEGG_2021_Human enrichment results for an uploaded gene list.

    Args:
        data: Mapping that contains a ``'userListId'`` entry.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the ``enrich`` response.

    Raises:
        KeyError: If ``data`` has no ``'userListId'`` entry.
        Exception: If the server answers with a non-success status code.
    """
    # Looked up first so a malformed input fails before any network traffic.
    user_list_id = data['userListId']

    url = 'https://maayanlab.cloud/Enrichr/enrich'
    # Passing the query through ``params`` lets requests handle URL encoding;
    # key order matches the previously hand-built query string.
    query = {
        'backgroundType': 'KEGG_2021_Human',
        'userListId': user_list_id,
    }
    # Without an explicit timeout requests waits indefinitely on a stalled server.
    response = requests.get(url, params=query, timeout=timeout)
    if not response.ok:
        raise Exception(f"Error occurred while fetching enrichment results. Status code: {response.status_code} Error message: {response.text}")
    return response.json()
def save_stream_to_file(stream, filepath):
    """Write the image bytes held in ``stream`` to ``filepath``.

    Args:
        stream: Raw ``bytes``/``bytearray``/``memoryview``, or a binary
            buffer exposing ``getvalue()`` or ``read()``. When it is
            ``None``, empty, or not a recognised byte source, an empty
            Matplotlib figure is saved instead (the behaviour this function
            had before it honoured ``stream``).
        filepath: Destination path of the image file.
    """
    if isinstance(stream, (bytes, bytearray, memoryview)):
        image_bytes = bytes(stream)
    elif hasattr(stream, 'getvalue'):
        # getvalue() returns the whole buffer regardless of the current
        # read position, so an already-consumed BytesIO is still saved fully.
        image_bytes = stream.getvalue()
    elif hasattr(stream, 'read'):
        image_bytes = stream.read()
    else:
        image_bytes = None

    if image_bytes:
        with open(filepath, 'wb') as out_file:
            out_file.write(image_bytes)
    else:
        # Nothing to copy: keep the historical fallback of saving a blank figure.
        fig = plt.figure()
        fig.canvas.draw()
        fig.savefig(filepath, format='png', bbox_inches='tight')
        # Close the figure to free up resources
        plt.close(fig)
    print(f"Image saved to {filepath}")
def plot_results(data, csv_filename):
    """Plot the ten most significant KEGG pathways as a horizontal bar chart.

    The chart is written to ``figure/GSEA.png`` (the directory is created
    when missing) and also rendered into an in-memory PNG stream.

    Args:
        data: Mapping with a ``'KEGG_2021_Human'`` entry holding a list of
            result rows. Element 1 of each row is used as the pathway label
            and element 2 as its p-value.
        csv_filename: Accepted for interface compatibility; no CSV file is
            written by this function.

    Returns:
        A ``(stream, df)`` tuple. ``stream`` is an ``io.BytesIO`` positioned
        at the start of the PNG image. ``df`` is a DataFrame with the columns
        ``'Pathway'`` and ``'P-value'``; note that ``'P-value'`` holds the
        -log10 transformed values shown in the chart, not raw p-values.

    Raises:
        KeyError: If ``data`` has no ``'KEGG_2021_Human'`` entry.
    """
    import os  # local import: only needed to prepare the output directory

    # sorted() ranks by p-value without reordering the caller's list in place.
    ranked = sorted(data['KEGG_2021_Human'], key=lambda row: row[2])
    top_hits = ranked[:10]
    pathways = [row[1] for row in top_hits]
    p_values = [-np.log10(row[2]) for row in top_hits]

    sns.set(style="whitegrid")
    # A dedicated figure keeps this plot from clobbering (or leaking into)
    # whatever pyplot's current figure happens to be.
    fig, ax = plt.subplots()
    try:
        sns.barplot(x=p_values, y=pathways, palette="viridis", orient="h", ax=ax)
        ax.set_xlabel('-log10(p-value)', fontsize=14)
        ax.set_ylabel('Pathway', fontsize=14)
        ax.set_title('Pathway Enrichment Analysis', fontsize=16)
        fig.tight_layout()

        # savefig does not create missing directories on its own.
        os.makedirs('figure', exist_ok=True)
        fig.savefig('figure/GSEA.png')

        stream = io.BytesIO()
        fig.savefig(stream, format='png')
        stream.seek(0)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    df = DataFrame({'Pathway': pathways, 'P-value': p_values})
    return stream, df
def run_gsea_analysis(gene_names_file, csv_filename):
    """Run the enrichment workflow for the genes listed in a text file.

    Reads one gene name per line, uploads the list, fetches the enrichment
    results and plots them. The two intermediate server replies are printed.

    Args:
        gene_names_file: Path to a text file with one gene name per line.
            Blank lines are ignored.
        csv_filename: Forwarded unchanged to ``plot_results``.

    Returns:
        The ``(stream, df)`` tuple produced by ``plot_results``.

    Raises:
        ValueError: If the file contains no gene names.
    """
    with open(gene_names_file, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        # Drop blank lines so empty strings are never uploaded as gene names.
        gene_names = [name for name in stripped_lines if name]
    if not gene_names:
        raise ValueError(f"No gene names found in {gene_names_file!r}")

    enrichment_data = perform_enrichment_analysis(gene_names)
    print(enrichment_data)
    results = get_enrichment_results(enrichment_data)
    print(results)
    stream, df = plot_results(results, csv_filename)
    return stream, df
if __name__ == "__main__":
    # Script entry point: analyse the genes in significant_gene.txt and save
    # the resulting image to tmp.png.
    gene_names_file = 'significant_gene.txt'
    # run_gsea_analysis returns a (stream, df) pair; unpack it so only the
    # image stream, not the whole tuple, is handed to save_stream_to_file.
    stream, _ = run_gsea_analysis(gene_names_file, 'pathway_with_pvalues.csv')
    save_stream_to_file(stream, 'tmp.png')