-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgen_scores.py
115 lines (93 loc) · 4.18 KB
/
gen_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import json
import mysql.connector
import time
start_time = time.time()
from prompt import get_relationship_summary_and_score
# Batch job: for every compound id, walk the concept metadata tables, rebuild
# the sentence context around each related row, ask the LLM helper for a
# relationship summary/score, and write the result as a JSON file under
# ./all_results/.

# Compound ids to process; both ends are inclusive ("all compound: 1 to 1052").
FIRST_COMPOUND_ID = 1
LAST_COMPOUND_ID = 1052

# How many neighbouring sentence ids are pulled in on each side of the match.
CONTEXT_WINDOW = 3

# Paragraph text is cut to this many characters before it is used in a file name.
PARAGRAPH_PREFIX_LEN = 20

# Output directory names, index-aligned with `tables` below.
tables_dir_name = ['concept_abnormality_metadata', 'concept_chemical_metadata', 'concept_molecular function_metadata', 'concept_gene_metadata', 'concept_location_metadata', 'concept_animal model_metadata']
# Metadata tables queried for every compound.
tables = ['concept_abnorm_metadata', 'concept_chemical_metadata', 'concept_function_metadata', 'concept_gene_metadata', 'concept_location_metadata', 'concept_model_metadata']


def _row_fields(table, row):
    """Return ``(reference_id, paragraph, sentence)`` for one metadata row.

    ``concept_abnorm_metadata`` is read with different column positions from
    the other tables, so the indices are chosen per table.
    """
    if table == 'concept_abnorm_metadata':
        return row[7], row[5], row[6]
    return row[3], row[6], row[7]


def _fetch_context(cursor, sentence, reference_id):
    """Return the text surrounding ``sentence``, or ``None`` if it is not found.

    The sentence is looked up in ``sentences`` by content and reference id.
    The first column of the first match is used as the sentence id (assumes
    that column is ``id`` -- TODO confirm against the schema), and the contents
    of all rows whose id lies within ``CONTEXT_WINDOW`` of it are concatenated,
    each followed by a single space.
    """
    cursor.execute(
        """
        SELECT *
        FROM `sentences`
        WHERE `content` LIKE %s AND `reference_id` = %s;
        """,
        (sentence, reference_id),
    )
    matches = cursor.fetchall()
    if not matches:
        return None
    centre_id = matches[0][0]
    # NOTE(review): the window is selected by id only, not by reference_id, so
    # it may include sentences from a neighbouring reference -- confirm intent.
    cursor.execute(
        "SELECT `content` FROM `sentences` WHERE `id` BETWEEN %s AND %s;",
        (centre_id - CONTEXT_WINDOW, centre_id + CONTEXT_WINDOW),
    )
    return "".join(row[0] + ' ' for row in cursor.fetchall())


# NOTE(review): credentials are hard-coded here; consider loading them from the
# environment or a config file that is not committed.
connection = mysql.connector.connect(host='localhost',
                                     port='3306',
                                     user='root',
                                     password='XXXXXXXX')
cursor = connection.cursor()

# Path of the most recently written result; stays None if nothing was saved.
save_path = None

try:
    cursor.execute("USE `hbdb2`;")

    for compound_id in range(FIRST_COMPOUND_ID, LAST_COMPOUND_ID + 1):
        print(f"compound_id = {compound_id}\n")

        # The compound name does not depend on the table, so resolve it once
        # per compound instead of once per table.
        cursor.execute("SELECT `name` FROM `compounds` WHERE `id` = %s;", (compound_id,))
        compound_data = cursor.fetchall()
        if not compound_data:
            print("can't find compound name\n")
            continue
        term_a = compound_data[0][0]

        for idx, table in enumerate(tables):
            print(f"idx = {idx}\n")
            print(table)

            # Table names cannot be bound as parameters; `table` only ever
            # comes from the fixed `tables` list above.
            cursor.execute(
                f"SELECT * FROM `{table}` WHERE `compound_id` = %s AND `is_related` = 1;",
                (compound_id,),
            )
            related_rows = cursor.fetchall()

            for row in related_rows:
                concept_id = row[2]
                reference_id, paragraph, sentence = _row_fields(table, row)
                # Only a short prefix of the paragraph goes into the file name;
                # a missing paragraph becomes the empty string.
                paragraph = (paragraph or "")[:PARAGRAPH_PREFIX_LEN]

                print(f"concept id = {concept_id}\n")
                cursor.execute("SELECT `concept_name` FROM `concepts` WHERE `id` = %s;", (concept_id,))
                concept_data = cursor.fetchall()
                if not concept_data:
                    print("can't find concept name\n")
                    continue
                term_b = concept_data[0][0]

                print(f"now in {table}\n{compound_id}\n{concept_id}\n{reference_id}\n\n")

                context = _fetch_context(cursor, sentence, reference_id)
                if context is None:
                    # Without a matching sentence there is no context to score;
                    # skip the row instead of indexing into an empty result.
                    print("No results found\n")
                    continue

                # Generate the summary. The helper's reply is parsed as JSON.
                # NOTE(review): a malformed reply raises json.JSONDecodeError
                # and stops the run -- confirm whether that is desired.
                llm_response = get_relationship_summary_and_score(context, term_a, term_b)
                parsed_response = json.loads(llm_response)
                response_with_context = {
                    "context": context,
                    "term_A": term_a,
                    "term_B": term_b,
                    "llm_generation": parsed_response,
                }

                # Make sure all six per-table directories exist for this
                # compound, even the ones that end up with no results.
                for table_name in tables_dir_name:
                    table_dir = f"./all_results/{compound_id}_{term_a}/{table_name}/"
                    os.makedirs(os.path.dirname(table_dir), exist_ok=True)

                # NOTE(review): term names and the paragraph prefix are used
                # verbatim in the path; a '/' in any of them adds a directory
                # level -- confirm whether downstream readers rely on this.
                save_path = f"./all_results/{compound_id}_{term_a}/{tables_dir_name[idx]}/{reference_id}/{term_a}_{term_b}_{paragraph}.json"
                os.makedirs(os.path.dirname(save_path), exist_ok=True)
                with open(save_path, "w", encoding="utf-8") as file:
                    json.dump(response_with_context, file, indent=4)
finally:
    # Release the database resources even if the run aborts part-way.
    cursor.close()
    connection.close()

end_time = time.time()
execution_time = end_time - start_time

if save_path is not None:
    print(f"Results saved to {save_path}")
else:
    print("No results were saved")
print(f"Total execution time: {execution_time:.2f} seconds")