
Commit a4285b7

minor changes
1 parent 835789f commit a4285b7

2 files changed: 21 additions & 9 deletions


lires/core/textUtils.py

Lines changed: 11 additions & 0 deletions
@@ -282,9 +282,20 @@ async def retrieveRelevantSections(
     query_text = query_text.replace("\n", " ")
     query_vec = iconn.featurize(query_text)

+
+    def __containsNoneEnglish(sentence):
+        import re
+        non_english_regex = re.compile(r'[^a-zA-Z0-9]+')
+        return bool(non_english_regex.findall(sentence))
+
     src_vec_dict: dict[str, list[float]] = {}
     for sentence in sentences:
         await asyncio.sleep(0)
+        # TODO: replace this
+        # ignore sentences with none english words
+        if __containsNoneEnglish(sentence):
+            continue
+
         src_vec_dict[sentence] = iconn.featurize(sentence)

     # compute the similarity
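
Note on the new filter above: the regex r'[^a-zA-Z0-9]+' matches any character that is not an ASCII letter or digit, so spaces and punctuation already count as "non-English" and virtually every multi-word sentence would be skipped; the in-code TODO presumably refers to this. A minimal sketch of a stricter check, assuming a character-ratio heuristic is acceptable (the helper name and the 0.5 threshold are illustrative, not part of the commit):

def contains_mostly_non_ascii(sentence: str, threshold: float = 0.5) -> bool:
    # Skip a sentence only when a large share of its characters fall outside
    # the ASCII range, instead of reacting to any space or punctuation mark.
    if not sentence:
        return False
    non_ascii = sum(1 for ch in sentence if ord(ch) > 127)
    return non_ascii / len(sentence) > threshold

print(contains_mostly_non_ascii("A normal English sentence, with punctuation!"))  # False
print(contains_mostly_non_ascii("这是一段中文句子。"))                              # True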

test/test_info_retrive.py

Lines changed: 10 additions & 9 deletions
@@ -26,7 +26,7 @@ def __call__(self, input_str: str, verbose: bool = True):
         )
         ans = ""
         for t in res:
-            if verbose: print(t, end = "")
+            if verbose: print(t, end = "", flush=True)
             ans += t
         self.conv_dict["conversations"].append((
             "user", input_str
@@ -40,6 +40,7 @@ def extractAction(ans: str):
     """
     Ask LLM to summarize the action into JSON format.
     """
+    global MODEL

     system = "You are an AI that is responsible for summarizing some words into JSON format. "

@@ -58,14 +59,14 @@ def extractAction(ans: str):
         "-----\n"
         "Here is the speaking, please summarize it in the above mentioned json format: {}".format(ans),
         conv_dict=conv_dict,
-        model_name="gpt-3.5-turbo-16k"
+        model_name=MODEL
         # model_name="stabilityai/StableBeluga-7B"
     )
     ret = ""
     for t in res:
-        print(t, end = "")
+        print(t, end = "", flush=True)
         ret += t
-    return ret.replace('"', "'")
+    return ret.replace('"', "'").replace('\_', '_')

 def searchForInfo(query: str):
     print("\nSearching for information: {}{}".format(BCOLORS.GREEN, query), end=BCOLORS.ENDC + "\n")
@@ -113,15 +114,14 @@ def refineSelectedSections(question: str, related_sections: str) -> str:
         ("user", "I'm searching for the answer for a question from a literature database, you should help me pick the most relavent search result from the searched sections."),
         ("assistant", "Sure! Please provide me with the sections you have searched so that I can assist you in selecting the most relevant ones.")
     ]
-    prompt = "I'm searching for the answer for a question from a literature database, you should help me pick the most relavent search result from the searched sections."
+    prompt = "I'm searching for the answer for a question from a literature database, you should help me pick the most relavent search result from the searched sections.\n" \
+        "For your reference, here is the question that I'm searching: {} \n".format(question) + "\n" \
         "Please help me pick at most 5 of the most relevant sections from the following searched sections: \n"\
         "---\n"\
-        "\n".join(related_sections) + \
+        "\n".join(['- ' + r for r in related_sections]) + \
         "---\n"\
-        "For your reference, here is the question that I'm searching: {} \n".format(question) + "\n" \
         "You can slightly refine each section by adding or removing some words to make it better answer the question, but you shouldn't change the meaning of the section. \n" \
         "You answer should not contain anything else other than explicitly listing the selected informations. \n"
-    # breakpoint()

     ai = AI(iconn)
     ai.conv_dict = {
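
The prompt rewrite in the hunk above moves the question ahead of the search results and prefixes each searched section with "- ", so the model receives an explicit bullet list instead of undelimited lines. A rough illustration with hypothetical section strings:

related_sections = [
    "Transformers use self-attention to weigh tokens.",
    "BM25 ranks documents by term-frequency statistics.",
]
print("\n".join('- ' + r for r in related_sections))
# - Transformers use self-attention to weigh tokens.
# - BM25 ranks documents by term-frequency statistics.
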
@@ -143,7 +143,8 @@ def refineSelectedSections(question: str, related_sections: str) -> str:
 # MODEL: ChatStreamIterType = "stabilityai/StableBeluga-7B"
 # MODEL: ChatStreamIterType = "Open-Orca/LlongOrca-7B-16k"
 # MODEL: ChatStreamIterType = "gpt-3.5-turbo-16k"
-MODEL: ChatStreamIterType = "LOCAL"
+MODEL: ChatStreamIterType = "DEFAULT"
+# MODEL: ChatStreamIterType = "LOCAL"

 question = sys.argv[1]

0 commit comments
