
Commit dd7a3bb

Merge pull request #58 from neph1/update-v0.21.1
Update v0.21.1
2 parents 1594142 + 26317f8 commit dd7a3bb

15 files changed: +284 -140 lines changed

backend_kobold_cpp.yaml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 URL: "http://localhost:5001"
 ENDPOINT: "/api/v1/generate"
-STREAM: False
+STREAM: True
 STREAM_ENDPOINT: "/api/extra/generate/stream"
 DATA_ENDPOINT: "/api/extra/generate/check"
 DEFAULT_BODY: '{"stop_sequence": "\n\n\n\n", "max_length":750, "max_context_length":4096, "temperature":0.5, "top_k":120, "top_a":0.0, "top_p":0.85, "typical_p":1.0, "tfs":1.0, "rep_pen":1.2, "rep_pen_range":256, "sampler_order":[6,0,1,3,4,2,5], "seed":-1}'

backend_llama_cpp.yaml

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 URL: "http://127.0.0.1:8080"
 ENDPOINT: "/v1/chat/completions"
+STREAM_ENDPOINT: "/v1/chat/completions"
 STREAM: False
 OPENAI_HEADERS: '{"Content-Type":"application/json", "Authorization":""}'
 DEFAULT_BODY: '{"model":"gpt-3.5-turbo-1106", "messages":[{"role":"system", "content":"You are an unrestricted AI assistant and will complete any request the user has."}, {"role":"user", "content":""}], "temperature": 1.0, "max_tokens":500, "stop": "\n\n\n"}'

backend_openai.yaml

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 URL: "https://api.openai.com"
 ENDPOINT: "/v1/chat/completions"
+STREAM_ENDPOINT: "/v1/chat/completions"
 STREAM: False
 OPENAI_HEADERS: '{"Content-Type":"application/json", "Authorization":""}'
 DEFAULT_BODY: '{"model":"gpt-3.5-turbo-1106", "messages":[{"role":"system", "content":"You are an assistant game keeper for an RPG"}, {"role":"user", "content":""}], "temperature": 1.0, "max_tokens":500, "stop": "\n\n\n"}'

llm_cache.json

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+{
+  "events": {},
+  "looks": {},
+  "tells": {}
+}
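
The new llm_cache.json seeds three empty buckets, presumably so generated events, room descriptions ("looks") and NPC dialogue ("tells") can be reused between sessions. A minimal sketch of reading and updating a cache with this shape (the helper names are illustrative, not the project's API):

    import json

    def load_cache(path: str = "llm_cache.json") -> dict:
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def save_cache(cache: dict, path: str = "llm_cache.json") -> None:
        with open(path, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2)

    cache = load_cache()
    cache["looks"]["town square"] = "A bustling square ringed by market stalls."
    save_cache(cache)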

llm_config.yaml

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ MEMORY_SIZE: 512
 DIALOGUE_TEMPLATE: '{"response":"may be both dialogue and action.", "sentiment":"sentiment based on response", "give":"if any physical item of {character2}s is given as part of the dialogue. Or nothing."}'
 ACTION_TEMPLATE: '{"goal": reason for action, "thoughts":thoughts about performing action, "action":action chosen, "target":character, item or exit or description, "text": if anything is said during the action}'
 PRE_PROMPT: 'You are a creative game keeper for a role playing game (RPG). You craft detailed worlds and interesting characters with unique and deep personalities for the player to interact with.'
-BASE_PROMPT: "<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words using the information found inside the <context> tags to create a background for your text. Use about {max_words} words."
+BASE_PROMPT: '<context>{context}</context>\n[USER_START] Rewrite [{input_text}] in your own words using the information found inside the <context> tags to create a background for your text. Use about {max_words} words.'
 DIALOGUE_PROMPT: '<context>{context}</context>\nThe following is a conversation between {character1} and {character2}; {character2}s sentiment towards {character1}: {sentiment}. Write a single response as {character2} in third person pov, using {character2} description and other information found inside the <context> tags. If {character2} has a quest active, they will discuss it based on its status. Respond in JSON using this template: """{dialogue_template}""". [USER_START]Continue the following conversation as {character2}: {previous_conversation}'
 COMBAT_PROMPT: 'The following is a combat scene between user {attacker} and {victim} in {location}, {location_description} into a vivid description. [USER_START] Rewrite the following combat result in about 150 words, using the characters weapons and their health status: 1.0 is highest, 0.0 is lowest. Combat Result: {attacker_msg}'
 PRE_JSON_PROMPT: 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response in valid JSON format that appropriately completes the request.'
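
Note that switching BASE_PROMPT from double to single quotes is more than cosmetic: in YAML, a double-quoted scalar treats \n as an escape sequence and turns it into a real newline, while a single-quoted scalar keeps the two characters literally, which is how every other prompt string in this file is quoted.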

requirements_dev.txt

Lines changed: 1 addition & 0 deletions
@@ -9,5 +9,6 @@ pillow
 packaging==20.3
 pillow>=8.3.2
 responses==0.13.3
+aioresponses==0.7.6
 
 
tale/llm/LivingNpc.py

Lines changed: 1 addition & 1 deletion
@@ -260,7 +260,7 @@ def tell_action_deferred(self):
         actions = '\n'.join(self.deferred_actions)
         deferred_action = ParseResult(verb='idle-action', unparsed=actions, who_info=None)
         self.tell_others(actions + '\n')
-        #self.location._notify_action_all(deferred_action, actor=self)
+        self.location._notify_action_all(deferred_action, actor=self)
         self.deferred_actions.clear()
 
     def _clear_quest(self):

tale/llm/character.py

Lines changed: 4 additions & 7 deletions
@@ -43,7 +43,7 @@ def generate_dialogue(self,
         #formatted_conversation = llm_config.params['USER_START']
         formatted_conversation = conversation.replace('<break>', '\n')#llm_config.params['USER_END'] + '\n' + llm_config.params['USER_START'])
         prompt += self.dialogue_prompt.format(
-            context=context.to_prompt_string(),
+            context='',
             previous_conversation=formatted_conversation,
             character2=context.speaker_name,
             character1=context.target_name,
@@ -52,10 +52,7 @@
             sentiment=sentiment)
         request_body = deepcopy(self.default_body)
         request_body['grammar'] = self.json_grammar
-
-
-        #if not self.stream:
-        response = self.io_util.synchronous_request(request_body, prompt=prompt)
+        response = self.io_util.synchronous_request(request_body, prompt=prompt, context=context.to_prompt_string())
         try:
             json_result = json.loads(parse_utils.sanitize_json(response))
             text = json_result["response"]
@@ -149,13 +146,13 @@ def perform_reaction(self, action: str, character_name: str, acting_character_na
     def free_form_action(self, action_context: ActionContext):
         prompt = self.pre_prompt
         prompt += self.free_form_action_prompt.format(
-            context=action_context.to_prompt_string(),
+            context = '',
            character_name=action_context.character_name,
            action_template=self.action_template)
         request_body = deepcopy(self.default_body)
         request_body['grammar'] = self.json_grammar
         try :
-            text = self.io_util.synchronous_request(request_body, prompt=prompt)
+            text = self.io_util.synchronous_request(request_body, prompt=prompt, context=action_context.to_prompt_string())
             if not text:
                 return None
             response = json.loads(parse_utils.sanitize_json(text))
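
The pattern in both hunks is the same: the caller no longer interpolates the context into the prompt template (the placeholder is rendered with an empty string) and instead hands the raw context string to synchronous_request, leaving it to the backend adapter introduced below to decide where it goes (kobold.cpp places it in the request's memory field; llama.cpp/OpenAI-style backends format it back into the prompt). Sketched call shape, with illustrative comments:

    # context travels as its own argument; the io adapter decides placement
    response = self.io_util.synchronous_request(
        request_body,
        prompt=prompt,                          # template rendered with context=''
        context=context.to_prompt_string())     # serialized dialogue context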

tale/llm/io_adapters.py

Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
+
+from abc import ABC, abstractmethod
+import asyncio
+import json
+import time
+
+import aiohttp
+import requests
+
+from tale.errors import LlmResponseException
+
+
+class AbstractIoAdapter(ABC):
+
+    def __init__(self, url: str, stream_endpoint: str, user_start_prompt: str, user_end_prompt: str):
+        self.url = url
+        self.stream_endpoint = stream_endpoint
+        self.user_start_prompt = user_start_prompt
+        self.user_end_prompt = user_end_prompt
+
+    @abstractmethod
+    def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
+        pass
+
+    @abstractmethod
+    async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
+        pass
+
+    @abstractmethod
+    def _parse_result(self, result: str) -> str:
+        pass
+
+    @abstractmethod
+    def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
+        pass
+
+class KoboldCppAdapter(AbstractIoAdapter):
+
+    def __init__(self, url: str, stream_endpoint: str, data_endpoint: str, user_start_prompt: str, user_end_prompt: str):
+        super().__init__(url, stream_endpoint, user_start_prompt, user_end_prompt)
+        self.data_endpoint = data_endpoint
+
+    def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
+        result = asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body))
+
+        try:
+            if result:
+                return self._do_process_result(self.url + self.data_endpoint, io, wait)
+        except LlmResponseException as exc:
+            print("Error parsing response from backend - ", exc)
+        return ''
+
+    async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
+        """ Send request to stream endpoint async to not block the main thread"""
+        async with aiohttp.ClientSession() as session:
+            async with session.post(url, data=json.dumps(request_body)) as response:
+                if response.status == 200:
+                    return True
+                else:
+                    print("Error occurred:", response.status)
+
+    def _do_process_result(self, url, io = None, wait: bool = False) -> str:
+        """ Process the result from the stream endpoint """
+        tries = 0
+        old_text = ''
+        while tries < 4:
+            time.sleep(0.25)
+            data = requests.post(url)
+
+            text = json.loads(data.text)['results'][0]['text']
+
+            if len(text) == len(old_text):
+                tries += 1
+                continue
+            if not wait:
+                new_text = text[len(old_text):]
+                io.output_no_newline(new_text, new_paragraph=False)
+            old_text = text
+        return old_text
+
+    def _parse_result(self, result: str) -> str:
+        """ Parse the result from the stream endpoint """
+        return json.loads(result)['results'][0]['text']
+
+    def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
+        if self.user_start_prompt:
+            prompt = prompt.replace('[USER_START]', self.user_start_prompt)
+        if self.user_end_prompt:
+            prompt = prompt + self.user_end_prompt
+        prompt.replace('<context>{context}</context>', '')
+        request_body['prompt'] = prompt
+        request_body['memory'] = context
+        return request_body
+
+class LlamaCppAdapter(AbstractIoAdapter):
+
+    def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
+        return asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body, io = io))
+
+    async def _do_stream_request(self, url: str, request_body: dict, io = None) -> str:
+        """ Send request to stream endpoint async to not block the main thread"""
+        request_body['stream'] = True
+        text = ''
+        async with aiohttp.ClientSession() as session:
+            async with session.post(url, data=json.dumps(request_body)) as response:
+                if response.status != 200:
+                    print("Error occurred:", response.status)
+                    return False
+                async for chunk in response.content.iter_any():
+                    decoded = chunk.decode('utf-8')
+                    lines = decoded.split('\n')
+                    for line in lines:
+                        # Ignore empty lines
+                        if not line.strip():
+                            continue
+                        key, value = line.split(':', 1)
+                        key = key.strip()
+                        value = value.strip()
+                        if key == 'data':
+                            data = json.loads(value)
+                            choice = data['choices'][0]['delta']
+                            content = choice.get('content', None)
+
+                            if content:
+                                io.output_no_newline(content, new_paragraph=False)
+                                text += content
+                    #while len(lines) == 0:
+                    #    await asyncio.sleep(0.05)
+
+        return text
+
+    def _parse_result(self, result: str) -> str:
+        """ Parse the result from the stream endpoint """
+        try:
+            return json.loads(result)['choices'][0]['message']['content']
+        except:
+            raise LlmResponseException("Error parsing result from backend")
+
+    def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
+        if self.user_start_prompt:
+            prompt = prompt.replace('[USER_START]', self.user_start_prompt)
+        if self.user_end_prompt:
+            prompt = prompt + self.user_end_prompt
+        if context:
+            prompt = prompt.format(context=context)
+        request_body['messages'][1]['content'] = prompt
+        return request_body
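
A hedged usage sketch of the new kobold.cpp adapter. The URL and endpoints are the defaults from backend_kobold_cpp.yaml; the instruction markers are illustrative and would normally come from llm_config.yaml's USER_START/USER_END. In the game, IoUtil constructs the adapter rather than user code:

    adapter = KoboldCppAdapter(
        url="http://localhost:5001",
        stream_endpoint="/api/extra/generate/stream",
        data_endpoint="/api/extra/generate/check",
        user_start_prompt="### Instruction:\n",
        user_end_prompt="\n### Response:\n")

    body = adapter._set_prompt({"max_length": 200},
                               prompt="[USER_START] Describe the town square.",
                               context="A rainy port town at dusk.")
    # wait=True returns the accumulated text once polling settles instead of
    # echoing partial chunks to an io handler
    text = adapter.stream_request(body, io=None, wait=True)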

tale/llm/llm_io.py

Lines changed: 20 additions & 91 deletions
@@ -1,12 +1,7 @@
-import re
 import requests
-import time
-import aiohttp
-import asyncio
 import json
 from tale.errors import LlmResponseException
-import tale.parse_utils as parse_utils
-from tale.player_utils import TextBuffer
+from tale.llm.io_adapters import KoboldCppAdapter, LlamaCppAdapter
 
 class IoUtil():
     """ Handles connection and data retrieval from backend """
@@ -19,107 +14,41 @@ def __init__(self, config: dict = None, backend_config: dict = None):
         self.url = backend_config['URL']
         self.endpoint = backend_config['ENDPOINT']
 
-
         if self.backend != 'kobold_cpp':
             headers = json.loads(backend_config['OPENAI_HEADERS'])
             headers['Authorization'] = f"Bearer {backend_config['OPENAI_API_KEY']}"
             self.openai_json_format = json.loads(backend_config['OPENAI_JSON_FORMAT'])
             self.headers = headers
+            self.io_adapter = LlamaCppAdapter(self.url, backend_config['STREAM_ENDPOINT'], config['USER_START'], config['USER_END'])
         else:
+            self.io_adapter = KoboldCppAdapter(self.url, backend_config['STREAM_ENDPOINT'], backend_config['DATA_ENDPOINT'], config['USER_START'], config['USER_END'])
             self.headers = {}
+
         self.stream = backend_config['STREAM']
-        if self.stream:
-            self.stream_endpoint = backend_config['STREAM_ENDPOINT']
-            self.data_endpoint = backend_config['DATA_ENDPOINT']
-        self.user_start_prompt = config['USER_START']
-        self.user_end_prompt = config['USER_END']
 
-    def synchronous_request(self, request_body: dict, prompt: str) -> str:
+
+    def synchronous_request(self, request_body: dict, prompt: str, context: str = '') -> str:
         """ Send request to backend and return the result """
         if request_body.get('grammar', None) and 'openai' in self.url:
             # TODO: temp fix for openai
             request_body.pop('grammar')
            request_body['response_format'] = self.openai_json_format
-        self._set_prompt(request_body, prompt)
+        request_body = self.io_adapter._set_prompt(request_body, prompt, context)
+        print(request_body)
         response = requests.post(self.url + self.endpoint, headers=self.headers, data=json.dumps(request_body))
-        try:
-            if self.backend == 'kobold_cpp':
-                parsed_response = self._parse_kobold_result(response.text)
-            else:
-                parsed_response = self._parse_openai_result(response.text)
-        except LlmResponseException as exc:
-            print("Error parsing response from backend - ", exc)
-            return ''
-        return parsed_response
+        if response.status_code == 200:
+            return self.io_adapter._parse_result(response.text)
+        return ''
 
-    def asynchronous_request(self, request_body: dict, prompt: str) -> str:
+    def asynchronous_request(self, request_body: dict, prompt: str, context: str = '') -> str:
         if self.backend != 'kobold_cpp':
-            return self.synchronous_request(request_body, prompt)
-        return self.stream_request(request_body, wait=True, prompt=prompt)
-
-    def stream_request(self, request_body: dict, prompt: str, io = None, wait: bool = False) -> str:
-        if self.backend != 'kobold_cpp':
-            raise NotImplementedError("Currently does not support streaming requests for OpenAI")
-        self._set_prompt(request_body, prompt)
-        result = asyncio.run(self._do_stream_request(self.url + self.stream_endpoint, request_body))
-        if result:
-            return self._do_process_result(self.url + self.data_endpoint, io, wait)
-        return ''
+            return self.synchronous_request(request_body=request_body, prompt=prompt, context=context)
+        return self.stream_request(request_body, wait=True, prompt=prompt, context=context)
 
-    async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
-        """ Send request to stream endpoint async to not block the main thread"""
-        async with aiohttp.ClientSession() as session:
-            async with session.post(url, data=json.dumps(request_body)) as response:
-                if response.status == 200:
-                    return True
-                else:
-                    # Handle errors
-                    print("Error occurred:", response.status)
+    def stream_request(self, request_body: dict, prompt: str, context: str = '', io = None, wait: bool = False) -> str:
+        if self.io_adapter:
+            request_body = self.io_adapter._set_prompt(request_body, prompt, context)
+            return self.io_adapter.stream_request(request_body, io, wait)
+        # fall back if no io adapter
+        return self.synchronous_request(request_body=request_body, prompt=prompt, context=context)
 
-    def _do_process_result(self, url, io = None, wait: bool = False) -> str:
-        """ Process the result from the stream endpoint """
-        tries = 0
-        old_text = ''
-        while tries < 4:
-            time.sleep(0.5)
-            data = requests.post(url)
-            text = self._parse_kobold_result(data.text)
-
-            if len(text) == len(old_text):
-                tries += 1
-                continue
-            if not wait:
-                new_text = text[len(old_text):]
-                io.output_no_newline(new_text, new_paragraph=False)
-            old_text = text
-        return old_text
-
-    def _parse_kobold_result(self, result: str) -> str:
-        """ Parse the result from the kobold endpoint """
-        return json.loads(result)['results'][0]['text']
-
-    def _parse_openai_result(self, result: str) -> str:
-        """ Parse the result from the openai endpoint """
-        try:
-            return json.loads(result)['choices'][0]['message']['content']
-        except:
-            raise LlmResponseException("Error parsing result from backend")
-
-    def _set_prompt(self, request_body: dict, prompt: str) -> dict:
-        if self.user_start_prompt:
-            prompt = prompt.replace('[USER_START]', self.user_start_prompt)
-        if self.user_end_prompt:
-            prompt = prompt + self.user_end_prompt
-        if self.backend == 'kobold_cpp':
-            request_body['prompt'] = prompt
-        else :
-            request_body['messages'][1]['content'] = prompt
-        return request_body
-
-    def _extract_context(self, full_string):
-        pattern = re.escape('<context>') + "(.*?)" + re.escape('</context>')
-        match = re.search(pattern, full_string, re.DOTALL)
-        if match:
-            return '<context>' + match.group(1) + '</context>'
-        else:
-            return ''
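
After this refactor IoUtil keeps only the transport-agnostic parts (headers, the blocking POST, choosing between the synchronous and streamed paths) and delegates prompt placement, response parsing, and streaming to the adapter chosen in __init__. A hedged sketch of what supporting another backend could look like under the new interface; ExampleAdapter is hypothetical and not part of this commit:

    import json

    from tale.llm.io_adapters import AbstractIoAdapter

    class ExampleAdapter(AbstractIoAdapter):

        def stream_request(self, request_body: dict, io = None, wait: bool = False) -> str:
            # a real adapter would stream and echo partial output here
            return ''

        async def _do_stream_request(self, url: str, request_body: dict,) -> bool:
            return False

        def _parse_result(self, result: str) -> str:
            # the response payload layout is backend-specific
            return json.loads(result)['text']

        def _set_prompt(self, request_body: dict, prompt: str, context: str = '') -> dict:
            if context:
                prompt = prompt.format(context=context)
            request_body['prompt'] = prompt
            return request_body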
