Skip to content

Commit a17a850

Browse files
authored
Merge pull request #968 from maiko/add_website_memory
Add visited website to memory for recalling content without being limited by the website summary.
2 parents 40ed086 + 5a60535 commit a17a850

File tree

4 files changed

+42
-8
lines changed

4 files changed

+42
-8
lines changed

.env.template

+6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33
################################################################################
44
# EXECUTE_LOCAL_COMMANDS - Allow local command execution (Example: False)
55
EXECUTE_LOCAL_COMMANDS=False
6+
# BROWSE_CHUNK_MAX_LENGTH - When browsing a website, defines the maximum length of each text chunk stored in memory
7+
BROWSE_CHUNK_MAX_LENGTH=8192
8+
# BROWSE_SUMMARY_MAX_TOKEN - Defines the maximum number of tokens in each summary generated by the GPT agent when browsing a website
9+
BROWSE_SUMMARY_MAX_TOKEN=300
10+
# USER_AGENT - Defines the User-Agent header sent by the requests library when browsing websites (string)
11+
# USER_AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
612
# AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml)
713
AI_SETTINGS_FILE=ai_settings.yaml
814

scripts/browse.py

+24-6
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
import requests
22
from bs4 import BeautifulSoup
3+
from memory import get_memory
34
from config import Config
45
from llm_utils import create_chat_completion
56
from urllib.parse import urlparse, urljoin
67

78
cfg = Config()
9+
memory = get_memory(cfg)
10+
11+
session = requests.Session()
12+
session.headers.update({'User-Agent': cfg.user_agent})
813

914

1015
# Function to check if the URL is valid
@@ -27,7 +32,7 @@ def check_local_file_access(url):
2732
return any(url.startswith(prefix) for prefix in local_prefixes)
2833

2934

30-
def get_response(url, headers=cfg.user_agent_header, timeout=10):
35+
def get_response(url, timeout=10):
3136
try:
3237
# Restrict access to local files
3338
if check_local_file_access(url):
@@ -39,7 +44,7 @@ def get_response(url, headers=cfg.user_agent_header, timeout=10):
3944

4045
sanitized_url = sanitize_url(url)
4146

42-
response = requests.get(sanitized_url, headers=headers, timeout=timeout)
47+
response = session.get(sanitized_url, timeout=timeout)
4348

4449
# Check if the response contains an HTTP error
4550
if response.status_code >= 400:
@@ -106,7 +111,7 @@ def scrape_links(url):
106111
return format_hyperlinks(hyperlinks)
107112

108113

109-
def split_text(text, max_length=8192):
114+
def split_text(text, max_length=cfg.browse_chunk_max_length):
110115
"""Split text into chunks of a maximum length"""
111116
paragraphs = text.split("\n")
112117
current_length = 0
@@ -133,7 +138,7 @@ def create_message(chunk, question):
133138
}
134139

135140

136-
def summarize_text(text, question):
141+
def summarize_text(url, text, question):
137142
"""Summarize text using the LLM model"""
138143
if not text:
139144
return "Error: No text to summarize"
@@ -145,15 +150,28 @@ def summarize_text(text, question):
145150
chunks = list(split_text(text))
146151

147152
for i, chunk in enumerate(chunks):
153+
print(f"Adding chunk {i + 1} / {len(chunks)} to memory")
154+
155+
memory_to_add = f"Source: {url}\n" \
156+
f"Raw content part#{i + 1}: {chunk}"
157+
158+
memory.add(memory_to_add)
159+
148160
print(f"Summarizing chunk {i + 1} / {len(chunks)}")
149161
messages = [create_message(chunk, question)]
150162

151163
summary = create_chat_completion(
152164
model=cfg.fast_llm_model,
153165
messages=messages,
154-
max_tokens=300,
166+
max_tokens=cfg.browse_summary_max_token,
155167
)
156168
summaries.append(summary)
169+
print(f"Added chunk {i + 1} summary to memory")
170+
171+
memory_to_add = f"Source: {url}\n" \
172+
f"Content summary part#{i + 1}: {summary}"
173+
174+
memory.add(memory_to_add)
157175

158176
print(f"Summarized {len(chunks)} chunks.")
159177

@@ -163,7 +181,7 @@ def summarize_text(text, question):
163181
final_summary = create_chat_completion(
164182
model=cfg.fast_llm_model,
165183
messages=messages,
166-
max_tokens=300,
184+
max_tokens=cfg.browse_summary_max_token,
167185
)
168186

169187
return final_summary

scripts/commands.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def browse_website(url, question):
191191
def get_text_summary(url, question):
192192
"""Return a summary of the text scraped from the given URL"""
193193
text = browse.scrape_text(url)
194-
summary = browse.summarize_text(text, question)
194+
summary = browse.summarize_text(url, text, question)
195195
return """ "Result" : """ + summary
196196

197197

scripts/config.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ def __init__(self):
4545
self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
4646
self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000))
4747
self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
48+
self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8192))
49+
self.browse_summary_max_token = int(os.getenv("BROWSE_SUMMARY_MAX_TOKEN", 300))
4850

4951
self.openai_api_key = os.getenv("OPENAI_API_KEY")
5052
self.temperature = float(os.getenv("TEMPERATURE", "1"))
@@ -78,7 +80,7 @@ def __init__(self):
7880

7981
# User agent headers to use when browsing web
8082
# Some websites deny requests outright with an error code if no User-Agent header is sent.
81-
self.user_agent_header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
83+
self.user_agent = os.getenv("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36")
8284
self.redis_host = os.getenv("REDIS_HOST", "localhost")
8385
self.redis_port = os.getenv("REDIS_PORT", "6379")
8486
self.redis_password = os.getenv("REDIS_PASSWORD", "")
@@ -159,6 +161,14 @@ def set_smart_token_limit(self, value: int):
159161
"""Set the smart token limit value."""
160162
self.smart_token_limit = value
161163

164+
def set_browse_chunk_max_length(self, value: int):
165+
"""Set the browse_website command chunk max length value."""
166+
self.browse_chunk_max_length = value
167+
168+
def set_browse_summary_max_token(self, value: int):
169+
"""Set the browse_website command summary max token value."""
170+
self.browse_summary_max_token = value
171+
162172
def set_openai_api_key(self, value: str):
163173
"""Set the OpenAI API key value."""
164174
self.openai_api_key = value

0 commit comments

Comments
 (0)