Skip to content

Commit 6286adb

Browse files
Merge branch 'main' of github.com:h2oai/sql-sidekick into main
2 parents 28e8240 + ac3544c commit 6286adb

11 files changed

+212
-157
lines changed

app.toml

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[App]
2-
name = "ai.h2o.wave.sql-sidekick"
3-
title = "SQL-Sidekick"
4-
description = "QnA with tabular data using NLQ"
2+
Name = "ai.h2o.wave.sql-sidekick"
3+
Title = "SQL-Sidekick"
4+
Description = "QnA with tabular data using NLQ"
55
LongDescription = "about.md"
66
InstanceLifecycle = "MANAGED"
77
Tags = ["DATA_SCIENCE", "MACHINE_LEARNING", "NLP", "GENERATIVE_AI"]
@@ -22,3 +22,28 @@ EnableOIDC = true
2222
[[Env]]
2323
Name = "H2O_WAVE_MAX_REQUEST_SIZE"
2424
Value = "20M"
25+
26+
[[Env]]
27+
Name = "HEAP_ID"
28+
Secret = "heap-analytics"
29+
SecretKey = "id"
30+
31+
[[Env]]
32+
Name = "H2OGPT_URL"
33+
Secret = "h2ogpt-oss-sqlsidekick"
34+
SecretKey = "h2ogpt-url"
35+
36+
[[Env]]
37+
Name = "H2OGPT_API_TOKEN"
38+
Secret = "h2ogpt-oss-sqlsidekick"
39+
SecretKey = "h2ogpt-key"
40+
41+
[[Env]]
42+
Name = "H2OGPTE_URL"
43+
Secret = "h2ogpte-sqlsidekick"
44+
SecretKey = "h2ogpte-url"
45+
46+
[[Env]]
47+
Name = "H2OGPTE_API_TOKEN"
48+
Secret = "h2ogpte-sqlsidekick"
49+
SecretKey = "h2ogpte-key"

examples/notebooks/Guardrails_SQL_injection.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,10 @@
8787
"# env variables\n",
8888
"\n",
8989
"os.environ['OPENAI_API_KEY'] = \"\"\n",
90-
"os.environ['H2O_BASE_MODEL_URL'] = ''\n",
91-
"os.environ['H2O_BASE_MODEL_API_KEY'] = \"\"\n",
92-
"os.environ['RECOMMENDATION_MODEL_REMOTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
93-
"os.environ['RECOMMENDATION_MODEL_API_KEY'] = \"\"\n",
90+
"os.environ['H2OGPT_URL'] = ''\n",
91+
"os.environ['H2OGPT_API_TOKEN'] = \"\"\n",
92+
"os.environ['H2OGPTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
93+
"os.environ['H2OGPTE_API_TOKEN'] = \"\"\n",
9494
"\n",
9595
"\n",
9696
"base_path = \".\"\n",

examples/notebooks/databricks_db.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,10 @@
146146
"os.environ['DATABRICKS_TOKEN'] = \"\"\n",
147147
"os.environ['OPENAI_API_KEY'] = \"\"\n",
148148
"\n",
149-
"os.environ['H2O_BASE_MODEL_URL'] = 'http://38.128.233.247'\n",
150-
"os.environ['H2O_BASE_MODEL_API_KEY'] = \"\"\n",
151-
"os.environ['RECOMMENDATION_MODEL_REMOTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
152-
"os.environ['RECOMMENDATION_MODEL_API_KEY'] = \"\""
149+
"os.environ['H2OGPT_URL'] = 'http://38.128.233.247'\n",
150+
"os.environ['H2OGPT_API_TOKEN'] = \"\"\n",
151+
"os.environ['H2OGPTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
152+
"os.environ['H2OGPTE_API_TOKEN'] = \"\""
153153
]
154154
},
155155
{

examples/notebooks/sdk_quick_tutorial.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,11 @@
7171
"import os\n",
7272
"\n",
7373
"os.environ['OPENAI_API_KEY'] = \"\"\n",
74-
"os.environ['H2O_BASE_MODEL_URL'] = 'http://38.128.233.247'\n",
75-
"os.environ['H2O_BASE_MODEL_API_KEY'] = \"\"\n",
74+
"os.environ['H2OGPT_URL'] = 'http://38.128.233.247'\n",
75+
"os.environ['H2OGPT_API_TOKEN'] = \"\"\n",
7676
"# To get access to h2ogpte endpoint, reach out to [email protected]\n",
77-
"os.environ['RECOMMENDATION_MODEL_REMOTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
78-
"os.environ['RECOMMENDATION_MODEL_API_KEY'] = \"\""
77+
"os.environ['H2OGPTE_URL'] = \"https://h2ogpte.genai.h2o.ai\" # e.g. https://<>.h2ogpte.h2o.ai\n",
78+
"os.environ['H2OGPTE_API_TOKEN'] = \"\""
7979
]
8080
},
8181
{

sidekick/configs/env.toml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
OPENAI_API_KEY = "" # Needed only for openAI models
33
MODEL_NAME = "h2ogpt-sql-sqlcoder-34b-alpha" # Others: e.g. gpt-4, gpt-4-32k, text-davinci-003
44
QUANT_TYPE = '4bit'
5-
H2O_BASE_MODEL_URL = 'http://38.128.233.247'
6-
H2O_BASE_MODEL_API_KEY = ""
7-
RECOMMENDATION_MODEL_REMOTE_URL = ""
8-
RECOMMENDATION_MODEL_API_KEY = ""
5+
6+
H2OGPT_URL = 'http://38.128.233.247'
7+
H2OGPT_API_TOKEN = ""
8+
H2OGPTE_URL = ""
9+
H2OGPTE_API_TOKEN = ""
10+
11+
RECOMMENDATION_MODEL = "h2oai/h2ogpt-4096-llama2-70b-chat"
912
VULNERABILITY_SCANNER = "h2oai/h2ogpt-4096-llama2-70b-chat" # Other options: OpenAI models, depending on availability (e.g. 'gpt-3.5-turbo')
1013
SELF_CORRECTION_MODEL = "h2oai/h2ogpt-4096-llama2-70b-chat" # Other options: OpenAI models, depending on availability (e.g. 'gpt-3.5-turbo')
1114

sidekick/configs/prompt_template.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,6 @@
5454
# Add explanation and reasoning for each SQL query
5555
"""
5656

57-
# DEBUGGING_PROMPT = {
58-
# "system_prompt": "Act as a SQL expert for {dialect} database",
59-
# "user_prompt": """
60-
# ### Help fix syntax errors for provided incorrect SQL Query.
61-
# # Error: {ex_traceback}
62-
# # Query:\n {qry_txt}
63-
# # Output: Add ``` as prefix and ``` as suffix to generated SQL
64-
# """,
65-
# }
66-
6757
DEBUGGING_PROMPT = {
6858
"system_prompt": "Act as a SQL expert for {dialect} database",
6959
"user_prompt": """

sidekick/prompter.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,29 @@
3232
env_settings = toml.load(f"{app_base_path}/sidekick/configs/env.toml")
3333
db_dialect = env_settings["DB-DIALECT"]["DB_TYPE"]
3434
model_name = env_settings["MODEL_INFO"]["MODEL_NAME"]
35-
h2o_remote_url = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_REMOTE_URL"]
36-
h2o_key = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_API_KEY"]
35+
h2o_remote_url = env_settings["MODEL_INFO"]["H2OGPTE_URL"]
36+
h2o_key = env_settings["MODEL_INFO"]["H2OGPTE_API_TOKEN"]
3737
# h2ogpt base model urls
38-
h2ogpt_base_model_url = env_settings["MODEL_INFO"]["H2O_BASE_MODEL_URL"]
39-
h2ogpt_base_model_key = env_settings["MODEL_INFO"]["H2O_BASE_MODEL_API_KEY"]
38+
h2ogpt_base_model_url = env_settings["MODEL_INFO"]["H2OGPT_URL"]
39+
h2ogpt_base_model_key = env_settings["MODEL_INFO"]["H2OGPT_API_TOKEN"]
40+
4041
self_correction_model = env_settings["MODEL_INFO"]["SELF_CORRECTION_MODEL"]
42+
recommendation_model = env_settings["MODEL_INFO"]['RECOMMENDATION_MODEL']
4143

4244
os.environ["TOKENIZERS_PARALLELISM"] = "False"
4345
# Env variables
44-
if not os.getenv("H2O_BASE_MODEL_URL"):
45-
os.environ["H2O_BASE_MODEL_URL"] = h2ogpt_base_model_url
46-
if not os.getenv("H2O_BASE_MODEL_API_KEY"):
47-
os.environ["H2O_BASE_MODEL_API_KEY"] = h2ogpt_base_model_key
48-
if not os.getenv("RECOMMENDATION_MODEL_REMOTE_URL"):
49-
os.environ["RECOMMENDATION_MODEL_REMOTE_URL"] = h2o_remote_url
50-
if not os.getenv("RECOMMENDATION_MODEL_API_KEY"):
51-
os.environ["RECOMMENDATION_MODEL_API_KEY"] = h2o_key
46+
if not os.getenv("H2OGPT_URL"):
47+
os.environ["H2OGPT_URL"] = h2ogpt_base_model_url
48+
if not os.getenv("H2OGPT_API_TOKEN"):
49+
os.environ["H2OGPT_API_TOKEN"] = h2ogpt_base_model_key
50+
if not os.getenv("H2OGPTE_URL"):
51+
os.environ["H2OGPTE_URL"] = h2o_remote_url
52+
if not os.getenv("H2OGPTE_API_TOKEN"):
53+
os.environ["H2OGPTE_API_TOKEN"] = h2o_key
5254
if not os.getenv("SELF_CORRECTION_MODEL"):
5355
os.environ["SELF_CORRECTION_MODEL"] = self_correction_model
56+
if not os.getenv("RECOMMENDATION_MODEL"):
57+
os.environ["RECOMMENDATION_MODEL"] = recommendation_model
5458

5559
def color(fore="", back="", text=None):
5660
return f"{fore}{back}{text}{Style.RESET_ALL}"
@@ -159,17 +163,17 @@ def recommend_suggestions(cache_path: str, table_name: str, n_qs: int=10):
159163
r_url = _key = None
160164
# First check for keys in env variables
161165
logger.debug(f"Checking environment settings ...")
162-
env_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
163-
env_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
166+
env_url = os.environ["H2OGPTE_URL"]
167+
env_key = os.environ["H2OGPTE_API_TOKEN"]
164168
if env_url and env_key:
165169
r_url = env_url
166170
_key = env_key
167171
elif Path(f"{app_base_path}/sidekick/configs/env.toml").exists():
168172
# Reload .env info
169173
logger.debug(f"Checking configuration file ...")
170174
env_settings = toml.load(f"{app_base_path}/sidekick/configs/env.toml")
171-
r_url = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_REMOTE_URL"]
172-
_key = env_settings["MODEL_INFO"]["RECOMMENDATION_MODEL_API_KEY"]
175+
r_url = env_settings["MODEL_INFO"]["H2OGPTE_URL"]
176+
_key = env_settings["MODEL_INFO"]["H2OGPTE_API_TOKEN"]
173177
else:
174178
raise Exception("Model url or key is missing.")
175179

@@ -666,8 +670,8 @@ def ask(
666670
logger.debug(f"Attempt: {attempt+1}")
667671
_tmp = err.split("\n")
668672
_err = _tmp[0].split("Error occurred:")[1] if len(_tmp) > 0 else None
669-
env_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
670-
env_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
673+
env_url = os.environ["H2OGPTE_URL"]
674+
env_key = os.environ["H2OGPTE_API_TOKEN"]
671675
corr_sql = sql_g.self_correction(input_query=_val, error_msg=_err, remote_url=env_url, client_key=env_key)
672676
q_res, err = DBConfig.execute_query(query=corr_sql)
673677
if not 'Error occurred'.lower() in str(err).lower():

sidekick/query.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ def generate_sql(
444444
else:
445445
if self.h2ogpt_client is None:
446446
# Check if env variable has info about remote hosting
447-
remote_h2ogpt_base_url = os.environ.get("H2O_BASE_MODEL_URL", None)
447+
remote_h2ogpt_base_url = os.environ.get("H2OGPT_URL", None)
448448
if model_name == 'h2ogpt-sql-sqlcoder-34b-alpha':
449449
remote_h2ogpt_base_url = f"{remote_h2ogpt_base_url}:5000/v1"
450450
elif model_name == 'h2ogpt-sql-sqlcoder2':
@@ -453,7 +453,7 @@ def generate_sql(
453453
remote_h2ogpt_base_url = f"{remote_h2ogpt_base_url}:5002/v1"
454454
else:
455455
remote_h2ogpt_base_url = None
456-
remote_h2ogpt_key = os.environ.get("H2O_BASE_MODEL_API_KEY", None)
456+
remote_h2ogpt_key = os.environ.get("H2OGPT_API_TOKEN", None)
457457
_api_key = remote_h2ogpt_key if remote_h2ogpt_key else "EMPTY"
458458
if remote_h2ogpt_base_url:
459459
client_args = dict(base_url=remote_h2ogpt_base_url, api_key=_api_key, timeout=20.0)
@@ -784,8 +784,8 @@ def generate_sql(
784784
except (sqlglot.errors.ParseError, ValueError, RuntimeError) as e:
785785
_, ex_value, ex_traceback = sys.exc_info()
786786
logger.info(f"Attempting to fix syntax error ...,\n {e}")
787-
env_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
788-
env_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
787+
env_url = os.environ["H2OGPTE_URL"]
788+
env_key = os.environ["H2OGPTE_API_TOKEN"]
789789
try:
790790
result = self.self_correction(input_query=res, error_msg=str(ex_traceback), remote_url=env_url, client_key=env_key)
791791
except Exception as se:

sidekick/utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -548,8 +548,8 @@ def check_vulnerability(input_query: str):
548548
# Step 2 is optional, if remote url is provided, check for SQL injection patterns in the generated SQL code via LLM
549549
# Currently, this is supported only for models served as endpoints
550550
logger.debug(f"Requesting additional scan using configured models")
551-
remote_url = os.environ["RECOMMENDATION_MODEL_REMOTE_URL"]
552-
api_key = os.environ["RECOMMENDATION_MODEL_API_KEY"]
551+
remote_url = os.environ["H2OGPTE_URL"]
552+
api_key = os.environ["H2OGPTE_API_TOKEN"]
553553

554554
_system_prompt = GUARDRAIL_PROMPT["system_prompt"].strip()
555555
output_schema = """{
@@ -618,12 +618,13 @@ def generate_suggestions(remote_url, client_key:str, column_names: list, n_qs: i
618618
input_prompt = RECOMMENDATION_PROMPT.format(data_schema=column_info, n_questions=n_qs
619619
)
620620

621+
recommender_model = os.getenv("RECOMMENDATION_MODEL", "h2oai/h2ogpt-4096-llama2-70b-chat")
621622
client = H2OGPTE(address=remote_url, api_key=client_key)
622623
text_completion = client.answer_question(
623624
system_prompt=f"Act as a data analyst, based on below data schema help answer the question",
624625
text_context_list=[],
625626
question=input_prompt,
626-
llm='h2oai/h2ogpt-4096-llama2-70b-chat'
627+
llm=recommender_model
627628
)
628629
_res = text_completion.content.split("\n")[2:]
629630
results = "\n".join(_res)

0 commit comments

Comments
 (0)