Skip to content

Commit 0abb995

Browse files
committed
integrating OPENAI and Langchain into the ChatUI
Signed-off-by: BergZain <50025962+bergzain@users.noreply.github.com>
1 parent 12ad655 commit 0abb995

3 files changed

Lines changed: 130 additions & 113 deletions

File tree

src/ChatUI_streamlit/LLMModel.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
from dotenv import load_dotenv
2+
import os
3+
from langchain.chat_models import ChatOpenAI
4+
from langchain.vectorstores import FAISS
5+
from langchain.chains import RetrievalQA
6+
from langchain.embeddings.openai import OpenAIEmbeddings
7+
from langchain.text_splitter import CharacterTextSplitter
8+
from langchain.document_loaders import TextLoader
9+
from langchain.prompts import PromptTemplate
10+
11+
12+
import logging

logger = logging.getLogger(__name__)

# Read the .env file *before* touching the environment, so a key that is only
# defined there is visible to the lookup below.
load_dotenv()

openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    # KeyError matches what the previous os.environ[...] lookup raised.
    raise KeyError(
        'OPENAI_API_KEY is not set; export it or add it to a .env file'
    )

# Backward-compatible alias for the previous module-level name.
YourAPIKey = openai_api_key

llm = ChatOpenAI(model_name='gpt-3.5-turbo', openai_api_key=openai_api_key)

embeddings = OpenAIEmbeddings(disallowed_special=(), openai_api_key=openai_api_key)

# Directory whose files are indexed for retrieval. The default is the
# previously hard-coded location; set RTDIP_PIPELINES_DIR to use another path.
root_dir = os.getenv(
    'RTDIP_PIPELINES_DIR',
    '/Users/zainhazzouri/projects/RAG-Playground/core/src/sdk/python/rtdip_sdk/pipelines',
)


def _load_documents(source_dir):
    """Return the split documents for every loadable file under *source_dir*.

    The tree is walked with ``os.walk``; each file is read as UTF-8 through
    ``TextLoader`` and split with ``load_and_split()``. A file that fails to
    load is logged at WARNING level and skipped, so one bad file does not
    abort the whole run.
    """
    loaded = []
    for dirpath, _dirnames, filenames in os.walk(source_dir):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            try:
                loader = TextLoader(path, encoding='utf-8')
                loaded.extend(loader.load_and_split())
            except Exception as err:  # best-effort: keep going, but say why
                logger.warning('Skipping %s: %s', path, err)
    return loaded


docs = _load_documents(root_dir)
if not docs:
    # Fail early with an actionable message instead of handing the vector
    # store an empty corpus (os.walk yields nothing for a missing directory).
    raise RuntimeError(
        f'No documents could be loaded from {root_dir!r}; '
        'check that the directory exists and contains readable text files'
    )

docsearch = FAISS.from_documents(docs, embeddings)

# Get our retriever ready
RAG = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

src/ChatUI_streamlit/app.py

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
11
import streamlit as st
22
import replicate
33
import os
4+
from LLMModel import RAG as RAG
45

56
# App title
6-
st.set_page_config(page_title="RTDIP Chatbot")
7+
st.set_page_config(page_title="RTDIP PipeLine Chatbot")
78

89
# Replicate Credentials
910
with st.sidebar:
10-
st.title('Pipeline Generation Chatbot')
11-
if 'REPLICATE_API_TOKEN' in st.secrets:
12-
st.success('API key already provided!', icon='✅')
13-
replicate_api = st.secrets['REPLICATE_API_TOKEN']
14-
else:
15-
replicate_api = st.text_input('Enter Replicate API token:', type='password')
16-
if not (replicate_api.startswith('r8_') and len(replicate_api) == 40):
17-
st.warning('Please enter your credentials!', icon='⚠️')
18-
else:
19-
st.success('Proceed to entering your prompt message!', icon='👉')
20-
os.environ['REPLICATE_API_TOKEN'] = replicate_api
11+
st.title('RTDIP Pipeline Generation Chatbot')
12+
# if 'REPLICATE_API_TOKEN' in st.secrets:
13+
# st.success('API key already provided!', icon='✅')
14+
# replicate_api = st.secrets['REPLICATE_API_TOKEN']
15+
# else:
16+
# replicate_api = st.text_input('Enter Replicate API token:', type='password')
17+
# if not (replicate_api.startswith('r8_') and len(replicate_api) == 40):
18+
# st.warning('Please enter your credentials!', icon='⚠️')
19+
# else:
20+
# st.success('Proceed to entering your prompt message!', icon='👉')
21+
# os.environ['REPLICATE_API_TOKEN'] = replicate_api
22+
openai_api_key = os.getenv('OPENAI_API_KEY', 'YourAPIKey')
2123

2224
# Store LLM generated responses
2325
if "conversations" not in st.session_state.keys():
@@ -48,29 +50,9 @@ def clear_chat_history():
4850
st.session_state.conversations = [{"title": "Default Conversation", "messages": [{"role": "assistant", "content": "How may I assist you today?"}]}]
4951
st.sidebar.button('Clear Chat History', on_click=clear_chat_history)
5052

51-
# Function for generating LLaMA2 response. Refactored from https://github.com/a16z-infra/llama2-chatbot
52-
def generate_llama2_response(prompt_input):
53-
string_dialogue = "You are a helpful assistant. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
54-
for conversation in st.session_state.conversations:
55-
for dict_message in conversation["messages"]:
56-
role = dict_message["role"]
57-
content = dict_message["content"]
58-
if role == "user":
59-
string_dialogue += f"User: {content}\n\n"
60-
else:
61-
string_dialogue += f"Assistant: {content}\n\n"
62-
63-
output = replicate.run(
64-
'a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5',
65-
input={
66-
"prompt": f"{string_dialogue} {prompt_input} Assistant: ",
67-
"repetition_penalty": 1
68-
}
69-
)
70-
return output
7153

7254
# User-provided prompt
73-
if prompt := st.chat_input(disabled=not replicate_api):
55+
if prompt := st.chat_input(): #
7456
# Use the user's prompt as the title
7557
title = prompt
7658

@@ -83,15 +65,15 @@ def generate_llama2_response(prompt_input):
8365
# Generate a new response if the last message is not from the assistant
8466
if st.session_state.conversations[-1]["messages"][-1]["role"] != "assistant":
8567
with st.chat_message("assistant"):
86-
with st.spinner("Thinking..."):
87-
response = generate_llama2_response(prompt)
68+
with st.spinner("Generating..."):
69+
response = RAG.run(prompt)
8870
placeholder = st.empty()
8971
full_response = ''
9072
for item in response:
9173
full_response += item
9274
placeholder.markdown(full_response)
9375
placeholder.markdown(full_response)
94-
76+
9577
message = {"role": "assistant", "content": full_response}
9678
st.session_state.conversations[-1]["messages"].append(message)
9779

src/RAG/RAG.ipynb

Lines changed: 71 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,18 @@
55
"execution_count": 1,
66
"id": "initial_id",
77
"metadata": {
8+
"collapsed": true,
89
"ExecuteTime": {
9-
"end_time": "2023-11-25T08:06:52.534235Z",
10-
"start_time": "2023-11-25T08:06:52.514168Z"
11-
},
12-
"collapsed": true
10+
"end_time": "2023-11-27T20:36:37.381119Z",
11+
"start_time": "2023-11-27T20:36:37.371708Z"
12+
}
1313
},
1414
"outputs": [],
1515
"source": [
1616
"from dotenv import load_dotenv\n",
1717
"import os\n",
1818
"\n",
19-
"YourAPIKey = \"\"\n",
19+
"YourAPIKey = \"<REDACTED-OPENAI-API-KEY>\"\n",
2020
"\n",
2121
"load_dotenv()\n",
2222
"\n",
@@ -25,14 +25,14 @@
2525
},
2626
{
2727
"cell_type": "code",
28-
"execution_count": 2,
28+
"execution_count": 8,
2929
"id": "640e19e3961c5559",
3030
"metadata": {
31+
"collapsed": false,
3132
"ExecuteTime": {
32-
"end_time": "2023-11-25T08:06:53.629849Z",
33-
"start_time": "2023-11-25T08:06:52.521264Z"
34-
},
35-
"collapsed": false
33+
"end_time": "2023-11-27T20:37:28.740784Z",
34+
"start_time": "2023-11-27T20:37:28.697880Z"
35+
}
3636
},
3737
"outputs": [],
3838
"source": [
@@ -47,14 +47,14 @@
4747
},
4848
{
4949
"cell_type": "code",
50-
"execution_count": 3,
50+
"execution_count": 9,
5151
"id": "33597feb02573078",
5252
"metadata": {
53+
"collapsed": false,
5354
"ExecuteTime": {
54-
"end_time": "2023-11-25T08:06:53.663555Z",
55-
"start_time": "2023-11-25T08:06:53.630098Z"
56-
},
57-
"collapsed": false
55+
"end_time": "2023-11-27T20:37:28.924429Z",
56+
"start_time": "2023-11-27T20:37:28.892412Z"
57+
}
5858
},
5959
"outputs": [],
6060
"source": [
@@ -63,18 +63,18 @@
6363
},
6464
{
6565
"cell_type": "code",
66-
"execution_count": 4,
66+
"execution_count": 10,
6767
"id": "ce645e118f29cf79",
6868
"metadata": {
69+
"collapsed": false,
6970
"ExecuteTime": {
70-
"end_time": "2023-11-25T08:06:53.686887Z",
71-
"start_time": "2023-11-25T08:06:53.664778Z"
72-
},
73-
"collapsed": false
71+
"end_time": "2023-11-27T20:37:29.445589Z",
72+
"start_time": "2023-11-27T20:37:29.422117Z"
73+
}
7474
},
7575
"outputs": [],
7676
"source": [
77-
"root_dir = '/amos2023ws05-pipeline-config-chat-ai/src/RAG/pipelines'\n",
77+
"root_dir = '/Users/zainhazzouri/projects/amos2023ws05-pipeline-config-chat-ai/src/RAG/pipelines'\n",
7878
"docs = []\n",
7979
"\n",
8080
"# Go through each folder\n",
@@ -92,21 +92,21 @@
9292
},
9393
{
9494
"cell_type": "code",
95-
"execution_count": 5,
95+
"execution_count": 11,
9696
"id": "c6e41366a23e6224",
9797
"metadata": {
98+
"collapsed": false,
9899
"ExecuteTime": {
99-
"end_time": "2023-11-25T08:06:53.687897Z",
100-
"start_time": "2023-11-25T08:06:53.685060Z"
101-
},
102-
"collapsed": false
100+
"end_time": "2023-11-27T20:37:32.037810Z",
101+
"start_time": "2023-11-27T20:37:32.031816Z"
102+
}
103103
},
104104
"outputs": [
105105
{
106106
"name": "stdout",
107107
"output_type": "stream",
108108
"text": [
109-
"You have 190 documents\n",
109+
"You have 219 documents\n",
110110
"\n",
111111
"------ Start Document ------\n",
112112
"# Copyright 2022 RTDIP\n",
@@ -129,14 +129,14 @@
129129
},
130130
{
131131
"cell_type": "code",
132-
"execution_count": 6,
132+
"execution_count": 12,
133133
"id": "e9847352294eee40",
134134
"metadata": {
135+
"collapsed": false,
135136
"ExecuteTime": {
136-
"end_time": "2023-11-25T08:06:57.051920Z",
137-
"start_time": "2023-11-25T08:06:53.687581Z"
138-
},
139-
"collapsed": false
137+
"end_time": "2023-11-27T20:37:37.370376Z",
138+
"start_time": "2023-11-27T20:37:33.328494Z"
139+
}
140140
},
141141
"outputs": [],
142142
"source": [
@@ -145,14 +145,14 @@
145145
},
146146
{
147147
"cell_type": "code",
148-
"execution_count": 7,
148+
"execution_count": 13,
149149
"id": "90fd0d8a51a5cf31",
150150
"metadata": {
151+
"collapsed": false,
151152
"ExecuteTime": {
152-
"end_time": "2023-11-25T08:06:59.831522Z",
153-
"start_time": "2023-11-25T08:06:59.822748Z"
154-
},
155-
"collapsed": false
153+
"end_time": "2023-11-27T20:37:38.431113Z",
154+
"start_time": "2023-11-27T20:37:38.428419Z"
155+
}
156156
},
157157
"outputs": [],
158158
"source": [
@@ -162,14 +162,14 @@
162162
},
163163
{
164164
"cell_type": "code",
165-
"execution_count": 10,
165+
"execution_count": 14,
166166
"id": "103f11e7d6f49f6e",
167167
"metadata": {
168+
"collapsed": false,
168169
"ExecuteTime": {
169-
"end_time": "2023-11-25T08:11:12.280923Z",
170-
"start_time": "2023-11-25T08:10:35.004427Z"
171-
},
172-
"collapsed": false
170+
"end_time": "2023-11-27T20:38:16.799202Z",
171+
"start_time": "2023-11-27T20:37:39.004977Z"
172+
}
173173
},
174174
"outputs": [],
175175
"source": [
@@ -179,59 +179,53 @@
179179
},
180180
{
181181
"cell_type": "code",
182-
"execution_count": 11,
182+
"execution_count": 15,
183183
"id": "7b73d941ef97f4bb",
184184
"metadata": {
185+
"collapsed": false,
185186
"ExecuteTime": {
186-
"end_time": "2023-11-25T08:11:13.673641Z",
187-
"start_time": "2023-11-25T08:11:13.668368Z"
188-
},
189-
"collapsed": false
187+
"end_time": "2023-11-27T20:38:18.862176Z",
188+
"start_time": "2023-11-27T20:38:18.858254Z"
189+
}
190190
},
191191
"outputs": [
192192
{
193193
"name": "stdout",
194194
"output_type": "stream",
195195
"text": [
196-
"Sure! Here's an example of how you can use RTDIP components to read from an Eventhub using a connection string and consumer group, transform the data from binary to string, and then write it to a Delta table:\n",
196+
"Certainly! Here's the code snippet that reads from an Eventhub using a connection string and consumer group, applies the BinaryToStringTransformer and EdgeXOPCUAJsonToPCDMTransformer transformations, and writes the data to a Delta table:\n",
197197
"\n",
198198
"```python\n",
199-
"from rtdip_sdk.pipelines.sources import SparkEventhubSource\n",
200-
"from rtdip_sdk.pipelines.transforms import BinaryToStringTransformer, EdgeXTransformer\n",
201-
"from rtdip_sdk.pipelines.destinations import DeltaDestination\n",
199+
"from rtdip_sdk.pipelines.sources.spark.eventhub import SparkEventhubSource\n",
200+
"from rtdip_sdk.pipelines.transformers.spark.binary_to_string import BinaryToStringTransformer\n",
201+
"from rtdip_sdk.pipelines.transformers.spark.edgex_opcua_json_to_pcdm import EdgeXOPCUAJsonToPCDMTransformer\n",
202+
"from rtdip_sdk.pipelines.destinations.spark.delta import SparkDeltaDestination\n",
202203
"from rtdip_sdk.pipelines.utilities import SparkSessionUtility\n",
204+
"import json\n",
203205
"\n",
204-
"# Not required if using Databricks\n",
205-
"spark = SparkSessionUtility(config={}).execute()\n",
206-
"\n",
207-
"# Eventhub connection string and consumer group\n",
208-
"connection_string = \"YOUR_EVENTHUB_CONNECTION_STRING\"\n",
209-
"consumer_group = \"YOUR_CONSUMER_GROUP\"\n",
210-
"\n",
211-
"# Create the Eventhub source\n",
212-
"eventhub_source = SparkEventhubSource(spark=spark, options={\"eventhubs.connectionString\": connection_string, \"eventhubs.consumerGroup\": consumer_group})\n",
206+
"def pipeline():\n",
207+
" spark = SparkSessionUtility(config={}).execute()\n",
213208
"\n",
214-
"# Read from Eventhub\n",
215-
"data = eventhub_source.read_stream()\n",
209+
" ehConf = {\n",
210+
" \"eventhubs.connectionString\": \"{EventhubConnectionString}\",\n",
211+
" \"eventhubs.consumerGroup\": \"{EventhubConsumerGroup}\",\n",
212+
" \"eventhubs.startingPosition\": json.dumps(\n",
213+
" {\"offset\": \"0\", \"seqNo\": -1, \"enqueuedTime\": None, \"isInclusive\": True}\n",
214+
" ),\n",
215+
" }\n",
216216
"\n",
217-
"# Transform data from binary to string\n",
218-
"binary_to_string_transformer = BinaryToStringTransformer()\n",
219-
"transformed_data = binary_to_string_transformer.transform(data)\n",
217+
" source = SparkEventhubSource(spark, ehConf).read_batch()\n",
218+
" string_data = BinaryToStringTransformer(source, \"body\", \"body\").transform()\n",
219+
" PCDM_data = EdgeXOPCUAJsonToPCDMTransformer(string_data, \"body\").transform()\n",
220+
" SparkDeltaDestination(\n",
221+
" data=PCDM_data, options={}, destination=\"{path/to/table}\"\n",
222+
" ).write_batch()\n",
220223
"\n",
221-
"# Apply EdgeX transformation\n",
222-
"edgex_transformer = EdgeXTransformer()\n",
223-
"transformed_data = edgex_transformer.transform(transformed_data)\n",
224-
"\n",
225-
"# Write transformed data to Delta table\n",
226-
"delta_destination = DeltaDestination(spark=spark, data=transformed_data, table_name=\"YOUR_DELTA_TABLE_NAME\")\n",
227-
"delta_destination.write_stream()\n",
224+
"if __name__ == \"__main__\":\n",
225+
" pipeline()\n",
228226
"```\n",
229227
"\n",
230-
"Make sure to replace `YOUR_EVENTHUB_CONNECTION_STRING`, `YOUR_CONSUMER_GROUP`, and `YOUR_DELTA_TABLE_NAME` with your actual values.\n",
231-
"\n",
232-
"This code will create a streaming pipeline that reads data from an Eventhub, transforms it using binary to string and EdgeX transformations, and then writes the transformed data to a Delta table.\n",
233-
"\n",
234-
"Note that you'll need to have the necessary dependencies installed and import the required modules for the components to work properly.\n"
228+
"Please replace `{EventhubConnectionString}`, `{EventhubConsumerGroup}`, and `{path/to/table}` with your specific values.\n"
235229
]
236230
}
237231
],

0 commit comments

Comments
 (0)