diff --git a/db/mongodb_vector_embedding.py b/db/mongodb_vector_embedding.py
index 9711c8b..f10e672 100644
--- a/db/mongodb_vector_embedding.py
+++ b/db/mongodb_vector_embedding.py
@@ -5,23 +5,33 @@
 # Load environment variables from .env file
 load_dotenv()
 
-# Retrieve credentials from environment variables
-username = os.getenv("MONGODB_USERNAME")
-password = os.getenv("MONGODB_PASSWORD")
-database_name = os.getenv("DATABASE_NAME")
-collection_name = os.getenv("COLLECTION_NAME")
-
 def get_mongo_collection():
-    uri = f"mongodb+srv://{username}:{password}@hackthe6ix.iw7r6n4.mongodb.net/?retryWrites=true&w=majority&appName=Hackthe6ix"
-    print(uri)
+    username = os.getenv("MONGODB_USERNAME")
+    password = os.getenv("MONGODB_PASSWORD")
+    database_name = os.getenv("DATABASE_NAME")
+    collection_name = os.getenv("COLLECTION_NAME")
+
+    url = f"mongodb+srv://{username}:{password}@hackthe6ix.iw7r6n4.mongodb.net/?retryWrites=true&w=majority&appName=Hackthe6ix"
+    print(url)
     # Create a new client and connect to the server
-    client = MongoClient(uri)
+    client = MongoClient(url)
     # Send a ping to confirm a successful connection
     try:
         client.admin.command('ping')
         print("Pinged your deployment. You successfully connected to MongoDB!")
     except Exception as e:
         print(e)
 
-get_mongo_collection()
+    db = client[database_name]
+    collection = db[collection_name]
+    return collection
+
+
+# Test the connection
+collection = get_mongo_collection()
+
+if collection is not None:
+    print("Successfully obtained collection.")
+else:
+    print("Failed to obtain collection.")
 
diff --git a/engine/llm.py b/engine/llm.py
index 8d56b3b..277f537 100644
--- a/engine/llm.py
+++ b/engine/llm.py
@@ -1,5 +1,7 @@
 from langchain_openai import ChatOpenAI
 from dotenv import load_dotenv
+from utils.embeddings import text_to_embedding, get_top_k_similar_embeddings
+import pyperclip
 
 load_dotenv()
 
@@ -24,3 +26,32 @@ def query_llm(text: str):
 
     return res.content
 
+
+# NOTE: this redefinition shadows the single-argument query_llm above
+def query_llm(prompt, collection):
+
+    prompt_embedding = text_to_embedding(prompt)
+
+    # Retrieve the top 5 similar documents
+    top_documents = get_top_k_similar_embeddings(prompt_embedding, collection)
+
+    # Extract text from the top documents
+    context = " ".join([doc["text"] for doc in top_documents])
+
+    full_prompt = f"{context}\n\n{prompt}"
+
+    llm = ChatOpenAI(
+        model="gpt-4o",
+        temperature=0,
+        max_tokens=None,
+        timeout=None,
+        max_retries=2,
+    )
+
+    response = llm.invoke(full_prompt)
+
+    # Copy the response text to the clipboard
+    pyperclip.copy(response.content)
+
+    return response.content
+
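Note: the two-argument query_llm above implements a simple retrieval-augmented flow (embed the prompt, fetch the top 5 similar stored documents from MongoDB, prepend them as context, then call gpt-4o). A minimal usage sketch, assuming .env supplies OPENAI_API_KEY plus the MongoDB variables used in db/mongodb_vector_embedding.py and that the script runs from the project root so db/ and engine/ are importable; the prompt string is illustrative only:

    from db.mongodb_vector_embedding import get_mongo_collection
    from engine.llm import query_llm

    # Connect once and reuse the collection handle for every query.
    collection = get_mongo_collection()

    # Retrieves similar stored snippets, prepends them as context, queries gpt-4o.
    answer = query_llm("Summarize my recent clipboard notes", collection)
    print(answer)  # query_llm also copies the same text to the clipboard
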
{CONTROL, "v", "5"} SHIFT_KEYS = {keyboard.Key.shift, keyboard.Key.shift_r} # Left and Right Shift +>>>>>>> 4859c2575676501a7780533243c8607f59030a09 CTRL_U_KEYS_MAC = {CONTROL, "u"} # Cmd+U CTRL_O_KEYS_MAC = {CONTROL, "o"} # Cmd+O @@ -75,6 +82,32 @@ def run_server(): TERMINATE_COMBINATION_MAC = {CONTROL, "p"} # Cmd+P UNDO_KEY = "z" controller = keyboard.Controller() +collection = get_mongo_collection() + + +def capture_and_store_clipboard(): + try: + clipboard_content = pyperclip.paste() + logging.info(f"Clipboard content: {clipboard_content}") + + # Convert text to vector embedding + vector_embedding = text_to_embedding(clipboard_content) + + # Prepare the document + document = { + "text": clipboard_content, + "embedding": vector_embedding + } + + # Insert the document into the MongoDB collection + if collection: + collection.insert_one(document) + logging.info("Vector embedding inserted into MongoDB.") + else: + logging.error("Failed to insert vector embedding into MongoDB. Collection not available.") + + except Exception as e: + logging.error(f"Error while processing clipboard content: {e}") def undo(): @@ -88,7 +121,6 @@ def undo(): - def show_paste_options(): # Fetch and print clipboard content clipboard_content = pyperclip.paste() @@ -118,6 +150,7 @@ def on_press(key): logging.info("Ctrl+V+1 pressed (Windows)") # Handle specific case for Ctrl+V+1 logging.info("Special key combination Ctrl+V+1 triggered!") + capture_and_store_clipboard() undo() start(pyperclip.paste(), logging) @@ -126,6 +159,7 @@ def on_press(key): logging.info("Ctrl+V+2 pressed (Windows)") # Handle specific case for Ctrl+V+2 logging.info("Special key combination Ctrl+V+2 triggered!") + capture_and_store_clipboard() undo() # set_qrcode(pyperclip.paste()) logging.info("QR code set") @@ -135,6 +169,12 @@ def on_press(key): logging.info("Ctrl+V+3 pressed (Windows)") # Handle specific case for Ctrl+V+3 logging.info("Special key combination Ctrl+V+3 triggered!") + clipboard_content = pyperclip.paste() + capture_and_store_clipboard() + undo() + response = query_llm(clipboard_content, collection) + print("Generated response:", response) + logging.info("Response generated and displayed.") # Check for terminating keys (Ctrl+P) if all(k in current_keys for k in TERMINATE_COMBINATION_WINDOWS): @@ -153,6 +193,7 @@ def on_press(key): # ): if all(k in current_keys for k in CTRL_1_KEYS_MAC): logging.info("MATLAB (macOS)") + capture_and_store_clipboard() undo() start(pyperclip.paste(), logging) @@ -161,15 +202,24 @@ def on_press(key): # ): if all(k in current_keys for k in CTRL_2_KEYS_MAC): logging.info("Adobe (macOS)") - # set_qrcode(pyperclip.paste()) + capture_and_store_clipboard() + undo() + set_qrcode(pyperclip.paste()) + logging.info("QR code set") # if all(k in current_keys for k in CTRL_V4_SHIFT_KEYS_MAC) and any( # k in current_keys for k in SHIFT_KEYS # ): if all(k in current_keys for k in CTRL_3_KEYS_MAC): logging.info("LLM(macOS)") + clipboard_content = pyperclip.paste() + capture_and_store_clipboard() undo() + # send job to frontend - res = query_llm(pyperclip.paste()) + res = query_llm(clipboard_content, collection) + + print("Generated response:", response) + logging.info("Response generated and displayed.") message = { "type": "text", @@ -251,7 +301,3 @@ def on_release(key): except KeyError: pass - -# Start the listener -with keyboard.Listener(on_press=on_press, on_release=on_release) as listener: - listener.join() diff --git a/utils/embeddings.py b/utils/embeddings.py new file mode 100644 index 0000000..845ace0 --- 
--- /dev/null
+++ b/utils/embeddings.py
@@ -0,0 +1,44 @@
+from langchain_openai import OpenAIEmbeddings
+from db.mongodb_vector_embedding import get_mongo_collection
+import numpy as np
+import os
+
+# Initialize the embeddings model
+def get_embedding_model():
+    return OpenAIEmbeddings()
+
+# Convert text to an embedding vector
+def text_to_embedding(text):
+    model = get_embedding_model()
+    embedding = model.embed_query(text)
+    return embedding
+
+
+# not sure if langchain has this already set up (will do this manually)
+def cosine_similarity(vec1, vec2):
+
+    # dot product of the two vectors, normalised by their magnitudes
+    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
+
+
+def get_top_k_similar_embeddings(query_embedding, collection=None, k=5):
+    # Fall back to a fresh connection only if no collection was passed in
+    if collection is None:
+        collection = get_mongo_collection()
+
+    documents = collection.find()
+    similarities = []
+
+    # Calculate the cosine similarity between the query embedding and each document embedding
+    for doc in documents:
+        embedding = np.array(doc["embedding"])
+        similarity = cosine_similarity(query_embedding, embedding)
+        similarities.append((doc, similarity))
+
+    # Sort the documents by similarity
+    similarities.sort(key=lambda x: x[1], reverse=True)
+
+    # Get the top k most similar documents
+    top_k_documents = [doc for doc, _ in similarities[:k]]
+
+    return top_k_documents
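
For reference, a minimal sketch of the store-and-retrieve round trip that utils/embeddings.py supports, assuming documents carry the "text" and "embedding" fields that capture_and_store_clipboard writes, and that OPENAI_API_KEY plus the MongoDB variables are set in .env; the sample strings are illustrative only:

    from db.mongodb_vector_embedding import get_mongo_collection
    from utils.embeddings import text_to_embedding, get_top_k_similar_embeddings

    collection = get_mongo_collection()

    # Store one document in the shape the retriever expects.
    snippet = "Example clipboard snippet about Dijkstra's shortest-path algorithm."
    collection.insert_one({"text": snippet, "embedding": text_to_embedding(snippet)})

    # Embed a query and rank the stored documents by cosine similarity.
    query_embedding = text_to_embedding("shortest path algorithms")
    for doc in get_top_k_similar_embeddings(query_embedding, collection, k=3):
        print(doc["text"])

Note that get_top_k_similar_embeddings scans the whole collection with find() and ranks in Python, which is fine at hackathon scale but would call for an indexed vector search on larger collections.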