30 changes: 20 additions & 10 deletions db/mongodb_vector_embedding.py
@@ -5,23 +5,33 @@
# Load environment variables from .env file
load_dotenv()

# Retrieve credentials from environment variables
username = os.getenv("MONGODB_USERNAME")
password = os.getenv("MONGODB_PASSWORD")
database_name = os.getenv("DATABASE_NAME")
collection_name = os.getenv("COLLECTION_NAME")

def get_mongo_collection():
uri = f"mongodb+srv://{username}:{password}@hackthe6ix.iw7r6n4.mongodb.net/?retryWrites=true&w=majority&appName=Hackthe6ix"
print(uri)
username = os.getenv("MONGODB_USERNAME")
password = os.getenv("MONGODB_PASSWORD")
database_name = os.getenv("DATABASE_NAME")
collection_name = os.getenv("COLLECTION_NAME")

url = f"mongodb+srv://{username}:{password}@hackthe6ix.iw7r6n4.mongodb.net/?retryWrites=true&w=majority&appName=Hackthe6ix"
print(url)

# Create a new client and connect to the server
client = MongoClient(uri)
client = MongoClient(url)
# Send a ping to confirm a successful connection
try:
client.admin.command('ping')
print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
print(e)
get_mongo_collection()

db = client[database_name]
collection = db[collection_name]
return collection


# Test the connection
collection = get_mongo_collection()

if collection is not None:
    print("Successfully obtained collection.")
else:
    print("Failed to obtain collection.")
30 changes: 30 additions & 0 deletions engine/llm.py
@@ -1,5 +1,7 @@
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from utils.embeddings import text_to_embedding, get_top_k_similar_embeddings
import pyperclip

load_dotenv()

@@ -24,3 +26,31 @@ def query_llm(text: str):

    return res.content


# NOTE: this second definition shadows query_llm(text) above; callers now pass (prompt, collection)
def query_llm(prompt, collection):

    prompt_embedding = text_to_embedding(prompt)

    # Retrieve the top 5 similar documents
    top_documents = get_top_k_similar_embeddings(prompt_embedding, collection)

    # Extract text from the top documents
    context = " ".join([doc["text"] for doc in top_documents])

    full_prompt = f"{context}\n\n{prompt}"

    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )

    # invoke() accepts a plain prompt string; generate() expects a list of message lists
    response = llm.invoke(full_prompt)

    # Copy the response text (not the AIMessage object) to the clipboard
    pyperclip.copy(response.content)

    return response.content

62 changes: 54 additions & 8 deletions events/events.py
@@ -4,13 +4,16 @@
import platform
import os
import sys
from db.mongodb_vector_embedding import get_mongo_collection
from utils.embeddings import text_to_embedding
import json
from datetime import datetime


sys.path.append("..")
from engine.matlab import start
from engine.llm import query_llm
from server import set_qrcode
from server import jobs
import validation

@@ -19,7 +22,6 @@
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


import nest_asyncio
import uvicorn
@@ -60,8 +62,13 @@ def run_server():
CTRL_C_KEYS_MAC = {CONTROL, "c"} # Cmd+C
# CTRL_V1_SHIFT_KEYS_MAC = {CONTROL, "v", "1"} # Cmd+V+1
# CTRL_V2_SHIFT_KEYS_MAC = {CONTROL, "v", "2"}
# CTRL_V3_SHIFT_KEYS_MAC = {CONTROL, "v", "3"}
# CTRL_V4_SHIFT_KEYS_MAC = {CONTROL, "v", "5"}
SHIFT_KEYS = {keyboard.Key.shift, keyboard.Key.shift_r} # Left and Right Shift

CTRL_U_KEYS_MAC = {CONTROL, "u"} # Cmd+U
CTRL_O_KEYS_MAC = {CONTROL, "o"} # Cmd+O
@@ -75,6 +82,32 @@ def run_server():
TERMINATE_COMBINATION_MAC = {CONTROL, "p"} # Cmd+P
UNDO_KEY = "z"
controller = keyboard.Controller()
collection = get_mongo_collection()


def capture_and_store_clipboard():
    try:
        clipboard_content = pyperclip.paste()
        logging.info(f"Clipboard content: {clipboard_content}")

        # Convert text to vector embedding
        vector_embedding = text_to_embedding(clipboard_content)

        # Prepare the document
        document = {
            "text": clipboard_content,
            "embedding": vector_embedding
        }

        # Insert the document into the MongoDB collection
        # (PyMongo collections do not support truth-value testing, so compare against None)
        if collection is not None:
            collection.insert_one(document)
            logging.info("Vector embedding inserted into MongoDB.")
        else:
            logging.error("Failed to insert vector embedding into MongoDB. Collection not available.")

    except Exception as e:
        logging.error(f"Error while processing clipboard content: {e}")


def undo():
@@ -88,7 +121,6 @@ def undo():




def show_paste_options():
    # Fetch and print clipboard content
    clipboard_content = pyperclip.paste()
@@ -118,6 +150,7 @@ def on_press(key):
logging.info("Ctrl+V+1 pressed (Windows)")
# Handle specific case for Ctrl+V+1
logging.info("Special key combination Ctrl+V+1 triggered!")
capture_and_store_clipboard()
undo()
start(pyperclip.paste(), logging)

@@ -126,6 +159,7 @@ def on_press(key):
logging.info("Ctrl+V+2 pressed (Windows)")
# Handle specific case for Ctrl+V+2
logging.info("Special key combination Ctrl+V+2 triggered!")
capture_and_store_clipboard()
undo()
# set_qrcode(pyperclip.paste())
logging.info("QR code set")
@@ -135,6 +169,12 @@ def on_press(key):
logging.info("Ctrl+V+3 pressed (Windows)")
# Handle specific case for Ctrl+V+3
logging.info("Special key combination Ctrl+V+3 triggered!")
clipboard_content = pyperclip.paste()
capture_and_store_clipboard()
undo()
response = query_llm(clipboard_content, collection)
print("Generated response:", response)
logging.info("Response generated and displayed.")

# Check for terminating keys (Ctrl+P)
if all(k in current_keys for k in TERMINATE_COMBINATION_WINDOWS):
@@ -153,6 +193,7 @@ def on_press(key):
# ):
if all(k in current_keys for k in CTRL_1_KEYS_MAC):
logging.info("MATLAB (macOS)")
capture_and_store_clipboard()
undo()
start(pyperclip.paste(), logging)

@@ -161,15 +202,24 @@ def on_press(key):
# ):
if all(k in current_keys for k in CTRL_2_KEYS_MAC):
logging.info("Adobe (macOS)")
# set_qrcode(pyperclip.paste())
capture_and_store_clipboard()
undo()
set_qrcode(pyperclip.paste())
logging.info("QR code set")
# if all(k in current_keys for k in CTRL_V4_SHIFT_KEYS_MAC) and any(
# k in current_keys for k in SHIFT_KEYS
# ):
if all(k in current_keys for k in CTRL_3_KEYS_MAC):
logging.info("LLM(macOS)")
clipboard_content = pyperclip.paste()
capture_and_store_clipboard()
undo()

# send job to frontend
res = query_llm(pyperclip.paste())
res = query_llm(clipboard_content, collection)

print("Generated response:", response)
logging.info("Response generated and displayed.")

message = {
"type": "text",
@@ -251,7 +301,3 @@ def on_release(key):
except KeyError:
pass


# Start the listener
with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
listener.join()
42 changes: 42 additions & 0 deletions utils/embeddings.py
@@ -0,0 +1,42 @@
from langchain_community.embeddings import OpenAIEmbeddings
from db.mongodb_vector_embedding import get_mongo_collection
import numpy as np
import os

# Initialize the embeddings model
def get_embedding_model():
    return OpenAIEmbeddings()

# Convert text to an embedding vector
def text_to_embedding(text):
    model = get_embedding_model()
    # OpenAIEmbeddings exposes embed_query() for a single string; embed_text() does not exist
    embedding = model.embed_query(text)
    return embedding


# not sure if langchain has this already set up (will do this manually)
def cosine_similarity(vec1, vec2):

    # dot product of the two vectors, normalized by their magnitudes
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))


def get_top_k_similar_embeddings(query_embedding, collection, k=5):
    # Fall back to a fresh connection only if no collection was passed in
    if collection is None:
        collection = get_mongo_collection()

    documents = collection.find()
    similarities = []

    # Calculate the cosine similarity between the query embedding and each document embedding
    for doc in documents:
        embedding = np.array(doc["embedding"])
        similarity = cosine_similarity(query_embedding, embedding)
        similarities.append((doc, similarity))

    # Sort the documents by similarity
    similarities.sort(key=lambda x: x[1], reverse=True)

    # Get the top k most similar documents
    top_k_documents = [doc for doc, _ in similarities[:k]]

    return top_k_documents
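
Taken together, the helpers added in this PR are meant to be wired up roughly as follows. This is a minimal sketch, not part of the diff: it assumes OPENAI_API_KEY and the MongoDB environment variables are set, and the sample strings are made up for illustration.

from db.mongodb_vector_embedding import get_mongo_collection
from utils.embeddings import text_to_embedding
from engine.llm import query_llm

collection = get_mongo_collection()

# Store a clipboard-style snippet as text plus its embedding
snippet = "Reminder: the demo starts at 3 PM in room 210."
collection.insert_one({"text": snippet, "embedding": text_to_embedding(snippet)})

# Retrieve the most similar stored snippets and prepend them as context for the LLM
answer = query_llm("When does the demo start?", collection)
print(answer)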