30 changes: 20 additions & 10 deletions db/mongodb_vector_embedding.py
@@ -5,23 +5,33 @@
# Load environment variables from .env file
load_dotenv()

# Retrieve credentials from environment variables
username = os.getenv("MONGODB_USERNAME")
password = os.getenv("MONGODB_PASSWORD")
database_name = os.getenv("DATABASE_NAME")
collection_name = os.getenv("COLLECTION_NAME")

def get_mongo_collection():
uri = f"mongodb+srv://{username}:{password}@hackthe6ix.iw7r6n4.mongodb.net/?retryWrites=true&w=majority&appName=Hackthe6ix"
print(uri)
username = os.getenv("MONGODB_USERNAME")
password = os.getenv("MONGODB_PASSWORD")
database_name = os.getenv("DATABASE_NAME")
collection_name = os.getenv("COLLECTION_NAME")

url = f"mongodb+srv://{username}:{password}@hackthe6ix.iw7r6n4.mongodb.net/?retryWrites=true&w=majority&appName=Hackthe6ix"
print(url)

# Create a new client and connect to the server
client = MongoClient(uri)
client = MongoClient(url)
# Send a ping to confirm a successful connection
try:
client.admin.command('ping')
print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
print(e)
get_mongo_collection()

db = client[database_name]
collection = db[collection_name]
return collection


# Test the connection
collection = get_mongo_collection()

if collection is not None:
    print("Successfully obtained collection.")
else:
    print("Failed to obtain collection.")
30 changes: 30 additions & 0 deletions engine/llm.py
@@ -1,5 +1,7 @@
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from utils.embeddings import text_to_embedding, get_top_k_similar_embeddings
import pyperclip

load_dotenv()

@@ -24,3 +26,31 @@ def query_llm(text: str):

    return res.content


# NOTE: this second definition shadows query_llm(text) above; callers now pass (prompt, collection)
def query_llm(prompt, collection):

    prompt_embedding = text_to_embedding(prompt)

    # Retrieve the top 5 similar documents
    top_documents = get_top_k_similar_embeddings(prompt_embedding, collection)

    # Extract text from the top documents
    context = " ".join([doc["text"] for doc in top_documents])

    full_prompt = f"{context}\n\n{prompt}"

    llm = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )

    # invoke() accepts a plain prompt string; generate() expects a list of message lists
    response = llm.invoke(full_prompt)

    # Copy the response text (not the AIMessage object) to the clipboard
    pyperclip.copy(response.content)

    return response.content

62 changes: 54 additions & 8 deletions events/events.py
@@ -4,13 +4,16 @@
import platform
import os
import sys
from db.mongodb_vector_embedding import get_mongo_collection
from utils.embeddings import text_to_embedding
import json
from datetime import datetime


sys.path.append("..")
from engine.matlab import start
from engine.llm import query_llm
from server import set_qrcode
from server import jobs
import validation

@@ -19,7 +22,6 @@
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


import nest_asyncio
import uvicorn
@@ -60,8 +62,13 @@ def run_server():
CTRL_C_KEYS_MAC = {CONTROL, "c"} # Cmd+C
# CTRL_V1_SHIFT_KEYS_MAC = {CONTROL, "v", "1"} # Cmd+V+1
# CTRL_V2_SHIFT_KEYS_MAC = {CONTROL, "v", "2"}
# CTRL_V3_SHIFT_KEYS_MAC = {CONTROL, "v", "3"}
# CTRL_V4_SHIFT_KEYS_MAC = {CONTROL, "v", "5"}
SHIFT_KEYS = {keyboard.Key.shift, keyboard.Key.shift_r} # Left and Right Shift

CTRL_U_KEYS_MAC = {CONTROL, "u"} # Cmd+U
CTRL_O_KEYS_MAC = {CONTROL, "o"} # Cmd+O
@@ -75,6 +82,32 @@ def run_server():
TERMINATE_COMBINATION_MAC = {CONTROL, "p"} # Cmd+P
UNDO_KEY = "z"
controller = keyboard.Controller()
collection = get_mongo_collection()


def capture_and_store_clipboard():
    try:
        clipboard_content = pyperclip.paste()
        logging.info(f"Clipboard content: {clipboard_content}")

        # Convert text to vector embedding
        vector_embedding = text_to_embedding(clipboard_content)

        # Prepare the document
        document = {
            "text": clipboard_content,
            "embedding": vector_embedding
        }

        # Insert the document into the MongoDB collection
        # (PyMongo collections do not support truth-value testing, so compare against None)
        if collection is not None:
            collection.insert_one(document)
            logging.info("Vector embedding inserted into MongoDB.")
        else:
            logging.error("Failed to insert vector embedding into MongoDB. Collection not available.")

    except Exception as e:
        logging.error(f"Error while processing clipboard content: {e}")


def undo():
@@ -88,7 +121,6 @@ def undo():




def show_paste_options():
    # Fetch and print clipboard content
    clipboard_content = pyperclip.paste()
@@ -118,6 +150,7 @@ def on_press(key):
logging.info("Ctrl+V+1 pressed (Windows)")
# Handle specific case for Ctrl+V+1
logging.info("Special key combination Ctrl+V+1 triggered!")
capture_and_store_clipboard()
undo()
start(pyperclip.paste(), logging)

@@ -126,6 +159,7 @@ def on_press(key):
logging.info("Ctrl+V+2 pressed (Windows)")
# Handle specific case for Ctrl+V+2
logging.info("Special key combination Ctrl+V+2 triggered!")
capture_and_store_clipboard()
undo()
# set_qrcode(pyperclip.paste())
logging.info("QR code set")
@@ -135,6 +169,12 @@ def on_press(key):
logging.info("Ctrl+V+3 pressed (Windows)")
# Handle specific case for Ctrl+V+3
logging.info("Special key combination Ctrl+V+3 triggered!")
clipboard_content = pyperclip.paste()
capture_and_store_clipboard()
undo()
response = query_llm(clipboard_content, collection)
print("Generated response:", response)
logging.info("Response generated and displayed.")

# Check for terminating keys (Ctrl+P)
if all(k in current_keys for k in TERMINATE_COMBINATION_WINDOWS):
@@ -153,6 +193,7 @@ def on_press(key):
# ):
if all(k in current_keys for k in CTRL_1_KEYS_MAC):
logging.info("MATLAB (macOS)")
capture_and_store_clipboard()
undo()
start(pyperclip.paste(), logging)

@@ -161,15 +202,24 @@ def on_press(key):
# ):
if all(k in current_keys for k in CTRL_2_KEYS_MAC):
logging.info("Adobe (macOS)")
# set_qrcode(pyperclip.paste())
capture_and_store_clipboard()
undo()
set_qrcode(pyperclip.paste())
logging.info("QR code set")
# if all(k in current_keys for k in CTRL_V4_SHIFT_KEYS_MAC) and any(
# k in current_keys for k in SHIFT_KEYS
# ):
if all(k in current_keys for k in CTRL_3_KEYS_MAC):
logging.info("LLM(macOS)")
clipboard_content = pyperclip.paste()
capture_and_store_clipboard()
undo()

# send job to frontend
res = query_llm(pyperclip.paste())
res = query_llm(clipboard_content, collection)

print("Generated response:", response)
logging.info("Response generated and displayed.")

message = {
"type": "text",
@@ -251,7 +301,3 @@ def on_release(key):
except KeyError:
pass


# Start the listener
with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:
listener.join()
42 changes: 42 additions & 0 deletions utils/embeddings.py
@@ -0,0 +1,42 @@
from langchain_community.embeddings import OpenAIEmbeddings
from db.mongodb_vector_embedding import get_mongo_collection
import numpy as np
import os

# Initialize the embeddings model
def get_embedding_model():
    return OpenAIEmbeddings()

# Convert text to an embedding vector
def text_to_embedding(text):
    model = get_embedding_model()
    # OpenAIEmbeddings exposes embed_query() for a single string; embed_text() does not exist
    embedding = model.embed_query(text)
    return embedding


# not sure if langchain has this already set up (will do this manually)
def cosine_similarity(vec1, vec2):

    # dot product of the two vectors, normalized by their magnitudes
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))


def get_top_k_similar_embeddings(query_embedding, collection, k=5):
    # Fall back to a fresh connection only if no collection was passed in
    if collection is None:
        collection = get_mongo_collection()

    documents = collection.find()
    similarities = []

    # Calculate the cosine similarity between the query embedding and each document embedding
    for doc in documents:
        embedding = np.array(doc["embedding"])
        similarity = cosine_similarity(query_embedding, embedding)
        similarities.append((doc, similarity))

    # Sort the documents by similarity
    similarities.sort(key=lambda x: x[1], reverse=True)

    # Get the top k most similar documents
    top_k_documents = [doc for doc, _ in similarities[:k]]

    return top_k_documents
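
Taken together, the helpers added in this PR are meant to be wired up roughly as follows. This is a minimal sketch, not part of the diff: it assumes OPENAI_API_KEY and the MongoDB environment variables are set, and the sample strings are made up for illustration.

from db.mongodb_vector_embedding import get_mongo_collection
from utils.embeddings import text_to_embedding
from engine.llm import query_llm

collection = get_mongo_collection()

# Store a clipboard-style snippet as text plus its embedding
snippet = "Reminder: the demo starts at 3 PM in room 210."
collection.insert_one({"text": snippet, "embedding": text_to_embedding(snippet)})

# Retrieve the most similar stored snippets and prepend them as context for the LLM
answer = query_llm("When does the demo start?", collection)
print(answer)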