diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e61812f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +notes/ diff --git a/smart-notes-design/README.md b/smart-notes-design/README.md new file mode 100644 index 0000000..37f4ed3 --- /dev/null +++ b/smart-notes-design/README.md @@ -0,0 +1,32 @@ +# Smart Notes – Landing Page UI Design + +## Overview +This folder contains the UI/UX design exploration for the Smart Notes +landing page. The goal is to visually communicate the app’s privacy-first, +offline-by-default philosophy through a clean and focused interface. + +## Scope +- Landing page UI design +- No functional implementation included +- Design-first contribution + +## Screens Included +- Landing Page (Desktop) + +## Design Goals +- Clear value proposition +- Calm, distraction-free layout +- Emphasis on privacy and offline usage +- Developer-friendly design for easy implementation + +## Design Decisions +- Minimal color palette +- Bento-style feature cards +- Strong visual hierarchy +- Simple and focused navigation + +## Assets +- `design/landing-page.png` – Landing page UI mockup + +## Status +Initial design mock submitted for feedback and iteration. diff --git a/smart-notes-design/design/Landing page (smart notes).png b/smart-notes-design/design/Landing page (smart notes).png new file mode 100644 index 0000000..3f896a2 Binary files /dev/null and b/smart-notes-design/design/Landing page (smart notes).png differ diff --git a/smart-notes/rag_mvp/README.md b/smart-notes/rag_mvp/README.md new file mode 100644 index 0000000..fd51419 --- /dev/null +++ b/smart-notes/rag_mvp/README.md @@ -0,0 +1,84 @@ +# Smart Notes – Local Q&A (RAG MVP) + +This is a minimal, local-first MVP that allows users to ask natural-language questions over their markdown notes. 
+ +## Features (Current MVP) + +- Loads markdown files from a local `notes/` directory +- Supports natural-language questions (e.g., "what is AI", "where is AI used") +- Returns sentence-level answers from notes +- Shows the source note filename +- Interactive CLI loop (type `exit` to quit) + +This is a starter implementation intended to be extended with embeddings and vector search in future iterations. + +--- + +## How it works + +1. Notes are loaded from the local `notes/` directory. +2. Question words (what, where, who, when, etc.) are filtered. +3. Notes are split into sentences. +4. Relevant sentences are returned based on keyword matching. + +--- + +## How to run + +```bash +python smart-notes/rag_mvp/qa_cli.py + + + +>> what is AI + +[1] From test.md: +Artificial Intelligence (AI) is the simulation of human intelligence in machines. + + +>> what is machine learning +how is machine learning used +difference between AI and ML +``` + + + + +# Smart Notes – RAG MVP (Embeddings & FAISS) + +This project is a simple **Retrieval-Augmented Generation (RAG)** pipeline for Smart Notes. +It allows users to store notes, convert them into embeddings, and search relevant notes using vector similarity.
+ +--- + +## 🚀 Features + +- Convert notes into embeddings using Sentence Transformers +- Store and search embeddings using FAISS (CPU) +- CLI tool to ask questions about your notes +- Simple chunking for text files +- Works fully offline after model download + +--- + +## 🧠 Tech Stack + +- Python 3.10+ +- sentence-transformers +- FAISS (faiss-cpu) +- HuggingFace Transformers + +--- + +## 📁 Project Structure + +```bash +smart-notes/ +├── rag_mvp/ +│ ├── embed.py # Embedding logic +│ ├── index.py # FAISS index creation +│ ├── qa_cli.py # CLI for asking questions +│ └── utils.py # Helper functions +├── notes/ # Put your .txt notes here +├── requirements.txt +└── README.md diff --git a/smart-notes/rag_mvp/__pycache__/qa_cli.cpython-313.pyc b/smart-notes/rag_mvp/__pycache__/qa_cli.cpython-313.pyc new file mode 100644 index 0000000..df45ac4 Binary files /dev/null and b/smart-notes/rag_mvp/__pycache__/qa_cli.cpython-313.pyc differ diff --git a/smart-notes/rag_mvp/embeddings/__init__.py b/smart-notes/rag_mvp/embeddings/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/smart-notes/rag_mvp/embeddings/chunker.py b/smart-notes/rag_mvp/embeddings/chunker.py new file mode 100644 index 0000000..6682853 --- /dev/null +++ b/smart-notes/rag_mvp/embeddings/chunker.py @@ -0,0 +1,31 @@ +""" +Chunking utilities for splitting long notes into overlapping chunks. +This helps embeddings capture local context. 
+""" + +from typing import List + + +def chunk_text(text: str, max_length: int = 500, overlap: int = 50) -> List[str]: + if not text: + return [] + + chunks = [] + start = 0 + text = text.strip() + + while start < len(text): + end = start + max_length + chunk = text[start:end].strip() + + if chunk: + chunks.append(chunk) + + if end >= len(text): + break + + start = end - overlap + if start < 0: + start = 0 + + return chunks diff --git a/smart-notes/rag_mvp/embeddings/embedder.py b/smart-notes/rag_mvp/embeddings/embedder.py new file mode 100644 index 0000000..1f296c0 --- /dev/null +++ b/smart-notes/rag_mvp/embeddings/embedder.py @@ -0,0 +1,30 @@ +""" +Embedding wrapper for converting text chunks into vectors. +Supports pluggable embedding backends later (Ollama, OpenAI, SentenceTransformers). +""" + +from typing import List +import numpy as np + +try: + from sentence_transformers import SentenceTransformer +except ImportError: + SentenceTransformer = None + + +class Embedder: + def __init__(self, model_name: str = "all-MiniLM-L6-v2"): + if SentenceTransformer is None: + raise ImportError( + "sentence-transformers not installed. Run: pip install sentence-transformers" + ) + + self.model_name = model_name + self.model = SentenceTransformer(model_name) + + def embed(self, texts: List[str]) -> np.ndarray: + if not texts: + return np.array([]) + + embeddings = self.model.encode(texts, convert_to_numpy=True) + return embeddings diff --git a/smart-notes/rag_mvp/embeddings/indexer.py b/smart-notes/rag_mvp/embeddings/indexer.py new file mode 100644 index 0000000..d1dc7d6 --- /dev/null +++ b/smart-notes/rag_mvp/embeddings/indexer.py @@ -0,0 +1,41 @@ +""" +Simple vector indexer using FAISS for similarity search. +""" + +from typing import List +import numpy as np + +try: + import faiss +except ImportError: + faiss = None + + +class VectorIndexer: + def __init__(self, dim: int): + if faiss is None: + raise ImportError("faiss not installed. 
Run: pip install faiss-cpu") + + self.dim = dim + self.index = faiss.IndexFlatL2(dim) + self.texts: List[str] = [] + + def add(self, embeddings: np.ndarray, chunks: List[str]): + if len(embeddings) == 0: + return + + self.index.add(embeddings) + self.texts.extend(chunks) + + def search(self, query_embedding: np.ndarray, k: int = 3): + if self.index.ntotal == 0: + return [] + + distances, indices = self.index.search(query_embedding.reshape(1, -1), k) + results = [] + + for idx in indices[0]: + if idx < len(self.texts): + results.append(self.texts[idx]) + + return results diff --git a/smart-notes/rag_mvp/pipelines/__init__.py b/smart-notes/rag_mvp/pipelines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/smart-notes/rag_mvp/pipelines/__pycache__/__init__.cpython-313.pyc b/smart-notes/rag_mvp/pipelines/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..2915ec3 Binary files /dev/null and b/smart-notes/rag_mvp/pipelines/__pycache__/__init__.cpython-313.pyc differ diff --git a/smart-notes/rag_mvp/pipelines/__pycache__/embedding_pipeline.cpython-313.pyc b/smart-notes/rag_mvp/pipelines/__pycache__/embedding_pipeline.cpython-313.pyc new file mode 100644 index 0000000..480e783 Binary files /dev/null and b/smart-notes/rag_mvp/pipelines/__pycache__/embedding_pipeline.cpython-313.pyc differ diff --git a/smart-notes/rag_mvp/pipelines/embedding_pipeline.py b/smart-notes/rag_mvp/pipelines/embedding_pipeline.py new file mode 100644 index 0000000..2163bcd --- /dev/null +++ b/smart-notes/rag_mvp/pipelines/embedding_pipeline.py @@ -0,0 +1,47 @@ +# rag_mvp/pipelines/embedding_pipeline.py + +from sentence_transformers import SentenceTransformer +import faiss +import numpy as np + + +class EmbeddingPipeline: + def __init__(self, model_name="all-MiniLM-L6-v2"): + self.model = SentenceTransformer(model_name, cache_folder="D:/models_cache") + self.index = None + self.chunks = [] + + def chunk_text(self, text, max_length=300, overlap=50): + chunks = 
[] + start = 0 + + while start < len(text): + end = start + max_length + chunk = text[start:end] + chunks.append(chunk) + start = end - overlap + + return chunks + + def build_index(self, chunks): + embeddings = self.model.encode(chunks) + embeddings = np.array(embeddings).astype("float32") + + dim = embeddings.shape[1] + self.index = faiss.IndexFlatL2(dim) + self.index.add(embeddings) + + return embeddings + + def process_notes(self, text): + self.chunks = self.chunk_text(text) + embeddings = self.build_index(self.chunks) + return self.chunks, embeddings + + def semantic_search(self, query, top_k=3): + query_vec = self.model.encode([query]) + query_vec = np.array(query_vec).astype("float32") + + distances, indices = self.index.search(query_vec, top_k) + results = [self.chunks[i] for i in indices[0]] + return results diff --git a/smart-notes/rag_mvp/qa_cli.py b/smart-notes/rag_mvp/qa_cli.py new file mode 100644 index 0000000..4b3f19d --- /dev/null +++ b/smart-notes/rag_mvp/qa_cli.py @@ -0,0 +1,109 @@ +import os +import re + +#-------------------emedding-pipeline-chunking concept +from rag_mvp.pipelines.embedding_pipeline import EmbeddingPipeline + +def demo_embeddings_pipeline(): + pipeline = EmbeddingPipeline() + + note_text = """ + Python is a programming language. + It is widely used in AI and machine learning projects. + Smart Notes helps users organize knowledge using embeddings. + """ + + chunks, embeddings = pipeline.process_notes(note_text) + + print("\n--- Chunks Created ---") + for i, c in enumerate(chunks): + print(f"[{i}] {c}") + + query = "What is Python used for?" 
+ results = pipeline.semantic_search(query) + + print("\n--- Search Results ---") + for r in results: + print("-", r) +#------------------------------------------------- + + + + +QUESTION_WORDS = { + "what", "where", "who", "when", "which", + "is", "are", "was", "were", "the", "a", "an", + "of", "to", "in", "on", "for" +} + +NOTES_DIR = "notes" + + +def load_notes(): + notes = [] + if not os.path.exists(NOTES_DIR): + print(f"Notes directory '{NOTES_DIR}' not found.") + return notes + + for file in os.listdir(NOTES_DIR): + if file.endswith(".md"): + path = os.path.join(NOTES_DIR, file) + with open(path, "r", encoding="utf-8") as f: + notes.append({ + "filename": file, + "content": f.read() + }) + return notes + + +def split_sentences(text): + return re.split(r'(?<=[.!?])\s+', text) + + +def search_notes(query, notes): + results = [] + + query_words = [ + word.lower() + for word in query.split() + if word.lower() not in QUESTION_WORDS + ] + + for note in notes: + sentences = split_sentences(note["content"]) + for sentence in sentences: + sentence_lower = sentence.lower() + if any(word in sentence_lower for word in query_words): + results.append({ + "filename": note["filename"], + "sentence": sentence.strip() + }) + + return results + + +if __name__ == "__main__": + + demo_embeddings_pipeline() # Temporary demo for embeddings pipeline + + notes = load_notes() + + print("Ask questions about your notes (type 'exit' to quit)\n") + + while True: + query = input(">> ").strip() + + if query.lower() == "exit": + print("Goodbye 👋") + break + + matches = search_notes(query, notes) + + if not matches: + print("No relevant notes found.\n") + else: + print("\n--- Answers ---\n") + for i, m in enumerate(matches, 1): + print(f"[{i}] From {m['filename']}:") + print(m["sentence"]) + print()