From 564741c74ff1601740df8e84eb0d07e5fd7974f7 Mon Sep 17 00:00:00 2001
From: Tuba Javed
Date: Mon, 9 Feb 2026 00:18:11 +0530
Subject: [PATCH 1/2] Add local Q&A CLI MVP for markdown notes

---
 .gitignore                    |  1 +
 smart-notes/rag_mvp/README.md | 42 ++++++++++++++++
 smart-notes/rag_mvp/qa_cli.py | 77 +++++++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 smart-notes/rag_mvp/README.md
 create mode 100644 smart-notes/rag_mvp/qa_cli.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e61812f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+notes/
diff --git a/smart-notes/rag_mvp/README.md b/smart-notes/rag_mvp/README.md
new file mode 100644
index 0000000..2ce2968
--- /dev/null
+++ b/smart-notes/rag_mvp/README.md
@@ -0,0 +1,42 @@
+# Smart Notes – Local Q&A (RAG MVP)
+
+This is a minimal, local-first MVP that allows users to ask natural-language questions over their markdown notes.
+
+## Features (Current MVP)
+
+- Loads markdown files from a local `notes/` directory
+- Supports natural-language questions (e.g., "what is AI", "where is AI used")
+- Returns sentence-level answers from notes
+- Shows the source note filename
+- Interactive CLI loop (type `exit` to quit)
+
+This is a starter implementation intended to be extended with embeddings and vector search in future iterations.
+
+---
+
+## How it works
+
+1. Notes are loaded from the local `notes/` directory.
+2. Question words (what, where, who, when, etc.) are filtered out of the query.
+3. Notes are split into sentences.
+4. Sentences containing any of the remaining keywords are returned, along with their source filename.
+
+---
+
+## How to run
+
+```bash
+python smart-notes/rag_mvp/qa_cli.py
+```
+
+Example session (other queries to try are listed at the end):
+
+```
+>> what is AI
+
+[1] From test.md:
+Artificial Intelligence (AI) is the simulation of human intelligence in machines.
+
+>> what is machine learning
+how is machine learning used
+difference between AI and ML
+```
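As a reviewer aid, here is a minimal, self-contained sketch of steps 2–4 of "How it works" (question-word filtering, sentence splitting, keyword matching). It is illustrative only and not part of the patch; the stop-word set below is a reduced stand-in for the `QUESTION_WORDS` defined in `qa_cli.py` in the next file.

```python
# Illustrative sketch only (not part of the patch); mirrors the matching logic
# implemented in qa_cli.py, with a reduced stop-word list.
import re

STOP_WORDS = {"what", "is", "the", "a"}  # subset of QUESTION_WORDS, for illustration

note = ("Artificial Intelligence (AI) is the simulation of human intelligence in machines. "
        "AI is widely used in search engines.")
query = "what is AI"

keywords = [w.lower() for w in query.split() if w.lower() not in STOP_WORDS]  # -> ['ai']
sentences = re.split(r"(?<=[.!?])\s+", note)
matches = [s for s in sentences if any(k in s.lower() for k in keywords)]

for s in matches:
    print(s)  # both sentences contain the keyword "ai"
```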
diff --git a/smart-notes/rag_mvp/qa_cli.py b/smart-notes/rag_mvp/qa_cli.py
new file mode 100644
index 0000000..210d56a
--- /dev/null
+++ b/smart-notes/rag_mvp/qa_cli.py
@@ -0,0 +1,77 @@
+import os
+import re
+
+QUESTION_WORDS = {
+    "what", "where", "who", "when", "which",
+    "is", "are", "was", "were", "the", "a", "an",
+    "of", "to", "in", "on", "for"
+}
+
+NOTES_DIR = "notes"
+
+
+def load_notes():
+    notes = []
+    if not os.path.exists(NOTES_DIR):
+        print(f"Notes directory '{NOTES_DIR}' not found.")
+        return notes
+
+    for file in os.listdir(NOTES_DIR):
+        if file.endswith(".md"):
+            path = os.path.join(NOTES_DIR, file)
+            with open(path, "r", encoding="utf-8") as f:
+                notes.append({
+                    "filename": file,
+                    "content": f.read()
+                })
+    return notes
+
+
+def split_sentences(text):
+    return re.split(r'(?<=[.!?])\s+', text)
+
+
+def search_notes(query, notes):
+    results = []
+
+    query_words = [
+        word.lower()
+        for word in query.split()
+        if word.lower() not in QUESTION_WORDS
+    ]
+
+    for note in notes:
+        sentences = split_sentences(note["content"])
+        for sentence in sentences:
+            sentence_lower = sentence.lower()
+            if any(word in sentence_lower for word in query_words):
+                results.append({
+                    "filename": note["filename"],
+                    "sentence": sentence.strip()
+                })
+
+    return results
+
+
+if __name__ == "__main__":
+    notes = load_notes()
+
+    print("Ask questions about your notes (type 'exit' to quit)\n")
+
+    while True:
+        query = input(">> ").strip()
+
+        if query.lower() == "exit":
+            print("Goodbye 👋")
+            break
+
+        matches = search_notes(query, notes)
+
+        if not matches:
+            print("No relevant notes found.\n")
+        else:
+            print("\n--- Answers ---\n")
+            for i, m in enumerate(matches, 1):
+                print(f"[{i}] From {m['filename']}:")
+                print(m["sentence"])
+                print()
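A quick way to see both the behaviour and the limitation of this keyword approach (and the motivation for the embedding work in the next commit) is to call `search_notes` directly. This is a hypothetical snippet, not part of the patch; it assumes it is run from the repository root so that `smart-notes/rag_mvp` can be put on `sys.path`.

```python
# Hypothetical usage sketch (not part of the patch).
import sys
sys.path.insert(0, "smart-notes/rag_mvp")  # assumed to be run from the repository root

from qa_cli import search_notes

notes = [{
    "filename": "ml.md",
    "content": "Machine learning lets computers improve from data without explicit programming.",
}]

# Shared keywords -> the sentence is found.
print(search_notes("what is machine learning", notes))

# Same topic, no shared keywords -> nothing is found; the embeddings added in
# the next commit are meant to cover this case.
print(search_notes("how does AI get smarter over time", notes))  # []
```

Note that the matching is substring-based, so very short keywords can over-match; that trade-off is acceptable for this MVP.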
From 523813198988c614702b2a3a9afcc56855700d5a Mon Sep 17 00:00:00 2001
From: Tuba Javed
Date: Wed, 11 Feb 2026 01:14:57 +0530
Subject: [PATCH 2/2] Add embedding pipeline with chunking and FAISS search

---
 smart-notes/rag_mvp/README.md                 | 42 ++++++++++++++++
 .../__pycache__/qa_cli.cpython-313.pyc        | Bin 0 -> 4226 bytes
 smart-notes/rag_mvp/embeddings/__init__.py    |  0
 smart-notes/rag_mvp/embeddings/chunker.py     | 31 ++++++++++++
 smart-notes/rag_mvp/embeddings/embedder.py    | 30 +++++++++++
 smart-notes/rag_mvp/embeddings/indexer.py     | 41 +++++++++++++++
 smart-notes/rag_mvp/pipelines/__init__.py     |  0
 .../__pycache__/__init__.cpython-313.pyc      | Bin 0 -> 203 bytes
 .../embedding_pipeline.cpython-313.pyc        | Bin 0 -> 2897 bytes
 .../rag_mvp/pipelines/embedding_pipeline.py   | 47 ++++++++++++++++++
 smart-notes/rag_mvp/qa_cli.py                 | 32 ++++++++++++
 11 files changed, 223 insertions(+)
 create mode 100644 smart-notes/rag_mvp/__pycache__/qa_cli.cpython-313.pyc
 create mode 100644 smart-notes/rag_mvp/embeddings/__init__.py
 create mode 100644 smart-notes/rag_mvp/embeddings/chunker.py
 create mode 100644 smart-notes/rag_mvp/embeddings/embedder.py
 create mode 100644 smart-notes/rag_mvp/embeddings/indexer.py
 create mode 100644 smart-notes/rag_mvp/pipelines/__init__.py
 create mode 100644 smart-notes/rag_mvp/pipelines/__pycache__/__init__.cpython-313.pyc
 create mode 100644 smart-notes/rag_mvp/pipelines/__pycache__/embedding_pipeline.cpython-313.pyc
 create mode 100644 smart-notes/rag_mvp/pipelines/embedding_pipeline.py

diff --git a/smart-notes/rag_mvp/README.md b/smart-notes/rag_mvp/README.md
index 2ce2968..fd51419 100644
--- a/smart-notes/rag_mvp/README.md
+++ b/smart-notes/rag_mvp/README.md
@@ -40,3 +40,45 @@ Artificial Intelligence (AI) is the simulation of human intelligence in machines
 how is machine learning used
 difference between AI and ML
 ```
+
+
+# Smart Notes – RAG MVP (Embeddings & FAISS)
+
+This iteration adds a simple **Retrieval-Augmented Generation (RAG)** pipeline to Smart Notes.
+It allows users to store notes, convert them into embeddings, and search relevant notes using vector similarity.
+
+---
+
+## 🚀 Features
+
+- Convert notes into embeddings using Sentence Transformers
+- Store and search embeddings using FAISS (CPU)
+- CLI tool to ask questions about your notes
+- Simple character-based chunking with overlap
+- Works fully offline after the initial model download
+
+---
+
+## 🧠 Tech Stack
+
+- Python 3.10+
+- sentence-transformers
+- FAISS (faiss-cpu)
+- HuggingFace Transformers
+
+---
+
+## 📁 Project Structure
+
+```text
+smart-notes/
+└── rag_mvp/
+    ├── embeddings/
+    │   ├── chunker.py             # Overlapping text chunking
+    │   ├── embedder.py            # SentenceTransformer wrapper
+    │   └── indexer.py             # FAISS index wrapper
+    ├── pipelines/
+    │   └── embedding_pipeline.py  # Chunk, embed, index, search
+    ├── qa_cli.py                  # CLI for asking questions
+    └── README.md
+```
+
+The `notes/` directory (git-ignored) holds the `.md` notes and is resolved relative to the working directory.
diff --git a/smart-notes/rag_mvp/__pycache__/qa_cli.cpython-313.pyc b/smart-notes/rag_mvp/__pycache__/qa_cli.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..df45ac456f685d62f0378214e1ce3c930d2f57f0
GIT binary patch
literal 4226 (compiled bytecode payload omitted)

literal 0
HcmV?d00001

diff --git a/smart-notes/rag_mvp/embeddings/__init__.py b/smart-notes/rag_mvp/embeddings/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/smart-notes/rag_mvp/embeddings/chunker.py b/smart-notes/rag_mvp/embeddings/chunker.py
new file mode 100644
--- /dev/null
+++ b/smart-notes/rag_mvp/embeddings/chunker.py
@@ -0,0 +1,31 @@
+"""
+Simple text chunker for Smart Notes.
+Splits note text into overlapping character windows for embedding.
+"""
+
+from typing import List
+
+
+def chunk_text(text: str, max_length: int = 300, overlap: int = 50) -> List[str]:
+    if not text:
+        return []
+
+    chunks = []
+    start = 0
+    text = text.strip()
+
+    while start < len(text):
+        end = start + max_length
+        chunk = text[start:end].strip()
+
+        if chunk:
+            chunks.append(chunk)
+
+        if end >= len(text):
+            break
+
+        start = end - overlap
+        if start < 0:
+            start = 0
+
+    return chunks
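A small, hypothetical example (not part of the patch) of how `chunk_text` windows a string, with tiny limits so the character-level overlap is visible; it assumes `smart-notes/rag_mvp` is on `sys.path`.

```python
# Hypothetical example (not part of the patch): character windows with overlap.
from embeddings.chunker import chunk_text

text = "abcdefghijkl"
print(chunk_text(text, max_length=5, overlap=2))
# ['abcde', 'defgh', 'ghijk', 'jkl'] – each window restarts 2 characters
# before the previous one ended.
```

Since the windows are character-based rather than sentence- or token-based, chunks can cut words in half; that is acceptable for an MVP but worth keeping in mind for later iterations.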
diff --git a/smart-notes/rag_mvp/embeddings/embedder.py b/smart-notes/rag_mvp/embeddings/embedder.py
new file mode 100644
index 0000000..1f296c0
--- /dev/null
+++ b/smart-notes/rag_mvp/embeddings/embedder.py
@@ -0,0 +1,30 @@
+"""
+Embedding wrapper for converting text chunks into vectors.
+Supports pluggable embedding backends later (Ollama, OpenAI, SentenceTransformers).
+"""
+
+from typing import List
+import numpy as np
+
+try:
+    from sentence_transformers import SentenceTransformer
+except ImportError:
+    SentenceTransformer = None
+
+
+class Embedder:
+    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
+        if SentenceTransformer is None:
+            raise ImportError(
+                "sentence-transformers not installed. Run: pip install sentence-transformers"
+            )
+
+        self.model_name = model_name
+        self.model = SentenceTransformer(model_name)
+
+    def embed(self, texts: List[str]) -> np.ndarray:
+        if not texts:
+            return np.array([])
+
+        embeddings = self.model.encode(texts, convert_to_numpy=True)
+        return embeddings
diff --git a/smart-notes/rag_mvp/embeddings/indexer.py b/smart-notes/rag_mvp/embeddings/indexer.py
new file mode 100644
index 0000000..d1dc7d6
--- /dev/null
+++ b/smart-notes/rag_mvp/embeddings/indexer.py
@@ -0,0 +1,41 @@
+"""
+Simple vector indexer using FAISS for similarity search.
+"""
+
+from typing import List
+import numpy as np
+
+try:
+    import faiss
+except ImportError:
+    faiss = None
+
+
+class VectorIndexer:
+    def __init__(self, dim: int):
+        if faiss is None:
+            raise ImportError("faiss not installed. Run: pip install faiss-cpu")
+
+        self.dim = dim
+        self.index = faiss.IndexFlatL2(dim)
+        self.texts: List[str] = []
+
+    def add(self, embeddings: np.ndarray, chunks: List[str]):
+        if len(embeddings) == 0:
+            return
+
+        self.index.add(embeddings)
+        self.texts.extend(chunks)
+
+    def search(self, query_embedding: np.ndarray, k: int = 3):
+        if self.index.ntotal == 0:
+            return []
+
+        distances, indices = self.index.search(query_embedding.reshape(1, -1), k)
+        results = []
+
+        for idx in indices[0]:
+            # FAISS pads with -1 when k exceeds the number of stored vectors.
+            if 0 <= idx < len(self.texts):
+                results.append(self.texts[idx])
+
+        return results
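`Embedder` and `VectorIndexer` are not wired together anywhere in this patch (the CLI demo later in the diff uses `EmbeddingPipeline` instead), so here is a hypothetical sketch of how they compose. It assumes `sentence-transformers` and `faiss-cpu` are installed and that `smart-notes/rag_mvp` is on `sys.path`.

```python
# Hypothetical sketch (not part of the patch): composing Embedder and VectorIndexer.
from embeddings.embedder import Embedder
from embeddings.indexer import VectorIndexer

chunks = [
    "Python is a widely used programming language.",
    "FAISS performs fast similarity search over dense vectors.",
]

embedder = Embedder()             # downloads all-MiniLM-L6-v2 on first use
vectors = embedder.embed(chunks)  # shape (2, 384) for this model

index = VectorIndexer(dim=vectors.shape[1])
index.add(vectors, chunks)

query_vec = embedder.embed(["what is FAISS used for?"])[0]
for hit in index.search(query_vec, k=2):
    print("-", hit)
```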
diff --git a/smart-notes/rag_mvp/pipelines/__init__.py b/smart-notes/rag_mvp/pipelines/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/smart-notes/rag_mvp/pipelines/__pycache__/__init__.cpython-313.pyc b/smart-notes/rag_mvp/pipelines/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2915ec340c53b3d79803691f9787f54060cc7779
GIT binary patch
literal 203 (compiled bytecode payload omitted)

literal 0
HcmV?d00001

diff --git a/smart-notes/rag_mvp/pipelines/__pycache__/embedding_pipeline.cpython-313.pyc b/smart-notes/rag_mvp/pipelines/__pycache__/embedding_pipeline.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..480e783f13ab60d9ccf36eeaa3b40c9da0a6e708
GIT binary patch
literal 2897 (compiled bytecode payload omitted)

literal 0
HcmV?d00001

diff --git a/smart-notes/rag_mvp/pipelines/embedding_pipeline.py b/smart-notes/rag_mvp/pipelines/embedding_pipeline.py
new file mode 100644
index 0000000..2163bcd
--- /dev/null
+++ b/smart-notes/rag_mvp/pipelines/embedding_pipeline.py
@@ -0,0 +1,47 @@
+# rag_mvp/pipelines/embedding_pipeline.py
+
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+
+
+class EmbeddingPipeline:
+    def __init__(self, model_name="all-MiniLM-L6-v2", cache_folder=None):
+        # cache_folder is optional; by default the model is cached in the
+        # standard sentence-transformers cache directory.
+        self.model = SentenceTransformer(model_name, cache_folder=cache_folder)
+        self.index = None
+        self.chunks = []
+
+    def chunk_text(self, text, max_length=300, overlap=50):
+        chunks = []
+        start = 0
+
+        while start < len(text):
+            end = start + max_length
+            chunk = text[start:end]
+            chunks.append(chunk)
+            start = end - overlap
+
+        return chunks
+
+    def build_index(self, chunks):
+        embeddings = self.model.encode(chunks)
+        embeddings = np.array(embeddings).astype("float32")
+
+        dim = embeddings.shape[1]
+        self.index = faiss.IndexFlatL2(dim)
+        self.index.add(embeddings)
+
+        return embeddings
+
+    def process_notes(self, text):
+        self.chunks = self.chunk_text(text)
+        embeddings = self.build_index(self.chunks)
+        return self.chunks, embeddings
+
+    def semantic_search(self, query, top_k=3):
+        query_vec = self.model.encode([query])
+        query_vec = np.array(query_vec).astype("float32")
+
+        distances, indices = self.index.search(query_vec, top_k)
+        # Skip the -1 padding FAISS returns when top_k exceeds the chunk count.
+        results = [self.chunks[i] for i in indices[0] if 0 <= i < len(self.chunks)]
+        return results
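For context, a hypothetical sketch (not part of the patch) of how `EmbeddingPipeline` could be fed the actual `notes/` directory rather than a hard-coded string, assuming it is run from the repository root with `smart-notes/rag_mvp` on `sys.path`:

```python
# Hypothetical usage sketch (not part of the patch): index the notes/ directory.
import os
import sys

sys.path.insert(0, "smart-notes/rag_mvp")  # assumed to be run from the repository root
from pipelines.embedding_pipeline import EmbeddingPipeline

texts = []
for name in os.listdir("notes"):
    if name.endswith(".md"):
        with open(os.path.join("notes", name), encoding="utf-8") as f:
            texts.append(f.read())

pipeline = EmbeddingPipeline()
chunks, _ = pipeline.process_notes("\n".join(texts))
print(f"Indexed {len(chunks)} chunks")

for hit in pipeline.semantic_search("what is AI", top_k=3):
    print("-", hit)
```

Everything stays in memory here; persisting the FAISS index is left for a later iteration.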
diff --git a/smart-notes/rag_mvp/qa_cli.py b/smart-notes/rag_mvp/qa_cli.py
index 210d56a..4b3f19d 100644
--- a/smart-notes/rag_mvp/qa_cli.py
+++ b/smart-notes/rag_mvp/qa_cli.py
@@ -1,6 +1,35 @@
 import os
 import re
+# ------------------- embedding-pipeline-chunking concept -------------------
+from pipelines.embedding_pipeline import EmbeddingPipeline
+
+def demo_embeddings_pipeline():
+    pipeline = EmbeddingPipeline()
+
+    note_text = """
+    Python is a programming language.
+    It is widely used in AI and machine learning projects.
+    Smart Notes helps users organize knowledge using embeddings.
+    """
+
+    chunks, embeddings = pipeline.process_notes(note_text)
+
+    print("\n--- Chunks Created ---")
+    for i, c in enumerate(chunks):
+        print(f"[{i}] {c}")
+
+    query = "What is Python used for?"
+    results = pipeline.semantic_search(query)
+
+    print("\n--- Search Results ---")
+    for r in results:
+        print("-", r)
+# ---------------------------------------------------------------------------
+
+
+
 
 QUESTION_WORDS = {
     "what", "where", "who", "when", "which",
     "is", "are", "was", "were", "the", "a", "an",
     "of", "to", "in", "on", "for"
 }
@@ -54,6 +83,9 @@ def search_notes(query, notes):
 
 
 if __name__ == "__main__":
+
+    demo_embeddings_pipeline()  # Temporary demo for the embedding pipeline
+
     notes = load_notes()
 
     print("Ask questions about your notes (type 'exit' to quit)\n")
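One possible follow-up, sketched here rather than included in the patch: `demo_embeddings_pipeline()` runs on every start, and the module-level pipeline import pulls in `sentence-transformers` and `faiss`, so the keyword-only CLI from the first commit no longer starts without those dependencies. Deferring the import and gating the demo keeps it optional; the `SMART_NOTES_EMBED_DEMO` variable below is an assumed name for this sketch.

```python
# Hypothetical refinement (not part of the patch): keep the embedding demo optional.
import os


def maybe_run_embedding_demo() -> None:
    # SMART_NOTES_EMBED_DEMO is an assumed environment variable for this sketch.
    if os.environ.get("SMART_NOTES_EMBED_DEMO") != "1":
        return
    try:
        # Deferred import so missing optional dependencies do not break the CLI.
        from pipelines.embedding_pipeline import EmbeddingPipeline
    except ImportError as exc:
        print(f"Embedding demo skipped ({exc}); keyword search is still available.")
        return

    pipeline = EmbeddingPipeline()
    pipeline.process_notes("Python is widely used in AI and machine learning projects.")
    print(pipeline.semantic_search("What is Python used for?", top_k=1))


if __name__ == "__main__":
    maybe_run_embedding_demo()
```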