|
78 | 78 | }, |
79 | 79 | { |
80 | 80 | "cell_type": "code", |
81 | | - "execution_count": 3, |
| 81 | + "execution_count": 39, |
82 | 82 | "id": "4995a54f311c4b1c", |
83 | 83 | "metadata": {}, |
84 | 84 | "outputs": [], |
|
100 | 100 | }, |
101 | 101 | { |
102 | 102 | "cell_type": "code", |
103 | | - "execution_count": 4, |
| 103 | + "execution_count": 40, |
104 | 104 | "id": "bdddb9f0a005b74d", |
105 | 105 | "metadata": {}, |
106 | 106 | "outputs": [], |
|
113 | 113 | "# Define your embedding model.\n", |
114 | 114 | "text_embed = EmbeddingFunction(\"openai/text-embedding-3-small\")\n", |
115 | 115 | "\n", |
116 | | - "class Chunk(TiDBModel, table=True):\n", |
| 116 | + "class Chunk(TiDBModel, table=True): \n", |
117 | 117 | " __tablename__ = \"chunks\"\n", |
| 118 | + " __table_args__ = {'extend_existing': True}\n", |
| 119 | + "\n", |
118 | 120 | " id: int = Field(primary_key=True)\n", |
119 | 121 | " text: str = Field()\n", |
120 | | - " text_vec: Optional[Any] = text_embed.VectorField(source_field=\"text\")\n", |
| 122 | + " text_vec: Optional[Any] = text_embed.VectorField(source_field=\"text\") # 👈 Define the vector field.\n", |
121 | 123 | " user_id: int = Field()\n", |
122 | 124 | "\n", |
123 | 125 | "table = db.create_table(schema=Chunk)" |
|
128 | 130 | "id": "3eab5d6eaaaaa868", |
129 | 131 | "metadata": {}, |
130 | 132 | "source": [ |
131 | | - "### Insert Data" |
| 133 | + "### Insert Data\n", |
| 134 | + "\n", |
| 135 | + "🔢 Auto embedding: when you insert new data, the SDK automatically embeds the corpus for you." |
132 | 136 | ] |
133 | 137 | }, |
134 | 138 | { |
135 | 139 | "cell_type": "code", |
136 | | - "execution_count": 5, |
| 140 | + "execution_count": 46, |
137 | 141 | "id": "baec9a5ae06231be", |
138 | 142 | "metadata": {}, |
139 | 143 | "outputs": [ |
140 | 144 | { |
141 | 145 | "data": { |
142 | 146 | "text/plain": [ |
143 | | - "[Chunk(user_id=2, id=2, text='A quick brown dog runs in the park', text_vec=array([-0.0412815 , -0.00934362, 0.01239674, ..., -0.00587278,\n", |
144 | | - " -0.00735941, 0.01383422], dtype=float32)),\n", |
145 | | - " Chunk(user_id=2, id=3, text='The lazy fox sleeps under the tree', text_vec=array([-0.01610469, -0.00269681, -0.01787939, ..., -0.00041015,\n", |
146 | | - " 0.01320426, 0.02987844], dtype=float32)),\n", |
147 | | - " Chunk(user_id=3, id=4, text='A dog and a fox play in the park', text_vec=array([-2.7123539e-02, -4.4581316e-02, -3.8457386e-02, ...,\n", |
148 | | - " -1.1360981e-03, 9.5597192e-05, 3.4092940e-02], dtype=float32))]" |
| 147 | + "4" |
149 | 148 | ] |
150 | 149 | }, |
151 | | - "execution_count": 5, |
| 150 | + "execution_count": 46, |
152 | 151 | "metadata": {}, |
153 | 152 | "output_type": "execute_result" |
154 | 153 | } |
|
159 | 158 | " Chunk(id=2, text=\"A quick brown dog runs in the park\", user_id=2),\n", |
160 | 159 | " Chunk(id=3, text=\"The lazy fox sleeps under the tree\", user_id=2),\n", |
161 | 160 | " Chunk(id=4, text=\"A dog and a fox play in the park\", user_id=3)\n", |
162 | | - "])" |
| 161 | + "])\n", |
| 162 | + "table.rows()" |
163 | 163 | ] |
164 | 164 | }, |
165 | 165 | { |
|
172 | 172 | }, |
173 | 173 | { |
174 | 174 | "cell_type": "code", |
175 | | - "execution_count": 8, |
| 175 | + "execution_count": 53, |
176 | 176 | "id": "3c4313022f06bd3e", |
177 | 177 | "metadata": {}, |
178 | 178 | "outputs": [ |
179 | 179 | { |
180 | 180 | "data": { |
181 | 181 | "text/plain": [ |
182 | | - "[(4, 'A dog and a fox play in the park', 0.7308190419242949),\n", |
183 | | - " (2, 'A quick brown dog runs in the park', 0.665493189763966),\n", |
184 | | - " (1, 'The quick brown fox jumps over the lazy dog', 0.6157064668170177)]" |
| 182 | + "[('A quick brown dog runs in the park', 0.665493189763966),\n", |
| 183 | + " ('The lazy fox sleeps under the tree', 0.554631888866523)]" |
185 | 184 | ] |
186 | 185 | }, |
187 | | - "execution_count": 8, |
| 186 | + "execution_count": 53, |
188 | 187 | "metadata": {}, |
189 | 188 | "output_type": "execute_result" |
190 | 189 | } |
191 | 190 | ], |
192 | 191 | "source": [ |
193 | | - "chunks = table.search(\"A quick fox in the park\").limit(3).to_pydantic()\n", |
194 | | - "[\n", |
195 | | - " (c.id, c.text, c.score)\n", |
196 | | - " for c in chunks\n", |
197 | | - "]" |
| 192 | + "from autoflow.storage.tidb import DistanceMetric\n", |
| 193 | + "\n", |
| 194 | + "chunks = (\n", |
| 195 | + " table.search(\"A quick fox in the park\") # 👈 The query will be embedded automatically.\n", |
| 196 | + " # .distance_metric(metric=DistanceMetric.COSINE)\n", |
| 197 | + " # .num_candidate(20)\n", |
| 198 | + " .filter({\n", |
| 199 | + " \"user_id\": 2\n", |
| 200 | + " })\n", |
| 201 | + " .limit(2)\n", |
| 202 | + " .to_pydantic()\n", |
| 203 | + ")\n", |
| 204 | + "[(c.text, c.score) for c in chunks]" |
198 | 205 | ] |
199 | 206 | }, |
200 | 207 | { |
|
221 | 228 | }, |
222 | 229 | { |
223 | 230 | "cell_type": "code", |
224 | | - "execution_count": 11, |
| 231 | + "execution_count": 49, |
225 | 232 | "id": "ace02b45", |
226 | 233 | "metadata": {}, |
227 | 234 | "outputs": [ |
|
231 | 238 | "[(1, 'The quick brown fox jumps over the lazy dog', 1)]" |
232 | 239 | ] |
233 | 240 | }, |
234 | | - "execution_count": 11, |
| 241 | + "execution_count": 49, |
235 | 242 | "metadata": {}, |
236 | 243 | "output_type": "execute_result" |
237 | 244 | } |
|
249 | 256 | "id": "af9c3428", |
250 | 257 | "metadata": {}, |
251 | 258 | "source": [ |
252 | | - "### Truncate table" |
| 259 | + "### Truncate table\n", |
| 260 | + "\n", |
| 261 | + "Clear all data in the table:" |
253 | 262 | ] |
254 | 263 | }, |
255 | 264 | { |
256 | 265 | "cell_type": "code", |
257 | | - "execution_count": 12, |
| 266 | + "execution_count": 45, |
258 | 267 | "id": "cceb0bf0", |
259 | 268 | "metadata": {}, |
260 | | - "outputs": [], |
| 269 | + "outputs": [ |
| 270 | + { |
| 271 | + "data": { |
| 272 | + "text/plain": [ |
| 273 | + "0" |
| 274 | + ] |
| 275 | + }, |
| 276 | + "execution_count": 45, |
| 277 | + "metadata": {}, |
| 278 | + "output_type": "execute_result" |
| 279 | + } |
| 280 | + ], |
261 | 281 | "source": [ |
262 | | - "table.truncate()" |
| 282 | + "table.truncate()\n", |
| 283 | + "table.rows()" |
263 | 284 | ] |
264 | 285 | } |
265 | 286 | ], |
266 | | - "metadata": { |
267 | | - "kernelspec": { |
268 | | - "display_name": "Python 3", |
269 | | - "language": "python", |
270 | | - "name": "python3" |
271 | | - }, |
272 | | - "language_info": { |
273 | | - "codemirror_mode": { |
274 | | - "name": "ipython", |
275 | | - "version": 3 |
276 | | - }, |
277 | | - "file_extension": ".py", |
278 | | - "mimetype": "text/x-python", |
279 | | - "name": "python", |
280 | | - "nbconvert_exporter": "python", |
281 | | - "pygments_lexer": "ipython3", |
282 | | - "version": "3.11.9" |
283 | | - } |
284 | | - }, |
| 287 | + "metadata": {}, |
285 | 288 | "nbformat": 5, |
286 | 289 | "nbformat_minor": 9 |
287 | 290 | } |
0 commit comments