Skip to content

Commit 77f95f2

Browse files
committed
fix
1 parent 760b74e commit 77f95f2

File tree

1 file changed

+52
-49
lines changed

1 file changed

+52
-49
lines changed

core/tests/tidb-client-quickstart.ipynb

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": 3,
81+
"execution_count": 39,
8282
"id": "4995a54f311c4b1c",
8383
"metadata": {},
8484
"outputs": [],
@@ -100,7 +100,7 @@
100100
},
101101
{
102102
"cell_type": "code",
103-
"execution_count": 4,
103+
"execution_count": 40,
104104
"id": "bdddb9f0a005b74d",
105105
"metadata": {},
106106
"outputs": [],
@@ -113,11 +113,13 @@
113113
"# Define your embedding model.\n",
114114
"text_embed = EmbeddingFunction(\"openai/text-embedding-3-small\")\n",
115115
"\n",
116-
"class Chunk(TiDBModel, table=True):\n",
116+
"class Chunk(TiDBModel, table=True): \n",
117117
" __tablename__ = \"chunks\"\n",
118+
" __table_args__ = {'extend_existing': True}\n",
119+
"\n",
118120
" id: int = Field(primary_key=True)\n",
119121
" text: str = Field()\n",
120-
" text_vec: Optional[Any] = text_embed.VectorField(source_field=\"text\")\n",
122+
" text_vec: Optional[Any] = text_embed.VectorField(source_field=\"text\") # 👈 Define the vector field.\n",
121123
" user_id: int = Field()\n",
122124
"\n",
123125
"table = db.create_table(schema=Chunk)"
@@ -128,27 +130,24 @@
128130
"id": "3eab5d6eaaaaa868",
129131
"metadata": {},
130132
"source": [
131-
"### Insert Data"
133+
"### Insert Data\n",
134+
"\n",
135+
"🔢 Auto embedding: when you insert new data, the SDK automatically generates the embedding for each row's `text` field."
132136
]
133137
},
134138
{
135139
"cell_type": "code",
136-
"execution_count": 5,
140+
"execution_count": 46,
137141
"id": "baec9a5ae06231be",
138142
"metadata": {},
139143
"outputs": [
140144
{
141145
"data": {
142146
"text/plain": [
143-
"[Chunk(user_id=2, id=2, text='A quick brown dog runs in the park', text_vec=array([-0.0412815 , -0.00934362, 0.01239674, ..., -0.00587278,\n",
144-
" -0.00735941, 0.01383422], dtype=float32)),\n",
145-
" Chunk(user_id=2, id=3, text='The lazy fox sleeps under the tree', text_vec=array([-0.01610469, -0.00269681, -0.01787939, ..., -0.00041015,\n",
146-
" 0.01320426, 0.02987844], dtype=float32)),\n",
147-
" Chunk(user_id=3, id=4, text='A dog and a fox play in the park', text_vec=array([-2.7123539e-02, -4.4581316e-02, -3.8457386e-02, ...,\n",
148-
" -1.1360981e-03, 9.5597192e-05, 3.4092940e-02], dtype=float32))]"
147+
"4"
149148
]
150149
},
151-
"execution_count": 5,
150+
"execution_count": 46,
152151
"metadata": {},
153152
"output_type": "execute_result"
154153
}
@@ -159,7 +158,8 @@
159158
" Chunk(id=2, text=\"A quick brown dog runs in the park\", user_id=2),\n",
160159
" Chunk(id=3, text=\"The lazy fox sleeps under the tree\", user_id=2),\n",
161160
" Chunk(id=4, text=\"A dog and a fox play in the park\", user_id=3)\n",
162-
"])"
161+
"])\n",
162+
"table.rows()"
163163
]
164164
},
165165
{
@@ -172,29 +172,36 @@
172172
},
173173
{
174174
"cell_type": "code",
175-
"execution_count": 8,
175+
"execution_count": 53,
176176
"id": "3c4313022f06bd3e",
177177
"metadata": {},
178178
"outputs": [
179179
{
180180
"data": {
181181
"text/plain": [
182-
"[(4, 'A dog and a fox play in the park', 0.7308190419242949),\n",
183-
" (2, 'A quick brown dog runs in the park', 0.665493189763966),\n",
184-
" (1, 'The quick brown fox jumps over the lazy dog', 0.6157064668170177)]"
182+
"[('A quick brown dog runs in the park', 0.665493189763966),\n",
183+
" ('The lazy fox sleeps under the tree', 0.554631888866523)]"
185184
]
186185
},
187-
"execution_count": 8,
186+
"execution_count": 53,
188187
"metadata": {},
189188
"output_type": "execute_result"
190189
}
191190
],
192191
"source": [
193-
"chunks = table.search(\"A quick fox in the park\").limit(3).to_pydantic()\n",
194-
"[\n",
195-
" (c.id, c.text, c.score)\n",
196-
" for c in chunks\n",
197-
"]"
192+
"from autoflow.storage.tidb import DistanceMetric\n",
193+
"\n",
194+
"chunks = (\n",
195+
" table.search(\"A quick fox in the park\") # 👈 The query will be embedded automatically.\n",
196+
" # .distance_metric(metric=DistanceMetric.COSINE)\n",
197+
" # .num_candidate(20)\n",
198+
" .filter({\n",
199+
" \"user_id\": 2\n",
200+
" })\n",
201+
" .limit(2)\n",
202+
" .to_pydantic()\n",
203+
")\n",
204+
"[(c.text, c.score) for c in chunks]"
198205
]
199206
},
200207
{
@@ -221,7 +228,7 @@
221228
},
222229
{
223230
"cell_type": "code",
224-
"execution_count": 11,
231+
"execution_count": 49,
225232
"id": "ace02b45",
226233
"metadata": {},
227234
"outputs": [
@@ -231,7 +238,7 @@
231238
"[(1, 'The quick brown fox jumps over the lazy dog', 1)]"
232239
]
233240
},
234-
"execution_count": 11,
241+
"execution_count": 49,
235242
"metadata": {},
236243
"output_type": "execute_result"
237244
}
@@ -249,39 +256,35 @@
249256
"id": "af9c3428",
250257
"metadata": {},
251258
"source": [
252-
"### Truncate table"
259+
"### Truncate table\n",
260+
"\n",
261+
"Clear all data in the table:"
253262
]
254263
},
255264
{
256265
"cell_type": "code",
257-
"execution_count": 12,
266+
"execution_count": 45,
258267
"id": "cceb0bf0",
259268
"metadata": {},
260-
"outputs": [],
269+
"outputs": [
270+
{
271+
"data": {
272+
"text/plain": [
273+
"0"
274+
]
275+
},
276+
"execution_count": 45,
277+
"metadata": {},
278+
"output_type": "execute_result"
279+
}
280+
],
261281
"source": [
262-
"table.truncate()"
282+
"table.truncate()\n",
283+
"table.rows()"
263284
]
264285
}
265286
],
266-
"metadata": {
267-
"kernelspec": {
268-
"display_name": "Python 3",
269-
"language": "python",
270-
"name": "python3"
271-
},
272-
"language_info": {
273-
"codemirror_mode": {
274-
"name": "ipython",
275-
"version": 3
276-
},
277-
"file_extension": ".py",
278-
"mimetype": "text/x-python",
279-
"name": "python",
280-
"nbconvert_exporter": "python",
281-
"pygments_lexer": "ipython3",
282-
"version": "3.11.9"
283-
}
284-
},
287+
"metadata": {},
285288
"nbformat": 5,
286289
"nbformat_minor": 9
287290
}

0 commit comments

Comments
 (0)