Skip to content

Commit 5f8d441

Browse files
committed
Add Zilliz free tier example
Signed-off-by: christy <[email protected]>
1 parent 039b706 commit 5f8d441

File tree

1 file changed

+186
-38
lines changed

1 file changed

+186
-38
lines changed

notebooks/llms/langchain/readthedocs_rag_zilliz.ipynb

Lines changed: 186 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": null,
31+
"execution_count": 1,
3232
"id": "d7570b2e",
3333
"metadata": {},
3434
"outputs": [],
@@ -51,7 +51,7 @@
5151
},
5252
{
5353
"cell_type": "code",
54-
"execution_count": null,
54+
"execution_count": 2,
5555
"id": "20dcdaf7",
5656
"metadata": {},
5757
"outputs": [],
@@ -93,10 +93,18 @@
9393
},
9494
{
9595
"cell_type": "code",
96-
"execution_count": null,
96+
"execution_count": 3,
9797
"id": "0806d2db",
9898
"metadata": {},
99-
"outputs": [],
99+
"outputs": [
100+
{
101+
"name": "stdout",
102+
"output_type": "stream",
103+
"text": [
104+
"Type of server: zilliz_cloud\n"
105+
]
106+
}
107+
],
100108
"source": [
101109
"from pymilvus import connections, utility\n",
102110
"\n",
@@ -134,10 +142,26 @@
134142
},
135143
{
136144
"cell_type": "code",
137-
"execution_count": null,
145+
"execution_count": 4,
138146
"id": "dd2be7fd",
139147
"metadata": {},
140-
"outputs": [],
148+
"outputs": [
149+
{
150+
"name": "stdout",
151+
"output_type": "stream",
152+
"text": [
153+
"device: cpu\n",
154+
"<class 'sentence_transformers.SentenceTransformer.SentenceTransformer'>\n",
155+
"SentenceTransformer(\n",
156+
" (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel \n",
157+
" (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})\n",
158+
")\n",
159+
"model_name: BAAI/bge-base-en-v1.5\n",
160+
"EMBEDDING_LENGTH: 768\n",
161+
"MAX_SEQ_LENGTH: 512\n"
162+
]
163+
}
164+
],
141165
"source": [
142166
"# Import torch.\n",
143167
"import torch\n",
@@ -188,9 +212,19 @@
188212
},
189213
{
190214
"cell_type": "code",
191-
"execution_count": null,
192-
"metadata": {},
193-
"outputs": [],
215+
"execution_count": 5,
216+
"metadata": {},
217+
"outputs": [
218+
{
219+
"name": "stdout",
220+
"output_type": "stream",
221+
"text": [
222+
"Embedding length: 768\n",
223+
"Created collection: MIlvusDocs\n",
224+
"Schema: {'auto_id': True, 'description': 'The schema for docs pages', 'fields': [{'name': 'pk', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 768}}], 'enable_dynamic_field': True}\n"
225+
]
226+
}
227+
],
194228
"source": [
195229
"from pymilvus import (\n",
196230
" FieldSchema, DataType, \n",
@@ -246,9 +280,17 @@
246280
},
247281
{
248282
"cell_type": "code",
249-
"execution_count": null,
250-
"metadata": {},
251-
"outputs": [],
283+
"execution_count": 6,
284+
"metadata": {},
285+
"outputs": [
286+
{
287+
"name": "stdout",
288+
"output_type": "stream",
289+
"text": [
290+
"{'loading_progress': '100%'}\n"
291+
]
292+
}
293+
],
252294
"source": [
253295
"# 5. Drop the index, in case it already exists.\n",
254296
"mc.drop_index()\n",
@@ -266,19 +308,26 @@
266308
" field_name=\"vector\", \n",
267309
" index_params=index_params)\n",
268310
"\n",
269-
"# collection.load()\n",
270-
"\n",
271311
"# Get loading progress\n",
312+
"mc.load()\n",
272313
"progress = utility.loading_progress(COLLECTION_NAME)\n",
273314
"print(progress)"
274315
]
275316
},
276317
{
277318
"cell_type": "code",
278-
"execution_count": null,
319+
"execution_count": 7,
279320
"id": "6861beb7",
280321
"metadata": {},
281-
"outputs": [],
322+
"outputs": [
323+
{
324+
"name": "stdout",
325+
"output_type": "stream",
326+
"text": [
327+
"loaded 15 documents\n"
328+
]
329+
}
330+
],
282331
"source": [
283332
"## Read docs into LangChain\n",
284333
"#!pip install langchain \n",
@@ -309,9 +358,23 @@
309358
},
310359
{
311360
"cell_type": "code",
312-
"execution_count": null,
313-
"metadata": {},
314-
"outputs": [],
361+
"execution_count": 8,
362+
"metadata": {},
363+
"outputs": [
364+
{
365+
"name": "stdout",
366+
"output_type": "stream",
367+
"text": [
368+
"chunking time: 0.01805710792541504\n",
369+
"docs: 15, split into: 15\n",
370+
"split into chunks: 159, type: list of <class 'langchain.schema.document.Document'>\n",
371+
"\n",
372+
"Looking at a sample chunk...\n",
373+
"{'h1': 'Installation', 'h2': 'Installing via pip', 'source': 'rtdocs/pymilvus.readthedocs.io/en/latest/install.html'}\n",
374+
"demonstrate how to install and using PyMilvus in a virtual environment. See virtualenv for more info\n"
375+
]
376+
}
377+
],
315378
"source": [
316379
"from langchain.text_splitter import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter\n",
317380
"\n",
@@ -384,10 +447,19 @@
384447
},
385448
{
386449
"cell_type": "code",
387-
"execution_count": null,
450+
"execution_count": 9,
388451
"id": "512130a3",
389452
"metadata": {},
390-
"outputs": [],
453+
"outputs": [
454+
{
455+
"name": "stdout",
456+
"output_type": "stream",
457+
"text": [
458+
"{'h1': 'Installation', 'h2': 'Installing via pip', 'source': 'https://pymilvus.readthedocs.io/en/latest/install.html'}\n",
459+
"Installation¶ Installing via pip¶ PyMilvus is in the Python Package Index. PyMilvus only support pyt\n"
460+
]
461+
}
462+
],
391463
"source": [
392464
"# Clean up the metadata urls\n",
393465
"for doc in chunks:\n",
@@ -413,7 +485,7 @@
413485
},
414486
{
415487
"cell_type": "code",
416-
"execution_count": null,
488+
"execution_count": 10,
417489
"metadata": {},
418490
"outputs": [],
419491
"source": [
@@ -445,10 +517,21 @@
445517
},
446518
{
447519
"cell_type": "code",
448-
"execution_count": null,
520+
"execution_count": 11,
449521
"id": "b51ff139",
450522
"metadata": {},
451-
"outputs": [],
523+
"outputs": [
524+
{
525+
"name": "stdout",
526+
"output_type": "stream",
527+
"text": [
528+
"Start inserting entities\n",
529+
"Milvus insert time for 159 vectors: 1.0154786109924316 seconds\n",
530+
"(insert count: 159, delete count: 0, upsert count: 0, timestamp: 445785288603074562, success count: 159, err count: 0)\n",
531+
"[{\"name\":\"_default\",\"collection_name\":\"MIlvusDocs\",\"description\":\"\"}]\n"
532+
]
533+
}
534+
],
452535
"source": [
453536
"# Insert a batch of data into the Milvus collection.\n",
454537
"\n",
@@ -503,10 +586,18 @@
503586
},
504587
{
505588
"cell_type": "code",
506-
"execution_count": null,
589+
"execution_count": 12,
507590
"id": "5e7f41f4",
508591
"metadata": {},
509-
"outputs": [],
592+
"outputs": [
593+
{
594+
"name": "stdout",
595+
"output_type": "stream",
596+
"text": [
597+
"query length: 54\n"
598+
]
599+
}
600+
],
510601
"source": [
511602
"# Define a sample question about your data.\n",
512603
"question = \"what is the default distance metric used in AUTOINDEX?\"\n",
@@ -534,10 +625,20 @@
534625
},
535626
{
536627
"cell_type": "code",
537-
"execution_count": null,
628+
"execution_count": 13,
538629
"id": "89642119",
539630
"metadata": {},
540-
"outputs": [],
631+
"outputs": [
632+
{
633+
"name": "stdout",
634+
"output_type": "stream",
635+
"text": [
636+
"Loaded milvus collection into memory.\n",
637+
"Milvus search time: 0.06506514549255371 sec\n",
638+
"type: <class 'pymilvus.client.abstract.SearchResult'>, count: 5\n"
639+
]
640+
}
641+
],
541642
"source": [
542643
"# RETRIEVAL USING MILVUS.\n",
543644
"\n",
@@ -587,9 +688,17 @@
587688
},
588689
{
589690
"cell_type": "code",
590-
"execution_count": null,
591-
"metadata": {},
592-
"outputs": [],
691+
"execution_count": 14,
692+
"metadata": {},
693+
"outputs": [
694+
{
695+
"name": "stdout",
696+
"output_type": "stream",
697+
"text": [
698+
"2267\n"
699+
]
700+
}
701+
],
593702
"source": [
594703
"# # TODO - remove this before saving in github.\n",
595704
"# for n, hits in enumerate(results):\n",
@@ -617,10 +726,19 @@
617726
},
618727
{
619728
"cell_type": "code",
620-
"execution_count": null,
729+
"execution_count": 15,
621730
"id": "3e7fa0b6",
622731
"metadata": {},
623-
"outputs": [],
732+
"outputs": [
733+
{
734+
"name": "stdout",
735+
"output_type": "stream",
736+
"text": [
737+
"Question: what is the default distance metric used in AUTOINDEX?\n",
738+
"Answer: lazy dog\n"
739+
]
740+
}
741+
],
624742
"source": [
625743
"# BASELINING THE LLM: ASK A QUESTION WITHOUT ANY RETRIEVED CONTEXT.\n",
626744
"\n",
@@ -649,10 +767,19 @@
649767
},
650768
{
651769
"cell_type": "code",
652-
"execution_count": null,
770+
"execution_count": 16,
653771
"id": "a68e87b1",
654772
"metadata": {},
655-
"outputs": [],
773+
"outputs": [
774+
{
775+
"name": "stdout",
776+
"output_type": "stream",
777+
"text": [
778+
"Question: what is the default distance metric used in AUTOINDEX?\n",
779+
"Answer: MetricType.L2\n"
780+
]
781+
}
782+
],
656783
"source": [
657784
"# NOW ASK THE SAME LLM THE SAME QUESTION USING THE RETRIEVED CONTEXT.\n",
658785
"QA_input = {\n",
@@ -673,7 +800,7 @@
673800
},
674801
{
675802
"cell_type": "code",
676-
"execution_count": null,
803+
"execution_count": 17,
677804
"id": "d0e81e68",
678805
"metadata": {},
679806
"outputs": [],
@@ -684,10 +811,31 @@
684811
},
685812
{
686813
"cell_type": "code",
687-
"execution_count": null,
814+
"execution_count": 18,
688815
"id": "c777937e",
689816
"metadata": {},
690-
"outputs": [],
817+
"outputs": [
818+
{
819+
"name": "stdout",
820+
"output_type": "stream",
821+
"text": [
822+
"Author: Christy Bergman\n",
823+
"\n",
824+
"Python implementation: CPython\n",
825+
"Python version : 3.10.12\n",
826+
"IPython version : 8.15.0\n",
827+
"\n",
828+
"torch : 2.0.1\n",
829+
"transformers: 4.34.1\n",
830+
"milvus : 2.3.3\n",
831+
"pymilvus : 2.3.3\n",
832+
"langchain : 0.0.322\n",
833+
"\n",
834+
"conda environment: py310\n",
835+
"\n"
836+
]
837+
}
838+
],
691839
"source": [
692840
"# Props to Sebastian Raschka for this handy watermark.\n",
693841
"# !pip install watermark\n",

0 commit comments

Comments
 (0)