Skip to content

Commit 6e62ab9

Browse files
committed
Remove mentions of tantivy
1 parent 7158f52 commit 6e62ab9

4 files changed

Lines changed: 8 additions & 12 deletions

File tree

docs/snippets/integrations.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ export const PyFrameworksAgnoCliChat = "agent.print_response(\n \"Summarize t
5050

5151
export const PyFrameworksAgnoIngestYoutube = "youtube_url = \"https://www.youtube.com/watch?v=wl6mFyXoxos\"\nvideo_id = extract_video_id(youtube_url)\nytt = YouTubeTranscriptApi()\ntranscript_segments = ytt.fetch(video_id, languages=[\"en\", \"en-US\"]).to_raw_data()\ntranscript_text = \" \".join(segment[\"text\"] for segment in transcript_segments)\n\nknowledge.insert(\n name=f\"YouTube Transcript ({video_id})\",\n text_content=transcript_text,\n metadata={\"source\": \"youtube\", \"video_id\": video_id, \"video_url\": youtube_url},\n)\n";
5252

53-
export const PyFrameworksAgnoSetup = "import os\nimport re\n\nfrom agno.agent import Agent\nfrom agno.knowledge.embedder.openai import OpenAIEmbedder\nfrom agno.knowledge.knowledge import Knowledge\nfrom agno.models.openai import OpenAIResponses\nfrom agno.vectordb.lancedb import LanceDb, SearchType\nfrom youtube_transcript_api import YouTubeTranscriptApi\n\nif \"OPENAI_API_KEY\" not in os.environ:\n os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n\ndef extract_video_id(youtube_url: str) -> str:\n match = re.search(r\"(?<=v=)[\\w-]+\", youtube_url) or re.search(\n r\"(?<=be/)[\\w-]+\", youtube_url\n )\n if not match:\n raise ValueError(\"Could not parse YouTube video ID from URL\")\n return match.group(0)\n\nknowledge = Knowledge(\n vector_db=LanceDb(\n uri=\"./tmp/lancedb\",\n table_name=\"youtube_transcripts\",\n search_type=SearchType.hybrid,\n use_tantivy=False,\n embedder=OpenAIEmbedder(id=\"text-embedding-3-small\"),\n ),\n)\n";
53+
export const PyFrameworksAgnoSetup = "import os\nimport re\n\nfrom agno.agent import Agent\nfrom agno.knowledge.embedder.openai import OpenAIEmbedder\nfrom agno.knowledge.knowledge import Knowledge\nfrom agno.models.openai import OpenAIResponses\nfrom agno.vectordb.lancedb import LanceDb, SearchType\nfrom youtube_transcript_api import YouTubeTranscriptApi\n\nif \"OPENAI_API_KEY\" not in os.environ:\n os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"\n\ndef extract_video_id(youtube_url: str) -> str:\n match = re.search(r\"(?<=v=)[\\w-]+\", youtube_url) or re.search(\n r\"(?<=be/)[\\w-]+\", youtube_url\n )\n if not match:\n raise ValueError(\"Could not parse YouTube video ID from URL\")\n return match.group(0)\n\nknowledge = Knowledge(\n vector_db=LanceDb(\n uri=\"./tmp/lancedb\",\n table_name=\"youtube_transcripts\",\n search_type=SearchType.hybrid,\n embedder=OpenAIEmbedder(id=\"text-embedding-3-small\"),\n ),\n)\n";
5454

5555
export const PyFrameworksLangchainAddImages = "image_uris = [\"./assets/image-1.png\", \"./assets/image-2.png\"]\nvector_store.add_images(uris=image_uris)\n# here image_uris are local fs paths to the images.\n";
5656

docs/snippets/search.mdx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{/* Auto-generated by scripts/mdx_snippets_gen.py. Do not edit manually. */}
22

3-
export const PyBasicFts = "uri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n\ntable = db.create_table(\n \"my_table_fts\",\n data=[\n {\"vector\": [3.1, 4.1], \"text\": \"Frodo was a happy puppy\"},\n {\"vector\": [5.9, 26.5], \"text\": \"There are several kittens playing\"},\n ],\n mode=\"overwrite\",\n)\n\n# passing `use_tantivy=False` to use lance FTS index\n# `use_tantivy=True` by default\ntable.create_fts_index(\"text\", use_tantivy=False)\ntable.search(\"puppy\").limit(10).select([\"text\"]).to_list()\n# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]\n# ...\n";
3+
export const PyBasicFts = "uri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n\ntable = db.create_table(\n \"my_table_fts\",\n data=[\n {\"vector\": [3.1, 4.1], \"text\": \"Frodo was a happy puppy\"},\n {\"vector\": [5.9, 26.5], \"text\": \"There are several kittens playing\"},\n ],\n mode=\"overwrite\",\n)\n\ntable.create_fts_index(\"text\")\ntable.search(\"puppy\").limit(10).select([\"text\"]).to_list()\n# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]\n# ...\n";
44

55
export const PyBasicFtsAsync = "uri = \"data/sample-lancedb\"\nasync_db = await lancedb.connect_async(uri)\n\nasync_tbl = await async_db.create_table(\n \"my_table_fts_async\",\n data=[\n {\"vector\": [3.1, 4.1], \"text\": \"Frodo was a happy puppy\"},\n {\"vector\": [5.9, 26.5], \"text\": \"There are several kittens playing\"},\n ],\n mode=\"overwrite\",\n)\n\n# async API uses our native FTS algorithm\nawait async_tbl.create_index(\"text\", config=FTS())\nawait (await async_tbl.search(\"puppy\")).select([\"text\"]).limit(10).to_list()\n# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]\n# ...\n";
66

@@ -24,7 +24,7 @@ export const PyExhaustiveSearchAsyncCosine = "(\n await (await async_tbl.sear
2424

2525
export const PyExhaustiveSearchCosine = "tbl.search(np.random.random((1536))).distance_type(\"cosine\").limit(10).to_list()\n";
2626

27-
export const PyFtsConfigFolding = "table.create_fts_index(\n \"text\",\n use_tantivy=False,\n language=\"French\",\n stem=True,\n ascii_folding=True,\n replace=True,\n)\n";
27+
export const PyFtsConfigFolding = "table.create_fts_index(\n \"text\",\n language=\"French\",\n stem=True,\n ascii_folding=True,\n replace=True,\n)\n";
2828

2929
export const PyFtsConfigFoldingAsync = "await async_tbl.create_index(\n \"text\", config=FTS(language=\"French\", stem=True, ascii_folding=True)\n)\n";
3030

@@ -44,7 +44,7 @@ export const PyFtsPrefiltering = "table.search(\"puppy\").limit(10).where(\"text
4444

4545
export const PyFtsPrefilteringAsync = "await (await async_tbl.search(\"puppy\")).limit(10).where(\"text='foo'\").to_list()\n";
4646

47-
export const PyFtsWithPosition = "table.create_fts_index(\"text\", use_tantivy=False, with_position=True, replace=True)\n";
47+
export const PyFtsWithPosition = "table.create_fts_index(\"text\", with_position=True, replace=True)\n";
4848

4949
export const PyFtsWithPositionAsync = "await async_tbl.create_index(\"text\", config=FTS(with_position=True))\n";
5050

tests/py/test_integrations.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1431,7 +1431,6 @@ def extract_video_id(youtube_url: str) -> str:
14311431
uri="./tmp/lancedb",
14321432
table_name="youtube_transcripts",
14331433
search_type=SearchType.hybrid,
1434-
use_tantivy=False,
14351434
embedder=OpenAIEmbedder(id="text-embedding-3-small"),
14361435
),
14371436
)

tests/py/test_search.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def test_fts_fuzzy_query():
181181
),
182182
mode="overwrite",
183183
)
184-
table.create_fts_index("text", use_tantivy=False, replace=True)
184+
table.create_fts_index("text", replace=True)
185185

186186
results = table.search(MatchQuery("foo", "text", fuzziness=1)).to_pandas()
187187
assert len(results) == 4
@@ -219,7 +219,7 @@ def test_fts_boost_query():
219219
),
220220
mode="overwrite",
221221
)
222-
table.create_fts_index("desc", use_tantivy=False, replace=True)
222+
table.create_fts_index("desc", replace=True)
223223

224224
results = table.search(
225225
BoostQuery(
@@ -252,9 +252,7 @@ def test_fts_native():
252252
mode="overwrite",
253253
)
254254

255-
# passing `use_tantivy=False` to use lance FTS index
256-
# `use_tantivy=True` by default
257-
table.create_fts_index("text", use_tantivy=False)
255+
table.create_fts_index("text")
258256
table.search("puppy").limit(10).select(["text"]).to_list()
259257
# [{'text': 'Frodo was a happy puppy', '_score': 0.6931471824645996}]
260258
# ...
@@ -265,7 +263,6 @@ def test_fts_native():
265263
# --8<-- [start:fts_config_folding]
266264
table.create_fts_index(
267265
"text",
268-
use_tantivy=False,
269266
language="French",
270267
stem=True,
271268
ascii_folding=True,
@@ -279,7 +276,7 @@ def test_fts_native():
279276
table.search("puppy").limit(10).where("text='foo'", prefilter=False).to_list()
280277
# --8<-- [end:fts_postfiltering]
281278
# --8<-- [start:fts_with_position]
282-
table.create_fts_index("text", use_tantivy=False, with_position=True, replace=True)
279+
table.create_fts_index("text", with_position=True, replace=True)
283280
# --8<-- [end:fts_with_position]
284281
# --8<-- [start:fts_incremental_index]
285282
table.add([{"vector": [3.1, 4.1], "text": "Frodo was a happy puppy"}])

0 commit comments

Comments (0)