Skip to content

Commit

Permalink
Update aryn-elasticsearch-blog-dataprep.ipynb
Browse files Browse the repository at this point in the history
Update from feedback
  • Loading branch information
jonfritz authored Jan 13, 2025
1 parent 55bb17b commit 5ba1d88
Showing 1 changed file with 4 additions and 4 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@
 "outputs": [],
 "source": [
 "!pip install sycamore-ai[elasticsearch]\n",
-"# Install the Sycamore document ETL library: https://github.com/aryn-ai/sycamore "
+"# Install the Sycamore document ETL library: https://github.com/aryn-ai/sycamore"
 ]
 },
 {
Expand Down Expand Up @@ -38,10 +38,10 @@
 "context = sycamore.init()\n",
 "# Add exec_mode=ExecMode.LOCAL to .init to run without Ray\n",
 "docset = context.read.binary(paths=paths, binary_format=\"pdf\")\n",
-"docset = docset.materialize(path=\"./opensearch-tutorial/downloaded-docset\", source_mode=sycamore.MATERIALIZE_USE_STORED)\n",
+"docset = docset.materialize(path=\"./elasticsearch-tutorial/downloaded-docset\", source_mode=sycamore.MATERIALIZE_USE_STORED)\n",
 "# Make sure your Aryn token is accessible in the environment variable ARYN_API_KEY\n",
 "partitioned_docset = (docset.partition(partitioner=ArynPartitioner(extract_table_structure=True, extract_images=True))\n",
-" .materialize(path=\"./opensearch-tutorial/partitioned-docset\", source_mode=sycamore.MATERIALIZE_USE_STORED)\n",
+" .materialize(path=\"./elasticsearch-tutorial/partitioned-docset\", source_mode=sycamore.MATERIALIZE_USE_STORED)\n",
 " )\n",
 "partitioned_docset.execute()"
 ]
Expand Down Expand Up @@ -98,7 +98,7 @@
 "\n",
 "embedded_docset = chunked_docset.spread_properties([\"entity\", \"path\"]).explode().embed(embedder=SentenceTransformerEmbedder(batch_size=10_000, model_name=model_name))\n",
 "\n",
-"embedded_docset = embedded_docset.materialize(path=\"./opensearch-tutorial/embedded-docset\", source_mode=sycamore.MATERIALIZE_USE_STORED)\n",
+"embedded_docset = embedded_docset.materialize(path=\"./elasticsearch-tutorial/embedded-docset\", source_mode=sycamore.MATERIALIZE_USE_STORED)\n",
 "embedded_docset.execute()"
 ]
 },
Expand Down

0 comments on commit 5ba1d88

Please sign in to comment.