deepsense-ai
diff --git a/‎.gitignore
Lines changed: 2 additions & 0 deletions b/‎.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/api_reference/document_search/ingest/enrichers.md
Lines changed: 7 additions & 0 deletions b/‎docs/api_reference/document_search/ingest/enrichers.md
Lines changed: 7 additions & 0 deletions
diff --git a/‎docs/api_reference/document_search/ingest/parsers.md
Lines changed: 11 additions & 0 deletions b/‎docs/api_reference/document_search/ingest/parsers.md
Lines changed: 11 additions & 0 deletions
diff --git a/‎docs/api_reference/document_search/processing.md
Lines changed: 0 additions & 24 deletions b/‎docs/api_reference/document_search/processing.md
Lines changed: 0 additions & 24 deletions
diff --git a/‎docs/how-to/document_search/ingest/strategies/async_processing.md
Lines changed: 1 addition & 1 deletion b/‎docs/how-to/document_search/ingest/strategies/async_processing.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md
Lines changed: 9 additions & 9 deletions b/‎docs/how-to/document_search/ingest/strategies/create_custom_execution_strategy.md
Lines changed: 9 additions & 9 deletions
diff --git a/‎docs/how-to/document_search/search_documents.md
Lines changed: 7 additions & 7 deletions b/‎docs/how-to/document_search/search_documents.md
Lines changed: 7 additions & 7 deletions
diff --git a/‎docs/how-to/project/component_preferences.md
Lines changed: 1 addition & 1 deletion b/‎docs/how-to/project/component_preferences.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/document-search/configurable.py
Lines changed: 1 addition & 1 deletion b/‎examples/document-search/configurable.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/document-search/multimodal.py
Lines changed: 3 additions & 3 deletions b/‎examples/document-search/multimodal.py
Lines changed: 3 additions & 3 deletions
@@ -98,3 +98,5 @@ chroma/
 qdrant/
 
 .aider*
+
+.DS_Store
@@ -0,0 +1,7 @@
+# Element Enrichers
+
+::: ragbits.document_search.ingestion.enrichers.router.ElementEnricherRouter
+
+::: ragbits.document_search.ingestion.enrichers.base.ElementEnricher
+
+::: ragbits.document_search.ingestion.enrichers.image.ImageElementEnricher
@@ -0,0 +1,11 @@
+# Document Parsers
+
+::: ragbits.document_search.ingestion.parsers.router.DocumentParserRouter
+
+::: ragbits.document_search.ingestion.parsers.base.DocumentParser
+
+::: ragbits.document_search.ingestion.parsers.base.TextDocumentParser
+
+::: ragbits.document_search.ingestion.parsers.base.ImageDocumentParser
+
+::: ragbits.document_search.ingestion.parsers.unstructured.UnstructuredDocumentParser
@@ -3,7 +3,7 @@
 In Ragbits, a component called "processing execution strategy" controls how document processing is executed during ingestion. There are multiple execution strategies available in Ragbits that can be easily interchanged. You can also [create new custom execution strategies](create_custom_execution_strategy.md) to meet your specific needs.
 
 !!! note
-    It's important to note that processing execution strategies are a separate concept from processors. While the former manage how the processing is executed, the latter deals with the actual processing of documents. Processors are managed by [DocumentProcessorRouter][ragbits.document_search.ingestion.document_processor.DocumentProcessorRouter].
+    It's important to note that processing execution strategies are a separate concept from parsers. While the former manage how the processing is executed, the latter deal with the actual parsing of documents. Parsers are managed by [DocumentParserRouter][ragbits.document_search.ingestion.parsers.router.DocumentParserRouter].
 
 ## The Synchronous Execution Strategy
 
 
@@ -19,16 +19,16 @@ from collections.abc import Sequence
 
 from ragbits.document_search.documents.document import Document, DocumentMeta, Source
 from ragbits.document_search.documents.element import Element
-from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
+from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter
 from ragbits.document_search.ingestion.strategies import IngestStrategy
-from ragbits.document_search.ingestion.providers.base import BaseProvider
+from ragbits.document_search.ingestion.parsers.base import DocumentParser
 
 class DelayedExecutionStrategy(IngestStrategy):
     async def process_documents(
         self,
         documents: Sequence[DocumentMeta | Document | Source],
-        processor_router: DocumentProcessorRouter,
-        processor_overwrite: BaseProvider | None = None,
+        processor_router: DocumentParserRouter,
+        processor_overwrite: DocumentParser | None = None,
     ) -> list[Element]:
         elements = []
         for document in documents:
@@ -48,24 +48,24 @@ from collections.abc import Sequence
 
 from ragbits.document_search.documents.document import Document, DocumentMeta, Source
 from ragbits.document_search.documents.element import Element
-from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
+from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter
 from ragbits.document_search.ingestion.strategies import IngestStrategy
-from ragbits.document_search.ingestion.providers.base import BaseProvider
+from ragbits.document_search.ingestion.parsers.base import DocumentParser
 
 class DelayedExecutionStrategy(IngestStrategy):
     async def process_documents(
         self,
         documents: Sequence[DocumentMeta | Document | Source],
-        processor_router: DocumentProcessorRouter,
-        processor_overwrite: BaseProvider | None = None,
+        processor_router: DocumentParserRouter,
+        processor_overwrite: DocumentParser | None = None,
     ) -> list[Element]:
         elements = []
         for document in documents:
             # Convert the document to DocumentMeta
             document_meta = await self.to_document_meta(document)
 
             # Get the processor for the document
-            processor = processor_overwrite or processor_router.get_provider(document)
+            processor = processor_overwrite or processor_router.get(document)
 
             await asyncio.sleep(1)
 
 
@@ -76,26 +76,26 @@ library that supports parsing and chunking of most common document types (i.e. p
 from ragbits.core.embeddings.litellm import LiteLLMEmbedder
 from ragbits.core.vector_stores.in_memory import InMemoryVectorStore
 from ragbits.document_search import DocumentSearch
-from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
+from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter
 from ragbits.document_search.documents.document import DocumentType
-from ragbits.document_search.ingestion.providers.unstructured.default import UnstructuredDefaultProvider
+from ragbits.document_search.ingestion.parsers.unstructured import UnstructuredDocumentParser
 
 embedder = LiteLLMEmbedder()
 vector_store = InMemoryVectorStore(embedder=embedder)
 document_search = DocumentSearch(
     vector_store=vector_store,
-    parser_router=DocumentProcessorRouter({DocumentType.TXT: UnstructuredDefaultProvider()})
+    parser_router=DocumentParserRouter({DocumentType.TXT: UnstructuredDocumentParser()})
 )
 ```
 
-If you want to implement a new provider you should extend the [`BaseProvider`][ragbits.document_search.ingestion.providers.base.BaseProvider] class:
+If you want to implement a new parser, you should extend the [`DocumentParser`][ragbits.document_search.ingestion.parsers.base.DocumentParser] class:
 ```python
 from ragbits.document_search.documents.document import DocumentMeta, DocumentType
 from ragbits.document_search.documents.element import Element
-from ragbits.document_search.ingestion.providers.base import BaseProvider
+from ragbits.document_search.ingestion.parsers.base import DocumentParser
 
 
-class CustomProvider(BaseProvider):
+class CustomProvider(DocumentParser):
     SUPPORTED_DOCUMENT_TYPES = { DocumentType.TXT }  # provide supported document types
 
     async def process(self, document_meta: DocumentMeta) -> list[Element]:
@@ -112,7 +112,7 @@ There is an additional functionality of [`DocumentSearch`][ragbits.document_sear
 config = {
     "vector_store": {...},
     "reranker": {...},
-    "providers": {...},
+    "parser_router": {...},
     "rephraser": {...},
 }
 
 
@@ -167,6 +167,6 @@ This is the list of component types for which you can set a preferred configurat
 | `vector_store`       | `ragbits-core`            | [`VectorStore`][ragbits.core.vector_stores.base.VectorStore]|                                          |
 | `history_compressor` | `ragbits-conversations`   | [`ConversationHistoryCompressor`][ragbits.conversations.history.compressors.base.ConversationHistoryCompressor]|          |
 | `document_search`    | `ragbits-document-search` | [`DocumentSearch`][ragbits.document_search.DocumentSearch]| Specifics: [Configuration](#ds-configuration)|
-| `provider`           | `ragbits-document-search` | [`BaseProvider`][ragbits.document_search.ingestion.providers.base.BaseProvider]|                                              |
+| `parser`           | `ragbits-document-search` | [`DocumentParser`][ragbits.document_search.ingestion.parsers.base.DocumentParser]|                                              |
 | `rephraser`          | `ragbits-document-search` | [`QueryRephraser`][ragbits.document_search.retrieval.rephrasers.QueryRephraser]|                                          |
 | `reranker`           | `ragbits-document-search` | [`Reranker`][ragbits.document_search.retrieval.rerankers.base.Reranker]|                                               |
@@ -90,7 +90,7 @@ class to rephrase the query.
             },
         },
     },
-    "providers": {"txt": {"type": "DummyProvider"}},
+    "parser_router": {"txt": {"type": "TextDocumentParser"}},
     "rephraser": {
         "type": "LLMQueryRephraser",
         "config": {
 
@@ -40,8 +40,8 @@
 from ragbits.document_search import DocumentSearch
 from ragbits.document_search.documents.document import DocumentMeta, DocumentType
 from ragbits.document_search.documents.sources import LocalFileSource
-from ragbits.document_search.ingestion.document_processor import DocumentProcessorRouter
-from ragbits.document_search.ingestion.providers.dummy import DummyImageProvider
+from ragbits.document_search.ingestion.parsers.base import ImageDocumentParser
+from ragbits.document_search.ingestion.parsers.router import DocumentParserRouter
 
 IMAGES_PATH = Path(__file__).parent / "images"
 
@@ -68,7 +68,7 @@ async def main() -> None:
     vector_store_hybrid = HybridSearchVectorStore(vector_store_text, vector_store_image)
 
     # For this example, we want to skip OCR and make sure that we test direct image embeddings.
-    parser_router = DocumentProcessorRouter.from_config({DocumentType.JPG: DummyImageProvider()})
+    parser_router = DocumentParserRouter({DocumentType.JPG: ImageDocumentParser()})
 
     document_search = DocumentSearch(
         vector_store=vector_store_hybrid,
-Original file line number
+Diff line change
 qdrant/
 .aider*
++
 +.DS_Store