cleanups

SciPhi-AI · Oct 8, 2024 · 6dc487b · 6dc487b
1 parent 0a769bc
commit 6dc487b
Show file tree

Hide file tree

Showing 17 changed files with 47 additions and 75 deletions.
diff --git a/.github/workflows/integration-test-workflow-debian.yml b/.github/workflows/integration-test-workflow-debian.yml
@@ -138,6 +138,9 @@ jobs:
         poetry run python tests/integration/harness_sdk.py test_user_gets_collections_for_user
         poetry run python tests/integration/harness_sdk.py test_user_gets_collections_for_document
         poetry run python tests/integration/harness_sdk.py test_user_permissions
+        poetry run python tests/integration/harness_sdk.py test_collection_user_interactions
+        poetry run python tests/integration/harness_sdk.py test_collection_document_interactions
+        poetry run python tests/integration/harness_sdk.py test_error_handling
 
     - name: Stop R2R server
       if: always()

diff --git a/docs/cookbooks/agent.mdx b/docs/cookbooks/agent.mdx
@@ -48,7 +48,7 @@ def rag_agent_example():
     # Use the RAG assistant via the agent endpoint
     response = client.agent(
         messages=messages,
-        vector_search_settings={"search_limit": 5, "search_filters": {}},
+        vector_search_settings={"search_limit": 5, "filters": {}},
         kg_search_settings={"use_kg_search": False},
         rag_generation_config={"max_tokens": 300}
     )
@@ -91,7 +91,7 @@ def streaming_rag_agent_example():
 
     streaming_response = client.agent(
         messages=messages,
-        vector_search_settings={"search_limit": 5, "search_filters": {}},
+        vector_search_settings={"search_limit": 5, "filters": {}},
         kg_search_settings={"use_kg_search": False},
         rag_generation_config={"max_tokens": 300, "stream": True}
     )
@@ -144,7 +144,7 @@ custom_response = client.agent(
         {"role": "system", "content": "You are an expert on ancient Greek philosophy. Ask clarifying questions if needed."},
         {"role": "user", "content": "Compare Aristotle's and Plato's views on the nature of reality."},
     ],
-    vector_search_settings={"search_limit": 25, "use_hybrid_search": True, "search_filters": {"category": "philosophy"}},
+    vector_search_settings={"search_limit": 25, "use_hybrid_search": True, "filters": {"category": {"$eq": "philosophy"}}},
     kg_search_settings={"use_kg_search": False},
     rag_generation_config={
         "max_tokens": 500,

diff --git a/docs/cookbooks/walkthrough.mdx b/docs/cookbooks/walkthrough.mdx
@@ -800,7 +800,7 @@ curl -X POST http://localhost:7272/v2/rag \
     "query": "Who is Jon Snow?",
     "vector_search_settings": {
       "use_vector_search": true,
-      "search_filters": {},
+      "filters": {},
       "search_limit": 10,
       "use_hybrid_search": true
     }

diff --git a/docs/documentation/configuration/rag.mdx b/docs/documentation/configuration/rag.mdx
@@ -30,13 +30,11 @@ Refer to the retrieval configuration [page here](/documentation/configuration/re
 The `rag_generation_config` parameter allows you to customize the language model's behavior. Default settings are set on the server side using `r2r.toml`, as described in the previous configuration guides. These settings can be overridden at runtime as shown below:
 
 ```python
-# Configure knowledge graph search
+# Configure vector search
 vector_search_settings = {
     "use_vector_search": True,
-    "filters": {"document_type": "article"},
     "search_limit": 20,
     "use_hybrid_search": True,
-    "selected_collection_ids": ["c3291abf-8a4e-5d9d-80fd-232ef6fd8526"]
 }
 
 # Configure graphRAG search

diff --git a/docs/documentation/configuration/retrieval/vector-search.mdx b/docs/documentation/configuration/retrieval/vector-search.mdx
@@ -10,10 +10,9 @@ Example using the Python SDK:
 ```python
 vector_search_settings = {
     "use_vector_search": True,
-    "filters": {"document_type": "article"},
+    "filters": {"document_type": {"$eq": "article"}},
     "search_limit": 20,
     "use_hybrid_search": True,
-    "selected_collection_ids": ["c3291abf-8a4e-5d9d-80fd-232ef6fd8526"]
 }
 
 response = client.search("query", vector_search_settings=vector_search_settings)
@@ -27,7 +26,6 @@ response = client.search("query", vector_search_settings=vector_search_settings)
 2. `use_hybrid_search` (bool): Whether to perform a hybrid search (combining vector and keyword search)
 3. `filters` (dict): Filters to apply to the vector search
 4. `search_limit` (int): Maximum number of results to return (1-1000)
-5. `selected_collection_ids` (list[UUID]): Group IDs to search for
 6. `index_measure` (IndexMeasure): The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product)
 7. `include_values` (bool): Whether to include search score values in the search results
 8. `include_metadatas` (bool): Whether to include element metadata in the search results

diff --git a/docs/documentation/deployment/sciphi.mdx b/docs/documentation/deployment/sciphi.mdx
@@ -6,30 +6,22 @@ icon: 'building'
 
 # SciPhi Enterprise: Fully Managed R2R for Your Organization
 
-SciPhi offers a fully managed, enterprise-grade solution for deploying and scaling R2R (RAG to Riches) within your organization. Our SciPhi Enterprise offering provides all the benefits of R2R, including multimodal ingestion, hybrid search, GraphRAG, user management, and observability, in a hassle-free, scalable environment tailored to your business needs.
+SciPhi offers a fully managed, enterprise-grade solution for deploying and scaling R2R (RAG to Riches) within your organization. The SciPhi Enterprise offering provides all the benefits of R2R, including multimodal ingestion, hybrid search, GraphRAG, user management, and observability, in a hassle-free, scalable environment.
 
-## Why SciPhi Enterprise?
+## Why Use SciPhi Enterprise?
 
-- **Fully Managed**: We handle the infrastructure, deployment, scaling, updates, and maintenance of R2R, so your team can focus on building RAG applications.
+- **Fully Managed**: SciPhi handles the infrastructure, deployment, scaling, updates, and maintenance of R2R, so your team can focus on building RAG applications.
 - **Scalable**: Seamlessly scale your R2R deployment to handle growing user bases, document collections, and query volumes.
 - **Secure**: Benefit from enterprise-level security, compliance, and data privacy measures.
 - **Customizable**: Tailor your R2R deployment to your organization's specific requirements and integrate with your existing systems and workflows.
-- **Expert Support**: Get direct access to the R2R team for guidance, troubleshooting, and best practices, ensuring your success with the platform.
-
-## Key Features
-
-- All the powerful features of R2R, including multimodal ingestion, hybrid search, GraphRAG, user management, and observability
-- Fully managed deployment and scaling across your choice of cloud provider or on-premises environment
-- Enterprise-grade security and compliance measures to protect your data and meet regulatory requirements
-- Customizable configuration and integration options to fit your organization's unique needs
-- Dedicated support and success management from the R2R team to ensure you get the most value from the platform
+- **Expert Support**: Get direct access to the SciPhi team for guidance, troubleshooting, and best practices.
 
 ## Getting Started
 
 Getting started with SciPhi Enterprise is easy:
 
-1. **Contact Us**: Reach out to our team at [[email protected]](mailto:[email protected]) to discuss your organization's RAG application needs and learn more about SciPhi Enterprise.
-2. **Discovery**: Our experts will work with you to understand your requirements, existing systems, and goals for R2R within your organization.
-3. **Deployment**: We'll handle the deployment and configuration of R2R in your environment, whether cloud-based or on-premises, and integrate with your existing systems and workflows.
-4. **Onboarding**: Our team will provide training and support to help your developers and users get up and running with R2R quickly and effectively.
-5. **Ongoing Support**: With SciPhi Enterprise, you'll have ongoing access to our team for support, updates, and guidance as you scale and evolve your RAG applications.
+1. **Contact SciPhi**: Reach out to the SciPhi team at [[email protected]](mailto:[email protected]) to discuss your organization's RAG application needs and learn more about SciPhi Enterprise.
+2. **Discovery**: SciPhi's experts will work with you to understand your requirements, existing systems, and goals for R2R within your organization.
+3. **Deployment**: SciPhi will handle the deployment and configuration of R2R in your environment, whether cloud-based or on-premises, and integrate with your existing systems and workflows.
+4. **Onboarding**: SciPhi will provide training and support to help your developers and users get up and running with R2R quickly and effectively.
+5. **Ongoing Support**: SciPhi Enterprise provides ongoing support, updates, and guidance as you scale and evolve your RAG applications.
diff --git a/docs/documentation/js-sdk/retrieval.mdx b/docs/documentation/js-sdk/retrieval.mdx
@@ -72,9 +72,6 @@ const searchResponse = await client.search("What was Uber's profit in 2020?");
     Maximum number of results to return (1-1000).
     </ParamField>
 
-    <ParamField path="selected_collection_ids" type="Array<string | null>">
-    Group IDs to search for.
-    </ParamField>
     <ParamField path="index_measure" type="string" default="cosine_distance">
     The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product).
     </ParamField>
@@ -102,11 +99,7 @@ const searchResponse = await client.search("What was Uber's profit in 2020?");
   <Expandable title="properties">
 
     <ParamField path="filters" type="dict[str, Any]">
-      Filters to apply to the vector search. This field is deprecated in favor of `search_filters`.
-    </ParamField>
-
-    <ParamField path="selected_collection_ids" type="list[UUID]">
-      Collection IDs to search for.
+      Filters to apply to the vector search. This field supersedes the former `search_filters` name.
     </ParamField>
 
     <ParamField path="graphrag_map_system_prompt" type="str" default="graphrag_map_system_prompt">
@@ -159,7 +152,7 @@ Search with custom settings, such as bespoke document filters and larger search
 # returns only chunks from documents with title `document_title`
 const filtered_search_response = client.search(
     "What was Uber's profit in 2020?",
-    { search_filters: {
+    { filters: {
         title: { $eq: "uber_2021.pdf" }
     },
     search_limit: 100
@@ -175,7 +168,7 @@ Combine traditional keyword-based search with vector search:
 ```javascript
 const hybrid_search_response = client.search(
     "What was Uber's profit in 2020?",
-    { search_filters: {
+    { filters: {
         title: { $eq: "uber_2021.pdf" }
     },
     search_limit: 100
@@ -284,10 +277,6 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?");
     Maximum number of results to return (1-1000).
     </ParamField>
 
-    <ParamField path="selected_collection_ids" type="list[UUID]" default="[]">
-    Group IDs to search for.
-    </ParamField>
-
     <ParamField path="index_measure" type="IndexMeasure" default="cosine_distance">
     The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product).
     </ParamField>
@@ -338,12 +327,9 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?");
   The `KGSearchSettings` class allows you to configure the knowledge graph search settings for your R2R system. Here are the available options:
 
   <ParamField path="filters" type="dict[str, Any]">
-    Filters to apply to the vector search. This field is deprecated in favor of `search_filters`.
+    Filters to apply to the vector search. This field supersedes the former `search_filters` name.
   </ParamField>
 
-  <ParamField path="selected_collection_ids" type="list[UUID]">
-    Collection IDs to search for.
-  </ParamField>
 
   <ParamField path="graphrag_map_system_prompt" type="str" default="graphrag_map_system_prompt">
     The system prompt for the GraphRAG map prompt.
@@ -635,7 +621,7 @@ Note that any of the customization seen in AI powered search and RAG documentati
   Whether to use vector search.
 </ParamField>
 
-<ParamField path="search_filters" type="object">
+<ParamField path="filters" type="object">
   Optional filters for the search.
 </ParamField>
 

diff --git a/docs/documentation/python-sdk/retrieval.mdx b/docs/documentation/python-sdk/retrieval.mdx
@@ -71,10 +71,6 @@ search_response = client.search("What was Uber's profit in 2020?")
     Maximum number of results to return (1-1000).
     </ParamField>
 
-    <ParamField path="selected_collection_ids" type="list[UUID]" default="[]">
-    Group IDs to search for.
-    </ParamField>
-
     <ParamField path="index_measure" type="IndexMeasure" default="cosine_distance">
     The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product).
     </ParamField>
@@ -170,7 +166,7 @@ filtered_search_response = client.search(
     "What was Uber's profit in 2020?",
     vector_search_settings={
         # restrict results to the Uber document
-        "search_filters": {"title": {"$eq": "uber_2021.pdf"}},
+        "filters": {"title": {"$eq": "uber_2021.pdf"}},
         "search_limit": 100
     }
 )
@@ -205,7 +201,7 @@ Learn more about the dedicated knowledge graph capabilities [in R2R here](/cookb
 ```python
 kg_search_response = client.search(
     "What is airbnb",
-    vector_search_settings={"use_vector_search": False}
+    vector_search_settings={"use_vector_search": False},
     kg_search_settings={
       "use_kg_search": True,
       "kg_search_type": "local",

diff --git a/js/sdk/README.md b/js/sdk/README.md
@@ -92,7 +92,7 @@ console.log(searchResult);
 const ragResult = await client.rag({
   query: "Who was Raskolnikov?",
   use_vector_search: true,
-  search_filters: {},
+  filters: {},
   search_limit: 10,
   use_hybrid_search: false,
   use_kg_search: false,

diff --git a/js/sdk/src/models.tsx b/js/sdk/src/models.tsx
@@ -42,7 +42,6 @@ export interface VectorSearchSettings {
   filters?: Record<string, any>;
   search_limit?: number;
   offset?: number;
-  selected_collection_ids?: string[];
   index_measure: IndexMeasure;
   include_values?: boolean;
   include_metadatas?: boolean;
@@ -55,7 +54,6 @@ export interface VectorSearchSettings {
 export interface KGSearchSettings {
   use_kg_search?: boolean;
   filters?: Record<string, any>;
-  selected_collection_ids?: string[];
   graphrag_map_system_prompt?: string;
   kg_search_type?: "global" | "local";
   kg_search_level?: number | null;

diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py
@@ -371,7 +371,7 @@ async def document_chunks_app(
                 auth_user.id
             )
             document_collections = await self.service.document_collections(
-                document_uuid, 0, 1
+                document_uuid, 0, -1
             )
 
             user_has_access = (

diff --git a/py/tests/integration/harness_sdk.py b/py/tests/integration/harness_sdk.py
@@ -3,7 +3,7 @@
 
 from r2r import R2RClient, R2RException
 
-client = R2RClient("http://localhost:7272")
+client = R2RClient("http://localhost:7276")
 
 
 def compare_result_fields(result, expected_fields):
@@ -170,7 +170,7 @@ def test_vector_search_sample_file_filter_sdk():
     results = client.search(
         query="What was Uber's recent profit??",
         vector_search_settings={
-            "search_filters": {
+            "filters": {
                 "document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}
             }
         },
@@ -201,7 +201,7 @@ def test_hybrid_search_sample_file_filter_sdk():
         query="What was Uber's recent profit??",
         vector_search_settings={
             "use_hybrid_search": True,
-            "search_filters": {
+            "filters": {
                 "document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}
             },
         },
@@ -238,7 +238,7 @@ def test_rag_response_sample_file_sdk():
     response = client.rag(
         query="What was Uber's recent profit and loss?",
         vector_search_settings={
-            "search_filters": {
+            "filters": {
                 "document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}
             }
         },
@@ -263,7 +263,7 @@ def test_rag_response_stream_sample_file_sdk():
         query="What was Uber's recent profit and loss?",
         rag_generation_config={"stream": True},
         vector_search_settings={
-            "search_filters": {
+            "filters": {
                 "document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}
             }
         },
@@ -298,7 +298,7 @@ def test_agent_sample_file_sdk():
         ],
         rag_generation_config={"stream": False},
         vector_search_settings={
-            "search_filters": {
+            "filters": {
                 "document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}
             }
         },
@@ -333,7 +333,7 @@ def test_agent_stream_sample_file_sdk():
         ],
         rag_generation_config={"stream": True},
         vector_search_settings={
-            "search_filters": {
+            "filters": {
                 "document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}
             }
         },
@@ -1388,13 +1388,14 @@ def test_collection_document_interactions():
     # Get collections for the document
     document_collections = client.document_collections(document_id)
 
-    # Check if both collections are present in the document's collections
+    test_collection_ids = [collection1_id, collection2_id]
     if not all(
-        collection["collection_id"] in [collection1_id, collection2_id]
-        for collection in document_collections["results"]
+        collection_id
+        in [c["collection_id"] for c in document_collections["results"]]
+        for collection_id in test_collection_ids
     ):
         print(
-            "Collection document interactions test failed: Document not assigned to both collections"
+            "Collection document interactions test failed: Document not assigned to both test collections"
         )
         sys.exit(1)
 

diff --git a/templates/agentic_rag_chatbot/web-app/components/ChatWindow.tsx b/templates/agentic_rag_chatbot/web-app/components/ChatWindow.tsx
@@ -128,7 +128,7 @@ export const ChatWindow: FC<{
             messages: [...messages, newUserMessage],
             apiUrl: agentUrl,
             use_vector_search: true,
-            search_filters: {},
+            filters: {},
             search_limit: 10,
             do_hybrid_search: false,
             use_kg_search: false,

diff --git a/templates/agentic_rag_chatbot/web-app/pages/api/agent.ts b/templates/agentic_rag_chatbot/web-app/pages/api/agent.ts
@@ -15,7 +15,7 @@ export default async function handler(req: Request) {
       userId,
       apiUrl,
       use_vector_search,
-      search_filters,
+      filters,
       search_limit,
       do_hybrid_search,
       use_kg_search,
@@ -30,7 +30,7 @@ export default async function handler(req: Request) {
 
     const searchParams = {
       use_vector_search: use_vector_search ?? true,
-      filters: userId ? { ...search_filters, user_id: userId } : search_filters,
+      filters: userId ? { ...filters, user_id: userId } : filters,
       limit: search_limit || 10,
       do_hybrid_search: do_hybrid_search ?? false,
     };

diff --git a/templates/recommendation_platform/web-app/pages/api/search.ts b/templates/recommendation_platform/web-app/pages/api/search.ts
@@ -7,7 +7,7 @@ export const config = {
 interface RagParams {
   query: string;
   use_vector_search?: boolean;
-  search_filters?: Record<string, any>;
+  filters?: Record<string, any>;
   search_limit?: number;
   do_hybrid_search?: boolean;
   use_kg_search?: boolean;
@@ -34,7 +34,7 @@ export default async function handler(req: Request) {
     const params: RagParams = {
       query,
       use_vector_search: true,
-      search_filters: {},
+      filters: {},
       search_limit: 10,
       do_hybrid_search: false,
       use_kg_search: false,

diff --git a/templates/ycombinator_graphrag/web-app/components/ChatWindow.tsx b/templates/ycombinator_graphrag/web-app/components/ChatWindow.tsx
@@ -136,7 +136,7 @@ export const ChatWindow: FC<ChatWindowProps> = ({
             messages: [...messages, newUserMessage],
             apiUrl: agentUrl,
             use_vector_search: true,
-            search_filters: {},
+            filters: {},
             search_limit: 20,
             do_hybrid_search: false,
             use_kg_search: true,