Skip to content

Commit 9345566

Browse files
committed
Concurrent index creation, allow -1 for paginated entries
1 parent 0a769bc commit 9345566

File tree

5 files changed

+63
-60
lines changed

5 files changed

+63
-60
lines changed

py/compose.full.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ services:
311311
- POSTGRES_PORT=${POSTGRES_PORT:-5432}
312312
- POSTGRES_DBNAME=${POSTGRES_DBNAME:-postgres}
313313
- R2R_PROJECT_NAME=${R2R_PROJECT_NAME:-r2r_default}
314+
- POSTGRES_MAX_CONNECTIONS=${POSTGRES_MAX_CONNECTIONS:-1024}
314315
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
315316
- OPENAI_API_BASE=${OPENAI_API_BASE:-}
316317
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}

py/core/main/api/management_router.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,11 @@ async def file_stream():
323323
async def documents_overview_app(
324324
document_ids: list[str] = Query([]),
325325
offset: int = Query(0, ge=0),
326-
limit: int = Query(100, ge=1, le=1000),
326+
limit: int = Query(
327+
100,
328+
ge=-1,
329+
description="Number of items to return. Use -1 to return all items.",
330+
),
327331
auth_user=Depends(self.service.providers.auth.auth_wrapper),
328332
) -> WrappedDocumentOverviewResponse:
329333
request_user_ids = (

py/core/main/services/management_service.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,8 +358,8 @@ async def documents_overview(
358358
user_ids: Optional[list[UUID]] = None,
359359
collection_ids: Optional[list[UUID]] = None,
360360
document_ids: Optional[list[UUID]] = None,
361-
offset: Optional[int] = 0,
362-
limit: Optional[int] = 1000,
361+
offset: Optional[int] = None,
362+
limit: Optional[int] = None,
363363
*args: Any,
364364
**kwargs: Any,
365365
):

py/core/providers/database/vecs/collection.py

Lines changed: 54 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,26 @@ def _drop(self):
382382

383383
return self
384384

385+
def _get_index_options(
    self,
    method: IndexMethod,
    index_arguments: Optional[Union[IndexArgsIVFFlat, IndexArgsHNSW]],
) -> str:
    """
    Render the ``WITH (...)`` options clause for a ``CREATE INDEX`` statement.

    Args:
        method: The index method being created.
        index_arguments: Optional tuning arguments. They are honoured only
            when their type matches ``method``; otherwise the fallback
            values below are used.

    Returns:
        str: ``"WITH (lists=<n>)"`` for ivfflat (fallback ``lists=100``),
        ``"WITH (m=<m>, ef_construction=<efc>)"`` for hnsw (fallback
        ``m=16, ef_construction=64``), or an empty string for any other
        method.
    """
    if method == IndexMethod.ivfflat:
        # Fall back to the default list count when no ivfflat arguments
        # were supplied.
        n_lists = (
            index_arguments.n_lists
            if isinstance(index_arguments, IndexArgsIVFFlat)
            else 100
        )
        return f"WITH (lists={n_lists})"

    if method == IndexMethod.hnsw:
        # Fall back to the default graph parameters when no hnsw arguments
        # were supplied.
        if isinstance(index_arguments, IndexArgsHNSW):
            m = index_arguments.m
            ef_construction = index_arguments.ef_construction
        else:
            m, ef_construction = 16, 64
        return f"WITH (m={m}, ef_construction={ef_construction})"

    # Any other method takes no options.
    return ""
404+
385405
def upsert(
386406
self,
387407
records: Iterable[Record],
@@ -941,6 +961,7 @@ def create_index(
941961
Union[IndexArgsIVFFlat, IndexArgsHNSW]
942962
] = None,
943963
replace=True,
964+
concurrently=True,
944965
) -> None:
945966
"""
946967
Creates an index for the collection.
@@ -1017,69 +1038,45 @@ def create_index(
10171038
if ops is None:
10181039
raise ArgError("Unknown index measure")
10191040

1020-
unique_string = str(uuid4()).replace("-", "_")[0:7]
1021-
1022-
with self.client.Session() as sess:
1023-
with sess.begin():
1024-
if self.index is not None:
1025-
if replace:
1026-
sess.execute(
1027-
text(
1028-
f'drop index {self.client.project_name}."{self.index}";'
1029-
)
1030-
)
1031-
self._index = None
1032-
else:
1033-
raise ArgError(
1034-
"replace is set to False but an index exists"
1035-
)
1036-
1037-
if method == IndexMethod.ivfflat:
1038-
if not index_arguments:
1039-
n_records: int = sess.execute(func.count(self.table.c.extraction_id)).scalar() # type: ignore
1041+
concurrently_sql = "CONCURRENTLY" if concurrently else ""
10401042

1041-
n_lists = (
1042-
int(max(n_records / 1000, 30))
1043-
if n_records < 1_000_000
1044-
else int(math.sqrt(n_records))
1045-
)
1046-
else:
1047-
# The following mypy error is ignored because mypy
1048-
# complains that `index_arguments` is typed as a union
1049-
# of IndexArgsIVFFlat and IndexArgsHNSW types,
1050-
# which both don't necessarily contain the `n_lists`
1051-
# parameter, however we have validated that the
1052-
# correct type is being used above.
1053-
n_lists = index_arguments.n_lists # type: ignore
1054-
1055-
sess.execute(
1056-
text(
1057-
f"""
1058-
create index ix_{ops}_ivfflat_nl{n_lists}_{unique_string}
1059-
on {self.client.project_name}."{self.table.name}"
1060-
using ivfflat (vec {ops}) with (lists={n_lists})
1061-
"""
1062-
)
1043+
# Drop existing index if needed (must be outside of transaction)
1044+
if self.index is not None and replace:
1045+
drop_index_sql = f'DROP INDEX {concurrently_sql} IF EXISTS {self.client.project_name}."{self.index}";'
1046+
try:
1047+
with self.client.engine.connect() as connection:
1048+
connection = connection.execution_options(
1049+
isolation_level="AUTOCOMMIT"
10631050
)
1051+
connection.execute(text(drop_index_sql))
1052+
except Exception as e:
1053+
raise Exception(f"Failed to drop existing index: {e}")
1054+
self._index = None
10641055

1065-
if method == IndexMethod.hnsw:
1066-
if not index_arguments:
1067-
index_arguments = IndexArgsHNSW()
1056+
unique_string = str(uuid4()).replace("-", "_")[0:7]
1057+
index_name = f"ix_{ops}_{method}__{unique_string}"
10681058

1069-
# See above for explanation of why the following lines
1070-
# are ignored
1071-
m = index_arguments.m # type: ignore
1072-
ef_construction = index_arguments.ef_construction # type: ignore
1059+
create_index_sql = f"""
1060+
CREATE INDEX {concurrently_sql} {index_name}
1061+
ON {self.client.project_name}."{self.table.name}"
1062+
USING {method} (vec {ops}) {self._get_index_options(method, index_arguments)};
1063+
"""
10731064

1074-
sess.execute(
1075-
text(
1076-
f"""
1077-
create index ix_{ops}_hnsw_m{m}_efc{ef_construction}_{unique_string}
1078-
on {self.client.project_name}."{self.table.name}"
1079-
using hnsw (vec {ops}) WITH (m={m}, ef_construction={ef_construction});
1080-
"""
1081-
)
1065+
try:
1066+
if concurrently:
1067+
with self.client.engine.connect() as connection:
1068+
connection = connection.execution_options(
1069+
isolation_level="AUTOCOMMIT"
10821070
)
1071+
connection.execute(text(create_index_sql))
1072+
else:
1073+
with self.client.Session() as sess:
1074+
sess.execute(text(create_index_sql))
1075+
sess.commit()
1076+
except Exception as e:
1077+
raise Exception(f"Failed to create index: {e}")
1078+
1079+
self._index = index_name
10831080

10841081
return None
10851082

py/core/providers/database/vector.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ def create_index(
306306
measure=measure,
307307
index_arguments=index_options,
308308
replace=True,
309+
concurrently=True,
309310
)
310311

311312
def delete(

0 commit comments

Comments
 (0)