3030 ChunkingFailedException ,
3131 CollectionNotFoundException ,
3232 DocumentNotFoundException ,
33+ FileSizeLimitExceededException ,
3334 InsufficientStorageLimitException ,
3435 ParsingDocumentFailedException ,
3536 VectorizationFailedException ,
@@ -201,7 +202,7 @@ async def create_document(
201202 # parse the file
202203 try :
203204 content = await self .parser_manager .parse (file = file )
204- document_size = len (content . encode ( encoding = "utf-8" ) )
205+ document_size = len (content )
205206 except Exception as e :
206207 logger .exception (f"failed to parse { document_name } ({ e } )." )
207208 raise ParsingDocumentFailedException ()
@@ -399,13 +400,26 @@ async def create_document_chunks(
399400 for i , chunk in enumerate (chunks , start = start )
400401 ]
401402
402- chunks_size = sum (len (chunk .content . encode ( encoding = "utf-8" ) ) for chunk in chunks )
403+ chunks_size = sum (len (chunk .content ) for chunk in chunks )
403404 storage_limit , storage_consumption = await self ._get_storage_limit_and_consumption (postgres_session = postgres_session , user_id = user_id )
404405 if storage_limit is not None and storage_consumption > storage_limit :
405406 raise InsufficientStorageLimitException (
406407 detail = f"Upload size limit exceeded. Limit: { storage_limit } bytes. Current: { storage_consumption } bytes."
407408 )
408409
410+ # update the document size
411+ result = await postgres_session .execute (
412+ statement = update (table = DocumentTable )
413+ .values (size = func .coalesce (DocumentTable .size , 0 ) + chunks_size )
414+ .where (DocumentTable .id == document_id )
415+ .returning (DocumentTable .size )
416+ )
417+ new_size = result .scalar_one ()
418+
419+ if new_size > FileSizeLimitExceededException .MAX_CONTENT_SIZE :
420+ await postgres_session .rollback ()
421+ raise FileSizeLimitExceededException ()
422+
409423 try :
410424 await self ._upsert_document_chunks (
411425 chunks = chunks ,
@@ -417,17 +431,11 @@ async def create_document_chunks(
417431 request_context = request_context ,
418432 )
419433 except Exception as e :
434+ await postgres_session .rollback ()
420435 raise VectorizationFailedException (detail = f"Vectorization failed: { e } " )
421436
422- chunk_ids = [chunk .id for chunk in chunks ]
423-
424- # update the document size
425- await postgres_session .execute (
426- statement = update (table = DocumentTable )
427- .values (size = func .coalesce (DocumentTable .size , 0 ) + chunks_size )
428- .where (DocumentTable .id == document_id )
429- )
430437 await postgres_session .commit ()
438+ chunk_ids = [chunk .id for chunk in chunks ]
431439
432440 return chunk_ids
433441
@@ -478,7 +486,7 @@ async def get_document_chunks(
478486 statement = select (DocumentTable )
479487 .join (CollectionTable , DocumentTable .collection_id == CollectionTable .id )
480488 .where (DocumentTable .id == document_id )
481- .where (CollectionTable .user_id == user_id )
489+ .where (or_ ( CollectionTable .user_id == user_id , CollectionTable . visibility == CollectionVisibility . PUBLIC ) )
482490 )
483491 try :
484492 result .scalar_one ()
0 commit comments