@@ -183,9 +183,7 @@ def get_collection():
183183 try :
184184 collection = client .get_collection (COLLECTION_NAME )
185185 doc_count = collection .count ()
186- logger .info (
187- f"Connected to collection '{ COLLECTION_NAME } ' with { doc_count } documents"
188- )
186+ logger .info (f"Connected to collection '{ COLLECTION_NAME } ' with { doc_count } documents" )
189187 except ValueError :
190188 logger .info (f"Creating new collection '{ COLLECTION_NAME } '" )
191189 collection = client .create_collection (COLLECTION_NAME )
@@ -361,9 +359,7 @@ async def query(request: QueryRequest):
361359 "metadata" : {
362360 "total_chunks" : collection .count (),
363361 "query" : request .query ,
364- "embedding_type" : (
365- "mock" if MOCK_EMBEDDINGS or not openai_client else "openai"
366- ),
362+ "embedding_type" : ("mock" if MOCK_EMBEDDINGS or not openai_client else "openai" ),
367363 },
368364 }
369365
@@ -401,14 +397,10 @@ class AgentQueryResponse(BaseModel):
401397
402398
403399@app .post ("/agent/query" , response_model = AgentQueryResponse )
404- async def agent_query (
405- req : AgentQueryRequest , deps : None = Depends (verify_dependencies )
406- ):
400+ async def agent_query (req : AgentQueryRequest , deps : None = Depends (verify_dependencies )):
407401 """Agent-optimized query endpoint for Cursor integration."""
408402 start_time = time .time ()
409- logger .info (
410- f"Agent query received: '{ req .query } ', top_k={ req .top_k } , context={ req .context } "
411- )
403+ logger .info (f"Agent query received: '{ req .query } ', top_k={ req .top_k } , context={ req .context } " )
412404
413405 # Check if mock mode is requested for this query
414406 use_mock = MOCK_EMBEDDINGS
@@ -523,9 +515,7 @@ async def agent_query(
523515 suggested_prompt += f"\n --- Context { i } ({ chunk ['source' ]} ) ---\n "
524516 suggested_prompt += f"{ chunk ['content' ]} \n "
525517
526- suggested_prompt += (
527- "\n Based on the above context, please help with the query."
528- )
518+ suggested_prompt += "\n Based on the above context, please help with the query."
529519 logger .debug (f"Generated prompt in { time .time () - prompt_start :.2f} s" )
530520
531521 response_time = time .time () - start_time
@@ -623,9 +613,7 @@ class IndexRequest(BaseModel):
623613 project_path : str = Field (
624614 "./whk-ignition-scada" , description = "Path to the Ignition project directory"
625615 )
626- rebuild : bool = Field (
627- False , description = "Whether to rebuild the index from scratch"
628- )
616+ rebuild : bool = Field (False , description = "Whether to rebuild the index from scratch" )
629617 skip_rate_limiting : bool = Field (
630618 False , description = "Skip rate limiting for faster processing (use with caution)"
631619 )
@@ -713,9 +701,7 @@ async def generate_embedding_with_backoff(text, max_retries=5, initial_backoff=1
713701 ):
714702 retries += 1
715703 if retries > max_retries :
716- logger .error (
717- f"Max retries reached for rate limit. Final error: { e !s} "
718- )
704+ logger .error (f"Max retries reached for rate limit. Final error: { e !s} " )
719705 raise
720706
721707 logger .info (
@@ -781,7 +767,7 @@ def chunk_by_characters(text, max_chunk_size):
781767 for file_index , file_path in enumerate (json_files ):
782768 file_start_time = time .time ()
783769 try :
784- logger .info (f"Processing { file_path } ... [{ file_index + 1 } /{ total_files } ]" )
770+ logger .info (f"Processing { file_path } ... [{ file_index + 1 } /{ total_files } ]" )
785771 with open (file_path , encoding = "utf-8" ) as f :
786772 content = f .read ()
787773
@@ -844,10 +830,7 @@ def chunk_by_characters(text, max_chunk_size):
844830 )
845831
846832 # For array-type JSONs, split at the top level
847- if (
848- isinstance (json_content , list )
849- and len (json_content ) > 1
850- ):
833+ if isinstance (json_content , list ) and len (json_content ) > 1 :
851834 logger .info (
852835 f"Using array-level chunking for JSON array with { len (json_content )} items"
853836 )
@@ -876,10 +859,7 @@ def chunk_by_characters(text, max_chunk_size):
876859 )
877860 sub_chunks .extend (item_chunks )
878861 # If adding this would exceed limit, create a new chunk
879- elif (
880- current_tokens + item_tokens
881- > hard_token_limit
882- ):
862+ elif current_tokens + item_tokens > hard_token_limit :
883863 array_str = json .dumps (current_array )
884864 sub_chunks .append (array_str )
885865 current_array = [item ]
@@ -901,9 +881,7 @@ def chunk_by_characters(text, max_chunk_size):
901881 content ,
902882 int (hard_token_limit / 1.2 ),
903883 )
904- chunks = [
905- (chunk , metadata ) for chunk in text_chunks
906- ]
884+ chunks = [(chunk , metadata ) for chunk in text_chunks ]
907885 except json .JSONDecodeError :
908886 # If JSON parsing fails, use character-level chunking
909887 text_chunks = chunk_by_characters (
@@ -928,14 +906,10 @@ def chunk_by_characters(text, max_chunk_size):
928906 for i , (chunk_text , chunk_metadata ) in enumerate (chunks ):
929907 try :
930908 # Generate embedding with backoff
931- embedding = await generate_embedding_with_backoff (
932- chunk_text
933- )
909+ embedding = await generate_embedding_with_backoff (chunk_text )
934910
935911 # Create a unique ID for this chunk
936- file_path_replaced = file_path .replace ("/" , "_" ).replace (
937- "\\ " , "_"
938- )
912+ file_path_replaced = file_path .replace ("/" , "_" ).replace ("\\ " , "_" )
939913 chunk_id = f"{ file_path_replaced } _chunk_{ i } "
940914
941915 # Add to collection
@@ -948,9 +922,7 @@ def chunk_by_characters(text, max_chunk_size):
948922
949923 chunk_count += 1
950924 except Exception as e :
951- logger .error (
952- f"Error processing chunk { i } of { file_path } : { e !s} "
953- )
925+ logger .error (f"Error processing chunk { i } of { file_path } : { e !s} " )
954926
955927 doc_count += 1
956928 logger .info (f"Indexed { file_path } into { len (chunks )} chunks" )
0 commit comments