
Commit 6aff1d9

feat: refactored API rate limiting handling. Fixed e2es in run_tests.sh
1 parent 13e8329 commit 6aff1d9

2 files changed: 72 additions (+72), 68 deletions (−68)


api.py

Lines changed: 57 additions & 67 deletions
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import asyncio
 import hashlib
 import json
 import os
@@ -683,14 +684,55 @@ async def index_project(request: IndexRequest):
     total_files = len(json_files)
     start_time = time.time()
 
-    # Rate limiting variables
-    tokens_in_minute = 0
-    minute_start = time.time()
-    max_tokens_per_minute = 80000  # Conservative limit below OpenAI's 100K TPM
-
     # Set limits for chunking
     hard_token_limit = 7500
 
+    # Helper function for exponential backoff retry
+    async def generate_embedding_with_backoff(text, max_retries=5, initial_backoff=1):
+        """Generate embedding with exponential backoff for rate limit handling."""
+        if MOCK_EMBEDDINGS:
+            return indexer_mock_embedding(text)
+
+        retries = 0
+        backoff_time = initial_backoff
+
+        while retries <= max_retries:
+            try:
+                response = openai_client.embeddings.create(
+                    input=text, model="text-embedding-ada-002"
+                )
+                return response.data[0].embedding
+            except Exception as e:
+                error_str = str(e).lower()
+
+                # Check if this is a rate limit error
+                if (
+                    "rate limit" in error_str
+                    or "too many requests" in error_str
+                    or "429" in error_str
+                ):
+                    retries += 1
+                    if retries > max_retries:
+                        logger.error(
+                            f"Max retries reached for rate limit. Final error: {e!s}"
+                        )
+                        raise
+
+                    logger.info(
+                        f"Rate limit hit. Backing off for {backoff_time:.1f} seconds (retry {retries}/{max_retries})"
+                    )
+                    await asyncio.sleep(backoff_time)
+
+                    # Exponential backoff: double the wait time for next retry
+                    backoff_time *= 2
+                else:
+                    # Not a rate limit error, re-raise
+                    logger.error(f"Error generating embedding: {e!s}")
+                    raise
+
+        # This should not be reached due to the raise in the loop
+        raise Exception("Failed to generate embedding after maximum retries")
+
     # Helper function for character chunking (copied from main.py)
     def chunk_by_characters(text, max_chunk_size):
         """Chunk a text by a fixed number of characters, respecting JSON structure when possible."""
@@ -763,35 +805,9 @@ def chunk_by_characters(text, max_chunk_size):
 
             # Always chunk files over 3000 tokens to ensure safer processing
             if token_count <= 3000:
-                # Check rate limits
-                if (
-                    not request.skip_rate_limiting
-                    and tokens_in_minute + token_count > max_tokens_per_minute
-                ):
-                    # Wait until the minute is up
-                    elapsed = time.time() - minute_start
-                    if elapsed < 60:
-                        sleep_time = 60 - elapsed
-                        logger.info(
-                            f"Rate limit approaching. Sleeping for {sleep_time:.1f} seconds..."
-                        )
-                        time.sleep(sleep_time)
-                    # Reset rate limit counter
-                    tokens_in_minute = 0
-                    minute_start = time.time()
-
                 try:
-                    # Generate embedding using OpenAI or mock
-                    if MOCK_EMBEDDINGS:
-                        embedding = indexer_mock_embedding(content)
-                        logger.info("Generated mock embedding")
-                    else:
-                        response = openai_client.embeddings.create(
-                            input=content, model="text-embedding-ada-002"
-                        )
-                        embedding = response.data[0].embedding
-                    # Update rate limit counter
-                    tokens_in_minute += token_count
+                    # Generate embedding with backoff for rate limiting
+                    embedding = await generate_embedding_with_backoff(content)
 
                     # Add to collection
                     file_path_replaced = file_path.replace("/", "_").replace("\\", "_")
@@ -805,7 +821,7 @@ def chunk_by_characters(text, max_chunk_size):
                     chunk_count += 1
                     logger.info(f"Indexed {file_path} as a single chunk")
                 except Exception as e:
-                    logger.error(f"Error processing {file_path}: {e}")
+                    logger.error(f"Error processing {file_path}: {e!s}")
             else:
                 # For large files, we need to chunk the content
                 logger.info(f"File exceeds token limit, chunking: {file_path}")
@@ -910,37 +926,11 @@ def chunk_by_characters(text, max_chunk_size):
 
                     # Process chunks
                     for i, (chunk_text, chunk_metadata) in enumerate(chunks):
-                        # Check rate limits for OpenAI API
-                        chunk_tokens = len(enc.encode(chunk_text))
-
-                        if (
-                            not request.skip_rate_limiting
-                            and not MOCK_EMBEDDINGS
-                            and tokens_in_minute + chunk_tokens > max_tokens_per_minute
-                        ):
-                            # Wait until the minute is up
-                            elapsed = time.time() - minute_start
-                            if elapsed < 60:
-                                sleep_time = 60 - elapsed
-                                logger.info(
-                                    f"Rate limit approaching. Sleeping for {sleep_time:.1f} seconds..."
-                                )
-                                time.sleep(sleep_time)
-                            # Reset rate limit counter
-                            tokens_in_minute = 0
-                            minute_start = time.time()
-
                         try:
-                            # Generate embedding
-                            if MOCK_EMBEDDINGS:
-                                embedding = indexer_mock_embedding(chunk_text)
-                            else:
-                                response = openai_client.embeddings.create(
-                                    input=chunk_text, model="text-embedding-ada-002"
-                                )
-                                embedding = response.data[0].embedding
-                            # Update rate limit counter
-                            tokens_in_minute += chunk_tokens
+                            # Generate embedding with backoff
+                            embedding = await generate_embedding_with_backoff(
+                                chunk_text
+                            )
 
                             # Create a unique ID for this chunk
                             file_path_replaced = file_path.replace("/", "_").replace(
@@ -959,21 +949,21 @@ def chunk_by_characters(text, max_chunk_size):
                             chunk_count += 1
                         except Exception as e:
                             logger.error(
-                                f"Error processing chunk {i} of {file_path}: {e}"
+                                f"Error processing chunk {i} of {file_path}: {e!s}"
                             )
 
                     doc_count += 1
                     logger.info(f"Indexed {file_path} into {len(chunks)} chunks")
 
                 except Exception as e:
-                    logger.error(f"Error chunking {file_path}: {e}")
+                    logger.error(f"Error chunking {file_path}: {e!s}")
 
             # Calculate time taken for this file
             file_time = time.time() - file_start_time
             logger.info(f"Processed {file_path} in {file_time:.2f} seconds")
 
         except Exception as e:
-            logger.error(f"Error processing {file_path}: {e}")
+            logger.error(f"Error processing {file_path}: {e!s}")
 
     # Calculate total time
     total_time = time.time() - start_time

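For reference, here is a minimal, self-contained sketch (not from this commit) of the retry pattern that the new generate_embedding_with_backoff helper applies. The names FlakyEmbeddings and embed_with_backoff are invented for illustration only; the real helper calls openai_client.embeddings.create, uses logger for output, and short-circuits to indexer_mock_embedding when MOCK_EMBEDDINGS is set.

    import asyncio


    class FlakyEmbeddings:
        """Hypothetical stand-in for openai_client.embeddings: raises a
        rate-limit style error twice, then succeeds, so the retry loop
        can be observed without a real API key."""

        def __init__(self, failures=2):
            self.failures = failures
            self.calls = 0

        def create(self, input, model):
            self.calls += 1
            if self.calls <= self.failures:
                raise RuntimeError("429: rate limit exceeded")
            return [0.0] * 1536  # stand-in for response.data[0].embedding


    async def embed_with_backoff(client, text, max_retries=5, initial_backoff=1.0):
        """Same retry shape as generate_embedding_with_backoff in the diff above."""
        backoff_time = initial_backoff
        for attempt in range(1, max_retries + 1):
            try:
                return client.create(input=text, model="text-embedding-ada-002")
            except Exception as e:
                message = str(e).lower()
                if "rate limit" not in message and "429" not in message:
                    raise  # not a rate-limit error: re-raise immediately
                if attempt == max_retries:
                    raise  # out of retries
                await asyncio.sleep(backoff_time)
                backoff_time *= 2  # exponential backoff: 1s, 2s, 4s, ...


    async def main():
        client = FlakyEmbeddings(failures=2)
        vector = await embed_with_backoff(client, "hello world", initial_backoff=0.01)
        print(f"succeeded after {client.calls} calls, dim={len(vector)}")


    asyncio.run(main())

Because the backoff is per-call rather than a shared per-minute token budget, the helper only waits when the API actually pushes back, which is why the old tokens_in_minute bookkeeping could be deleted.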
run_tests.sh

Lines changed: 15 additions & 1 deletion
@@ -3,12 +3,17 @@ set -e
 
 # Process command line arguments
 SKIP_LINT=false
+INCLUDE_E2E=false
 for arg in "$@"; do
   case $arg in
     --no-lint)
       SKIP_LINT=true
       shift # Remove --no-lint from processing
       ;;
+    --include-e2e)
+      INCLUDE_E2E=true
+      shift # Remove --include-e2e from processing
+      ;;
     *)
       # Unknown option
       ;;
@@ -49,9 +54,18 @@ export MOCK_EMBEDDINGS="true"
 export TEST_MODE="true"
 export LOG_LEVEL="ERROR" # Reduce logging noise during tests
 
+# Determine test targets
+if [ "$INCLUDE_E2E" = true ]; then
+  TEST_TARGETS="tests"
+  echo -e "${YELLOW}Running ALL tests including E2E tests - make sure API server is running!${NC}"
+else
+  TEST_TARGETS="tests/unit tests/integration"
+  echo -e "${YELLOW}Running unit and integration tests only. Use --include-e2e to run E2E tests.${NC}"
+fi
+
 # Run tests
 echo -e "${BLUE}Running tests...${NC}"
-if pytest -v --cov=. --cov-report term-missing --cov-config=.coveragerc; then
+if pytest -v --cov=. --cov-report term-missing --cov-config=.coveragerc $TEST_TARGETS; then
   echo -e "${GREEN}All tests passed${NC}"
 else
   echo -e "${RED}Tests failed${NC}"

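Usage note: with this change the E2E suite is opt-in. Running the script with no flags limits pytest to tests/unit and tests/integration, while passing --include-e2e (for example, bash run_tests.sh --include-e2e) expands the targets to the whole tests directory, which assumes the API server is already running.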