Skip to content

Commit 448fd12

Browse files
p0dejesh3pik
authored and committed
feat: rate limit requests
This addition allows delaying requests to the LLM. This can be useful when using free tiers of AI providers. For example, Google AI Studio has a free plan for Gemini limited to 15 requests per minute. It's possible to set ALUMNIUM_RPM_LIMIT=15, which will delay every request by 4 seconds, ensuring the tests don't encounter a "429 Resource Exhausted" error.
1 parent b4cd876 commit 448fd12

File tree

7 files changed

+37
-6
lines changed

7 files changed

+37
-6
lines changed

.github/workflows/ci.yml

+5-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ env:
1515
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
1616
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
1717
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
18+
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
1819
DISPLAY: :99
1920

2021
jobs:
@@ -23,13 +24,15 @@ jobs:
2324
runs-on: ubuntu-latest
2425
env:
2526
ALUMNIUM_MODEL: ${{ matrix.model }}
27+
ALUMNIUM_RPM_LIMIT: ${{ matrix.model == 'google' && '15' || '0' }}
2628
strategy:
2729
fail-fast: false
2830
matrix:
2931
model:
3032
- aws_anthropic
3133
- aws_meta
3234
- azure_openai
35+
- google
3336
steps:
3437
- uses: actions/checkout@v4
3538
- uses: actions/setup-python@v5
@@ -39,6 +42,7 @@ jobs:
3942
path: ./.venv
4043
key: venv-${{ hashFiles('poetry.lock') }}
4144
- run: Xvfb ${{ env.DISPLAY }} &
45+
- run: echo ALUMNIUM_DEBUG=${{ runner.debug }} >> "$GITHUB_ENV"
4246
- run: poetry install
4347
- run: poetry run behave --format html-pretty --outfile reports/behave.html --format pretty
4448
- run: poetry run pytest --html reports/pytest.html
@@ -70,7 +74,7 @@ jobs:
7074
key: venv-${{ hashFiles('poetry.lock') }}
7175
- run: poetry install
7276
- run: poetry build
73-
- run: echo "version=$(poetry version --short)" >> $GITHUB_OUTPUT
77+
- run: echo "version=$(poetry version --short)" >> "$GITHUB_OUTPUT"
7478
id: version
7579
- uses: actions/attest-build-provenance@v1
7680
with:

alumnium/agents/actor_agent.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from langchain_core.language_models import BaseChatModel
66
from langchain_core.prompts import ChatPromptTemplate
77

8+
from alumnium.delayed_runnable import DelayedRunnable
89
from alumnium.drivers import SeleniumDriver
910
from alumnium.tools import ALL_TOOLS
1011

@@ -27,7 +28,7 @@ def __init__(self, driver: SeleniumDriver, llm: BaseChatModel):
2728
("human", self.USER_MESSAGE),
2829
]
2930
)
30-
self.chain = prompt | llm
31+
self.chain = prompt | DelayedRunnable(llm)
3132

3233
def invoke(self, goal: str):
3334
logger.info(f"Starting action:")

alumnium/agents/contradiction_checker_agent.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from langchain_core.language_models import BaseChatModel
55
from pydantic import BaseModel, Field
66

7+
from alumnium.delayed_runnable import DelayedRunnable
78

89
logger = logging.getLogger(__name__)
910

@@ -17,7 +18,7 @@ class ContradictionCheckerAgent:
1718
USER_MESSAGE = f.read()
1819

1920
def __init__(self, llm: BaseChatModel):
20-
self.chain = llm.with_structured_output(Response, include_raw=True)
21+
self.chain = DelayedRunnable(llm.with_structured_output(Response, include_raw=True))
2122

2223
def invoke(self, statement: str, verification_explanation: str) -> bool:
2324
logger.info(f"Starting contradiction checking:")

alumnium/agents/loading_detector_agent.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from langchain_core.language_models import BaseChatModel
55
from pydantic import BaseModel, Field
66

7+
from alumnium.delayed_runnable import DelayedRunnable
78

89
logger = logging.getLogger(__name__)
910

@@ -26,10 +27,10 @@ class LoadingDetectorAgent:
2627

2728
def __init__(self, llm: BaseChatModel):
2829
llm = llm.with_structured_output(Loading, include_raw=True)
29-
self.chain = llm
30+
self.chain = DelayedRunnable(llm)
3031

3132
def invoke(self, aria: str, title: str, url: str, screenshot: str = ""):
32-
logger.info(f"Starting loading detection:")
33+
logger.info("Starting loading detection:")
3334

3435
human_messages = [
3536
{

alumnium/agents/verifier_agent.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from langchain_core.language_models import BaseChatModel
66
from pydantic import BaseModel, Field
77

8+
from alumnium.delayed_runnable import DelayedRunnable
89
from alumnium.drivers import SeleniumDriver
910
from . import LoadingDetectorAgent
1011

@@ -26,7 +27,7 @@ class VerifierAgent:
2627

2728
def __init__(self, driver: SeleniumDriver, llm: BaseChatModel):
2829
self.driver = driver
29-
self.chain = llm.with_structured_output(Verification, include_raw=True)
30+
self.chain = DelayedRunnable(llm.with_structured_output(Verification, include_raw=True))
3031

3132
self.loading_detector_agent = LoadingDetectorAgent(llm)
3233
self.retry_count = LoadingDetectorAgent.timeout / LoadingDetectorAgent.delay

alumnium/delayed_runnable.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import logging
2+
from os import getenv
3+
from time import sleep
4+
5+
from langchain_core.runnables import Runnable
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
class DelayedRunnable(Runnable):
    """Runnable wrapper that sleeps before each invocation to respect a rate limit.

    Useful for free tiers of LLM providers (e.g. Google AI Studio's 15 RPM
    limit for Gemini): setting ALUMNIUM_RPM_LIMIT=15 delays every request by
    4 seconds, avoiding "429 Resource Exhausted" errors.

    Note: when the ALUMNIUM_RPM_LIMIT environment variable is set to a
    non-zero value, it takes precedence over the explicit ``delay`` argument.
    """

    def __init__(self, runnable: Runnable, delay: float = 0):
        """Wrap ``runnable``, pausing ``delay`` seconds before each ``invoke``.

        ``delay`` defaults to 0 (no pause); a non-zero ALUMNIUM_RPM_LIMIT
        environment variable overrides it with 60 / RPM seconds.
        """
        self.runnable = runnable
        self.delay = delay
        # os.getenv returns a string; keep the default a string for symmetry.
        rpm_limit = int(getenv("ALUMNIUM_RPM_LIMIT", "0"))
        if rpm_limit:
            # Spread requests evenly across the minute: 60s / RPM.
            # This is a float (e.g. 60 / 13 ≈ 4.6), hence the float annotation.
            self.delay = 60 / rpm_limit

    def invoke(self, input, config=None):
        """Sleep for the configured delay (if any), then delegate to the wrapped runnable."""
        if self.delay:
            logger.info(f"Delaying invocation for {self.delay} seconds")
            sleep(self.delay)
        return self.runnable.invoke(input, config)

examples/pytest/calculator_test.py

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
@mark.xfail(Model.load() == Model.AWS_ANTHROPIC, reason="Bedrock version of Haiku is subpar")
66
@mark.xfail(Model.load() == Model.AWS_META, reason="It is too hard for Llama 3.2")
7+
@mark.xfail(Model.load() == Model.GOOGLE, reason="It is too hard for Gemini 1.5 Flash")
78
def test_addition(al, driver):
89
driver.get("https://seleniumbase.io/apps/calculator")
910
al.do("1 + 1 =")

0 commit comments

Comments
 (0)