Skip to content

Commit 448fd12

Browse files
p0dejesh3pik
authored and committed
feat: rate limit requests
This addition allows delaying requests to the LLM. This can be useful when using free tiers of AI providers. For example, Google AI Studio has a free plan for Gemini limited to 15 requests per minute. It's possible to set ALUMNIUM_RPM_LIMIT=15, which will delay every request by 4 seconds, ensuring the tests don't encounter a "429 Resource Exhausted" error.
1 parent b4cd876 commit 448fd12

File tree

7 files changed

+37
-6
lines changed

7 files changed

+37
-6
lines changed

.github/workflows/ci.yml

+5-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ env:
1515
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
1616
AZURE_OPENAI_API_VERSION: ${{ secrets.AZURE_OPENAI_API_VERSION }}
1717
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
18+
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
1819
DISPLAY: :99
1920

2021
jobs:
@@ -23,13 +24,15 @@ jobs:
2324
runs-on: ubuntu-latest
2425
env:
2526
ALUMNIUM_MODEL: ${{ matrix.model }}
27+
ALUMNIUM_RPM_LIMIT: ${{ matrix.model == 'google' && '15' || '0' }}
2628
strategy:
2729
fail-fast: false
2830
matrix:
2931
model:
3032
- aws_anthropic
3133
- aws_meta
3234
- azure_openai
35+
- google
3336
steps:
3437
- uses: actions/checkout@v4
3538
- uses: actions/setup-python@v5
@@ -39,6 +42,7 @@ jobs:
3942
path: ./.venv
4043
key: venv-${{ hashFiles('poetry.lock') }}
4144
- run: Xvfb ${{ env.DISPLAY }} &
45+
- run: echo ALUMNIUM_DEBUG=${{ runner.debug }} >> "$GITHUB_ENV"
4246
- run: poetry install
4347
- run: poetry run behave --format html-pretty --outfile reports/behave.html --format pretty
4448
- run: poetry run pytest --html reports/pytest.html
@@ -70,7 +74,7 @@ jobs:
7074
key: venv-${{ hashFiles('poetry.lock') }}
7175
- run: poetry install
7276
- run: poetry build
73-
- run: echo "version=$(poetry version --short)" >> $GITHUB_OUTPUT
77+
- run: echo "version=$(poetry version --short)" >> "$GITHUB_OUTPUT"
7478
id: version
7579
- uses: actions/attest-build-provenance@v1
7680
with:

alumnium/agents/actor_agent.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from langchain_core.language_models import BaseChatModel
66
from langchain_core.prompts import ChatPromptTemplate
77

8+
from alumnium.delayed_runnable import DelayedRunnable
89
from alumnium.drivers import SeleniumDriver
910
from alumnium.tools import ALL_TOOLS
1011

@@ -27,7 +28,7 @@ def __init__(self, driver: SeleniumDriver, llm: BaseChatModel):
2728
("human", self.USER_MESSAGE),
2829
]
2930
)
30-
self.chain = prompt | llm
31+
self.chain = prompt | DelayedRunnable(llm)
3132

3233
def invoke(self, goal: str):
3334
logger.info(f"Starting action:")

alumnium/agents/contradiction_checker_agent.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from langchain_core.language_models import BaseChatModel
55
from pydantic import BaseModel, Field
66

7+
from alumnium.delayed_runnable import DelayedRunnable
78

89
logger = logging.getLogger(__name__)
910

@@ -17,7 +18,7 @@ class ContradictionCheckerAgent:
1718
USER_MESSAGE = f.read()
1819

1920
def __init__(self, llm: BaseChatModel):
20-
self.chain = llm.with_structured_output(Response, include_raw=True)
21+
self.chain = DelayedRunnable(llm.with_structured_output(Response, include_raw=True))
2122

2223
def invoke(self, statement: str, verification_explanation: str) -> bool:
2324
logger.info(f"Starting contradiction checking:")

alumnium/agents/loading_detector_agent.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from langchain_core.language_models import BaseChatModel
55
from pydantic import BaseModel, Field
66

7+
from alumnium.delayed_runnable import DelayedRunnable
78

89
logger = logging.getLogger(__name__)
910

@@ -26,10 +27,10 @@ class LoadingDetectorAgent:
2627

2728
def __init__(self, llm: BaseChatModel):
2829
llm = llm.with_structured_output(Loading, include_raw=True)
29-
self.chain = llm
30+
self.chain = DelayedRunnable(llm)
3031

3132
def invoke(self, aria: str, title: str, url: str, screenshot: str = ""):
32-
logger.info(f"Starting loading detection:")
33+
logger.info("Starting loading detection:")
3334

3435
human_messages = [
3536
{

alumnium/agents/verifier_agent.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from langchain_core.language_models import BaseChatModel
66
from pydantic import BaseModel, Field
77

8+
from alumnium.delayed_runnable import DelayedRunnable
89
from alumnium.drivers import SeleniumDriver
910
from . import LoadingDetectorAgent
1011

@@ -26,7 +27,7 @@ class VerifierAgent:
2627

2728
def __init__(self, driver: SeleniumDriver, llm: BaseChatModel):
2829
self.driver = driver
29-
self.chain = llm.with_structured_output(Verification, include_raw=True)
30+
self.chain = DelayedRunnable(llm.with_structured_output(Verification, include_raw=True))
3031

3132
self.loading_detector_agent = LoadingDetectorAgent(llm)
3233
self.retry_count = LoadingDetectorAgent.timeout / LoadingDetectorAgent.delay

alumnium/delayed_runnable.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import logging
2+
from os import getenv
3+
from time import sleep
4+
5+
from langchain_core.runnables import Runnable
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
class DelayedRunnable(Runnable):
    """Runnable wrapper that sleeps before each invocation to respect a rate limit.

    Useful for free tiers of LLM providers (e.g. Google AI Studio's 15 RPM
    limit for Gemini): setting ALUMNIUM_RPM_LIMIT=15 delays every request by
    4 seconds, avoiding "429 Resource Exhausted" errors.

    Note: when the ALUMNIUM_RPM_LIMIT environment variable is set to a
    non-zero value, it takes precedence over the explicit ``delay`` argument.
    """

    def __init__(self, runnable: Runnable, delay: float = 0):
        """Wrap ``runnable``, pausing ``delay`` seconds before each ``invoke``.

        ``delay`` defaults to 0 (no pause); a non-zero ALUMNIUM_RPM_LIMIT
        environment variable overrides it with 60 / RPM seconds.
        """
        self.runnable = runnable
        self.delay = delay
        # os.getenv returns a string; keep the default a string for symmetry.
        rpm_limit = int(getenv("ALUMNIUM_RPM_LIMIT", "0"))
        if rpm_limit:
            # Spread requests evenly across the minute: 60s / RPM.
            # This is a float (e.g. 60 / 13 ≈ 4.6), hence the float annotation.
            self.delay = 60 / rpm_limit

    def invoke(self, input, config=None):
        """Sleep for the configured delay (if any), then delegate to the wrapped runnable."""
        if self.delay:
            logger.info(f"Delaying invocation for {self.delay} seconds")
            sleep(self.delay)
        return self.runnable.invoke(input, config)

examples/pytest/calculator_test.py

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
@mark.xfail(Model.load() == Model.AWS_ANTHROPIC, reason="Bedrock version of Haiku is subpar")
66
@mark.xfail(Model.load() == Model.AWS_META, reason="It is too hard for Llama 3.2")
7+
@mark.xfail(Model.load() == Model.GOOGLE, reason="It is too hard for Gemini 1.5 Flash")
78
def test_addition(al, driver):
89
driver.get("https://seleniumbase.io/apps/calculator")
910
al.do("1 + 1 =")

0 commit comments

Comments
 (0)