Skip to content

Weird Behaviour when using OpenAI with Aiohttp #938

@baniasbaabe

Description

@baniasbaabe

Hi guys,

I noticed something very weird when using vcrpy together with Async OpenAI and Aiohttp (instead of the default httpx).

import json
import logging
import typing as ty
from pathlib import Path

import tiktoken
from aiohttp import ClientSession
from httpx_aiohttp import AiohttpTransport
from langfuse.decorators import observe
from langfuse.openai import AsyncAzureOpenAI
from openai import BadRequestError, DefaultAsyncHttpxClient, RateLimitError
from openai.types.audio import TranscriptionVerbose
from openai.types.chat import ChatCompletionMessageParam
from pydantic import BaseModel
from tenacity import (
    RetryCallState,
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_chain,
    wait_fixed,
)

from .base import AsyncClient

class OpenAIManager(AsyncClient):
    """Wrapper that owns an ``AsyncAzureOpenAI`` client routed through aiohttp."""

    def __init__(self, settings: AppSettings, configs: AppConfig):
        """Keep the given settings/configs and build the Azure OpenAI client.

        The OpenAI client is handed an httpx client whose transport is an
        ``AiohttpTransport`` wrapping a newly created ``ClientSession``.
        """
        self.configs = configs
        self.settings = settings

        # NOTE(review): the session is created here and nothing in this
        # snippet closes it -- confirm cleanup happens elsewhere.
        session = ClientSession()
        transport = AiohttpTransport(client=session)
        http_client = DefaultAsyncHttpxClient(transport=transport)

        client_options = {
            "azure_endpoint": settings.AZURE_OPENAI_ENDPOINT,
            "api_version": settings.AZURE_OPENAI_API_VERSION,
            "api_key": settings.AZURE_OPENAI_API_KEY,
            "timeout": 300,
            "http_client": http_client,
        }
        self.openai_client = AsyncAzureOpenAI(**client_options)

Now, when I use OpenAI with AiohttpTransport, my recorded cassettes contain multiple calls to OpenAI (only one should be there), and my tests fail.

When I comment out the http_client parameter, everything works fine (since OpenAI then uses plain HTTPX by default).

My config:

@pytest.fixture(scope="module")
def vcr_config():
    """Return a module-scoped ``vcr.VCR`` that stores cassettes next to this file.

    The ``cassettes`` directory beside this module is created if it does
    not exist yet.
    """
    library_dir = Path(__file__).parent / "cassettes"
    library_dir.mkdir(exist_ok=True)

    # Hosts whose traffic is excluded from recording/playback.
    skipped_hosts = [
        "localhost",
        "unix",
        "docker",
        "langfuse.genai-netz-nele-dev.enbw-az.cloud",
    ]
    # Request headers filtered out of the recorded cassettes.
    secret_headers = ["authorization", "api-key"]

    vcr_options = {
        "cassette_library_dir": str(library_dir),
        "filter_headers": secret_headers,
        "ignore_hosts": skipped_hosts,
        "ignore_localhost": True,
        "record_mode": "new_episodes",
        "match_on": ["uri", "method", "body"],
        "decode_compressed_response": True,
        "record_on_exception": False,
    }
    return vcr.VCR(**vcr_options)

@pytest.mark.asyncio
async def test_vector_search_knowledge_file_success(test_client_tmr_user, vcr_config):
    """GET /vector_search under a cassette and expect a list of at most 5 items."""
    cassette_ctx = vcr_config.use_cassette(
        "test_vector_search_knowledge_file_success.yaml",
        allow_playback_repeats=True,
    )
    # The assertions stay inside the cassette context, as in the original.
    with cassette_ctx:
        response = await test_client_tmr_user.get(
            "/vector_search",
            params={
                ...
            },
        )

        assert response.status_code == 200
        results = response.json()
        assert isinstance(results, list)
        assert len(results) <= 5

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions