# Source: futuresearch-python/tests/integration/conftest.py at b6298ffe29691487751e51723722413ae43ddc91 (futuresearch/futuresearch-python on GitHub)
"""Shared fixtures and configuration for integration tests."""

import os
from collections.abc import AsyncGenerator
from datetime import datetime

import pandas as pd
import pytest
import pytest_asyncio
from pydantic import BaseModel, Field

from everyrow.session import Session, create_session


@pytest.fixture(scope="session", autouse=True)
def require_api_key():
    """Fail the integration tests when EVERYROW_API_KEY is missing.

    Session-scoped and autouse, so no test has to request it explicitly.
    An unset or empty variable is treated the same way: ``pytest.fail`` is
    called with an explanatory message.
    """
    api_key = os.environ.get("EVERYROW_API_KEY")
    if api_key:
        # Key is present and non-empty: nothing to do.
        return
    pytest.fail("EVERYROW_API_KEY environment variable not set")


@pytest_asyncio.fixture(scope="session")
async def session() -> AsyncGenerator[Session, None]:
    """Yield one session object shared by every integration test.

    The session is named ``integration-tests-<YYYYMMDD-HHMMSS>`` using the
    local wall-clock time at fixture setup. It is opened through
    ``create_session`` as an async context manager, so the context is exited
    when the fixture is torn down.
    """
    started_at = datetime.now()
    # The format spec is forwarded to strftime by datetime.__format__.
    session_name = f"integration-tests-{started_at:%Y%m%d-%H%M%S}"
    async with create_session(name=session_name) as shared_session:
        yield shared_session


# ============================================================================
# Common Test Data - Small datasets to minimize cost/time
# ============================================================================


@pytest.fixture
def companies_df():
    """Return a three-row company table for screen/rank tests.

    Columns are ``company``, ``industry`` and ``website``; every value is a
    plain string.
    """
    header = ("company", "industry", "website")
    rows = (
        ("Apple", "Technology", "apple.com"),
        ("Microsoft", "Technology", "microsoft.com"),
        ("Coca-Cola", "Beverages", "coca-cola.com"),
    )
    # Pair each row with the header to get one record (dict) per company.
    records = [dict(zip(header, row)) for row in rows]
    return pd.DataFrame(records)


@pytest.fixture
def papers_df():
    """Return a three-row academic-paper table for dedupe tests.

    The first two rows carry the same title with different author strings,
    venues and identifiers, giving the tests a known duplicate pair; the
    third row is a distinct paper.
    """
    header = ("title", "authors", "venue", "identifier")
    rows = (
        (
            "Attention Is All You Need",
            "Vaswani et al.",
            "NeurIPS 2017",
            "10.5555/3295222.3295349",
        ),
        (
            "Attention Is All You Need",
            "Vaswani, Shazeer, Parmar et al.",
            "arXiv",
            "1706.03762",
        ),
        (
            "BERT: Pre-training of Deep Bidirectional Transformers",
            "Devlin et al.",
            "NAACL 2019",
            "10.18653/v1/N19-1423",
        ),
    )
    # Pair each row with the header to get one record (dict) per paper.
    records = [dict(zip(header, row)) for row in rows]
    return pd.DataFrame(records)


@pytest.fixture
def trials_df():
    """Return a three-row clinical-trial table for merge tests.

    Columns are ``trial_id``, ``sponsor`` and ``indication``.
    NOTE(review): the sponsor names look like they are meant to be matched
    against the ``pharma_df`` fixture's company names - confirm against the
    merge tests.
    """
    # Column-oriented layout: position i across the lists forms row i.
    columns = {
        "trial_id": ["NCT001", "NCT002", "NCT003"],
        "sponsor": ["Genentech", "MSD", "BMS"],
        "indication": ["Lung cancer", "Melanoma", "Leukemia"],
    }
    return pd.DataFrame(columns)


@pytest.fixture
def pharma_df():
    """Return a three-row pharma-company table for merge tests.

    Columns are ``company`` (full company name) and ``hq_country``.
    """
    # Column-oriented layout: position i across the lists forms row i.
    columns = {
        "company": ["Roche Holding AG", "Merck & Co.", "Bristol-Myers Squibb"],
        "hq_country": ["Switzerland", "United States", "United States"],
    }
    return pd.DataFrame(columns)


# ============================================================================
# Common Response Models
# ============================================================================


class RiskAssessment(BaseModel):
    """Response model for screen tests."""

    # NOTE(review): pydantic normally surfaces the class docstring and each
    # Field description in the generated JSON schema, so these strings are
    # presumably sent to the service - treat them as runtime text; confirm
    # before rewording.

    # Boolean screening verdict for a company.
    passes: bool = Field(description="Whether the company passes risk assessment")
    # Plain string: the description asks for Low/Medium/High, but nothing in
    # this model enforces that set of values.
    risk_level: str = Field(description="Risk level: Low, Medium, or High")


class RevenueScore(BaseModel):
    """Response model for rank tests."""

    # NOTE(review): pydantic normally surfaces the class docstring and the
    # Field description in the generated JSON schema, so these strings are
    # presumably sent to the service - confirm before rewording.

    # Single float field; per its description the unit is billions of USD.
    revenue_score: float = Field(description="Estimated annual revenue in billions USD")


class CompanyFinancials(BaseModel):
    """Detailed response model for agent_map tests."""

    # NOTE(review): pydantic normally surfaces the class docstring and each
    # Field description in the generated JSON schema, so these strings are
    # presumably sent to the service - confirm before rewording.

    # Integer amount in USD (whole dollars per the description, not billions).
    annual_revenue_usd: int = Field(description="Most recent annual revenue in USD")
    # Integer headcount.
    employee_count: int = Field(description="Current number of employees")