
Commit 18efdcb

Merge branch 'staging' into SN1-447-add-logits
2 parents ddd3529 + 6c0b13b commit 18efdcb

File tree

8 files changed: +121 −19 lines

poetry.lock

+16
Generated file; diff not rendered by default.

prompting/datasets/random_website.py

+4-3
@@ -26,15 +26,16 @@ class DDGDataset(BaseDataset):
 
     def search_random_term(self, retries: int = 3) -> tuple[Optional[str], Optional[list[dict[str, str]]]]:
         ddg = PatchedDDGS(proxy=settings.shared_settings.PROXY_URL, verify=False)
+        exception: BaseException | None = None
         for _ in range(retries):
             random_words = " ".join(random.sample(ENGLISH_WORDS, 3))
             try:
                 results = list(ddg.text(random_words))
                 if results:
                     return random_words, results
-            except Exception as ex:
-                logger.debug(f"Failed to get search results from DuckDuckGo: {ex}")
-        logger.warning(f"Failed to get search results from DuckDuckGo after {retries} tries")
+            except BaseException as ex:
+                exception = ex
+        logger.warning(f"Failed to get search results from DuckDuckGo after {retries} tries: {exception}")
         return None, None
 
     @staticmethod
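For context, this change keeps only the last exception from the retry loop and logs it once after all attempts fail, instead of logging every failure. A minimal standalone sketch of that pattern (the names here are illustrative, not the dataset's API):

import logging

logger = logging.getLogger(__name__)


def fetch_with_retries(fetch, retries: int = 3):
    """Retry `fetch`; remember only the last exception and warn once at the end."""
    exception: BaseException | None = None
    for _ in range(retries):
        try:
            result = fetch()
            if result:
                return result
        except BaseException as ex:  # broad catch mirrors the hunk above
            exception = ex
    logger.warning(f"All {retries} attempts failed: {exception}")
    return None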

prompting/llms/apis/llm_messages.py

+1-1
@@ -37,7 +37,7 @@ def calculate_image_tokens(width: int, height: int, low_res: bool = False) -> int:
 
 class LLMMessage(BaseModel):
     role: Literal["system", "user", "assistant"]
-    content: str = None
+    content: str | None = None
     image: Image.Image | None = None
 
     class Config:
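For context, the annotation fix matters under Pydantic validation: with `content: str = None`, an explicitly passed `content=None` fails type validation, whereas `str | None` accepts it. A minimal standalone sketch, covering only the fields shown in the hunk (the real class also carries an `image` field and a `Config`):

from typing import Literal

from pydantic import BaseModel


class Message(BaseModel):  # illustrative stand-in for LLMMessage
    role: Literal["system", "user", "assistant"]
    content: str | None = None


Message(role="assistant")            # content defaults to None
Message(role="user", content="hi")   # explicit string validates
Message(role="user", content=None)   # valid now; rejected under `content: str`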

prompting/rewards/web_retrieval.py

+45-8
@@ -4,9 +4,11 @@
 import json
 import os
 from collections import defaultdict
+from datetime import datetime
 
 import numpy as np
 import pandas as pd
+import whois
 from loguru import logger
 from pydantic import BaseModel
 from scipy import spatial
@@ -48,6 +50,9 @@
 # Maximum number of past URLs to store per user
 N_PAST_URLS = 200
 
+# Minimum age of the website.
+MIN_AGE_DAYS = 90
+
 # Load the past_websites dictionary and top domains
 try:
     # Load top domains
@@ -98,6 +103,36 @@ def __hash__(self):
         # Use the id of the object as its hash
         return hash(self.model_dump_json)
 
+    @staticmethod
+    @async_lru_cache(maxsize=1000)
+    async def domain_age_days(domain: str, fallback_age: int = 1_000_000) -> int:
+        """Returns the age of a domain in days.
+
+        Args:
+            domain: Website url.
+            fallback_age: If can't fetch domain age, fallback to `fallback_age` age.
+
+        Returns:
+            Domain age in days since creation.
+        """
+        fallback_age = 1_000_000
+        try:
+            w = whois.whois(domain)
+            creation_date = w.creation_date
+            if isinstance(creation_date, list):
+                creation_date = creation_date[0]
+
+            if creation_date is None:
+                return fallback_age
+            # Convert everything to naive datetime in UTC or local.
+            if hasattr(creation_date, "tzinfo") and creation_date.tzinfo is not None:
+                creation_date = creation_date.replace(tzinfo=None)
+            delta = datetime.now() - creation_date
+            return delta.days
+        except BaseException as e:
+            logger.debug(f"Error fetching domain age data: {e}")
+            return fallback_age
+
     @async_lru_cache(maxsize=1000)
     async def _cosine_similarity(self, content1: str, content2: str) -> float:
         """Calculate the cosine similarity between sentence embeddings of the reference and completions."""
@@ -111,13 +146,18 @@ async def score_website_result(
         if not response_url or not response_content or not response_relevant:
             return 0
 
-        # Extract domain from URL
+        # Extract domain from URL.
         netloc = extract_main_domain(response_url)
+        logger.debug(f"Scoring url: {response_url}")
 
         if any(term in response_url for term in BLACKLISTED_TERMS):
             logger.debug(f"Domain {response_url} contains blacklisted term, scoring 0")
             return 0
 
+        if (days := await self.domain_age_days(response_url)) < MIN_AGE_DAYS:
+            logger.debug(f"Domain {response_url} is too young ({days} days old), scoring 0")
+            return 0
+
         # Penalise a completion where the relevant section is contained in the URL (e.g. miners)
         # trying to use a search box to enter exactly the relevant section they need
         discount_factor = 1 - fuzz.token_sort_ratio(response_url, response_relevant) / 100
@@ -147,17 +187,17 @@ async def score_website_result(
         # Content scraped from the URL provided in the completion.
         reference_website_content = DDGDataset.extract_website_content(response_url)
         if not reference_website_content or len(reference_website_content) == 0:
-            logger.debug(f"Failed to extract miner's content from website: {response_url}")
+            logger.debug(f"Failed to extract miner {uid} content from website: {response_url}")
             return 0
 
         if fuzz.ratio(response_content, reference_website_content) < MIN_MATCH_THRESHOLD:
-            logger.debug("Miner returned text that doesn't match the website, scoring 0")
+            logger.debug(f"Miner {uid} returned text that doesn't match the website, scoring 0")
             return 0
 
         if len(response_relevant) > len(response_content) or len(response_relevant) < MIN_RELEVANT_CHARS:
             logger.debug(
-                f"Relevant section is too short (<{MIN_RELEVANT_CHARS} chars) or longer than the whole website content "
-                f"{len(response_relevant)} > {len(response_content)}"
+                f"Miner {uid} relevant section is too short (<{MIN_RELEVANT_CHARS} chars) or longer than the whole "
+                f"website content {len(response_relevant)} > {len(response_content)}"
             )
             return 0
 
@@ -209,9 +249,6 @@ async def reward(
             rewards.append(await self.score_miner_response(dataset_entry, completion, task=task, uid=uid))
             timings.append(0)
 
-        logger.debug(f"REWARDWEBRETRIEVAL: {rewards}")
-        logger.debug(f"COMPLETIONS: {response_event.completions}")
-
         # Save the past_websites dictionary to CSV
        past_websites_data = []
         for uid, domains in past_websites.items():
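Taken together, the new gate rejects URLs whose domain is younger than MIN_AGE_DAYS, using the WHOIS creation date and falling back to a very large age when the lookup fails. A minimal sketch of the same check outside the reward model, assuming the python-whois package (imported as `whois`); the helper names are illustrative:

from datetime import datetime

import whois  # python-whois

MIN_AGE_DAYS = 90
FALLBACK_AGE = 1_000_000  # treat domains with unknown creation dates as old enough


def domain_age_days(domain: str, fallback_age: int = FALLBACK_AGE) -> int:
    """Days since the domain's WHOIS creation date, or `fallback_age` if unavailable."""
    try:
        record = whois.whois(domain)
        creation_date = record.creation_date
        if isinstance(creation_date, list):  # some registrars return several dates
            creation_date = creation_date[0]
        if creation_date is None:
            return fallback_age
        if getattr(creation_date, "tzinfo", None) is not None:
            creation_date = creation_date.replace(tzinfo=None)  # compare naive datetimes
        return (datetime.now() - creation_date).days
    except Exception:
        return fallback_age


def is_old_enough(domain: str) -> bool:
    return domain_age_days(domain) >= MIN_AGE_DAYS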

pyproject.toml

+2
@@ -159,6 +159,7 @@ datasets = { version = ">=3.1.0", optional = true }
 nltk = { version = ">=3.8.1", optional = true }
 thefuzz = { version = ">=0.22.1", optional = true }
 wandb = { version = ">=0.19.4", optional = true }
+python-whois = { version = ">=0.9.5", optional = true }
 substrate-interface = "^1.7.11"
 tldextract = "^5.1.3"
 justext = "3.0.2"
@@ -183,6 +184,7 @@ validator = [
     "datasets",
     "nltk",
     "wandb",
+    "python-whois",
 ]
 
 [build-system]

shared/epistula.py

+31-5
@@ -123,11 +123,21 @@ async def query_miners(
         tasks = []
         for uid in uids:
             try:
+                timeout_connect = 10
+                timeout_postprocess = 5
                 response = asyncio.wait_for(
                     asyncio.create_task(
-                        make_openai_query(shared_settings.METAGRAPH, shared_settings.WALLET, timeout_seconds, body, uid)
+                        make_openai_query(
+                            shared_settings.METAGRAPH,
+                            shared_settings.WALLET,
+                            timeout_seconds,
+                            body,
+                            uid,
+                            timeout_connect=timeout_connect,
+                        )
                     ),
-                    timeout=timeout_seconds,
+                    # Give additional time for connect and result post-processings.
+                    timeout=timeout_seconds + timeout_connect + timeout_postprocess,
                 )
             except asyncio.TimeoutError:
                 logger.error(f"Timeout exceeded while querying miner {uid}")
@@ -136,23 +146,37 @@ async def query_miners(
 
         responses = await asyncio.gather(*tasks, return_exceptions=True)
 
-        results = []
+        responses_valid = 0
+        responses_error = 0
+        responses_exception = 0
+        exception_info: Exception | None = None
+        results: list[SynapseStreamResult] = []
         for response, uid in zip(responses, uids):
             if isinstance(response, Exception):
+                responses_exception += 1
+                exception_info = response
                 results.append(SynapseStreamResult(exception=str(response)))
             elif isinstance(response, tuple) and isinstance(response[0], ChatCompletion):
+                if response and response[1]:
+                    responses_valid += 1
                 results.append(
                     SynapseStreamResult(
                         uid=uid,
-                        response=response[0],
                         accumulated_chunks=response[1],
                         accumulated_chunks_timings=response[2],
                         accumulated_chunk_dicts_raw=response[3],
                     )
                 )
             else:
+                responses_error += 1
                 logger.error(f"Unknown response type: {response}")
                 results.append(SynapseStreamResult(uid=uid, exception=f"Unknown response type: {response}"))
+
+        logger.info(
+            f"Responses success: {responses_valid}/{len(uids)}. "
+            f"Responses exception: {responses_exception}/{len(uids)} [{exception_info}]. "
+            f"Reponses error: {responses_error}/{len(uids)}"
+        )
         return results
     except Exception as e:
         logger.exception(f"Error in query_miners: {e}")
@@ -211,14 +235,15 @@ async def make_openai_query(
     body: dict[str, Any],
     uid: int,
     stream: bool = False,
+    timeout_connect: int = 10,
 ) -> tuple[ChatCompletion, list, list] | AsyncGenerator:
     body["seed"] = body.get("seed", random.randint(0, 1000000))
     axon_info = metagraph.axons[uid]
     miner = openai.AsyncOpenAI(
         base_url=f"http://{axon_info.ip}:{axon_info.port}/v1",
         api_key="Apex",
         max_retries=0,
-        timeout=Timeout(timeout_seconds, connect=5, read=timeout_seconds - 5),
+        timeout=Timeout(timeout_seconds, connect=timeout_connect, read=timeout_seconds),
         http_client=openai.DefaultAsyncHttpxClient(
             event_hooks={
                 "request": [create_header_hook(wallet.hotkey, axon_info.hotkey, timeout_seconds=timeout_seconds)]
@@ -227,6 +252,7 @@ async def make_openai_query(
     )
     extra_body = {k: v for k, v in body.items() if k not in ["messages", "model"]}
     body["messages"] = model_factory(body.get("model")).format_messages(body["messages"])
+
     start_time = time.perf_counter()
     chat = await miner.chat.completions.create(
         # model=None,
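The query_miners change splits the deadline into a generation budget plus fixed allowances for connecting and post-processing, so the outer asyncio.wait_for deadline stays ahead of the inner HTTP client timeouts. A minimal sketch of that budgeting, assuming an arbitrary async query coroutine (the helper names are illustrative, not the shared library's API):

import asyncio


async def query_with_budget(make_query, timeout_seconds: int = 30):
    """Run `make_query()` with extra headroom for connect and post-processing."""
    timeout_connect = 10      # allowance for establishing the connection
    timeout_postprocess = 5   # allowance for assembling the streamed result
    task = asyncio.create_task(make_query())
    try:
        return await asyncio.wait_for(
            task,
            timeout=timeout_seconds + timeout_connect + timeout_postprocess,
        )
    except asyncio.TimeoutError:
        return None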

tests/prompting/rewards/test_web_retrieval.py

+22-1
@@ -1,5 +1,6 @@
 # ruff: noqa: E402
-from unittest.mock import MagicMock
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock, patch
 
 import numpy as np
 import pytest
@@ -10,6 +11,26 @@
 from prompting.rewards.web_retrieval import WebRetrievalRewardModel
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "creation_date, expected_age",
+    [
+        # Domain created 100 days ago.
+        (datetime.now() - timedelta(days=100), 100),
+        # Domain created 10 days ago.
+        (datetime.now() - timedelta(days=10), 10),
+        # Domain has no valid creation_date => fallback_age.
+        (None, 1_000_000),
+    ],
+)
+async def test_domain_age(creation_date: datetime, expected_age: int):
+    mock_whois = MagicMock()
+    mock_whois.creation_date = creation_date
+    with patch("prompting.rewards.web_retrieval.whois.whois", return_value=mock_whois):
+        age = await WebRetrievalRewardModel.domain_age_days("testdomain.com", fallback_age=1_000_000)
+        assert age == expected_age
+
+
 @pytest.mark.parametrize(
     "completion, expected_url, expected_content, expected_relevant",
     [

validator_api/test_time_inference.py

-1
@@ -110,7 +110,6 @@ async def single_attempt():
                 max_tokens=2000,
             )
 
-            logger.debug(f"Making API call with\n\nMESSAGES: {messages}\n\nRESPONSE: {response_str}")
             response_dict = parse_multiple_json(response_str)[0]
             return response_dict
         except Exception as e:
