import pandas as pd
import cloudscraper
from urllib.parse import urlparse

def _download(url: str) -> pd.DataFrame:
    # Parse the query ID from URL
    parsed = urlparse(url)
    parts = [part for part in parsed.path.split("/") if part]
    if len(parts) < 2 or parts[0] != "queries":
        raise ValueError("URL is not a valid Dune query URL. Follow the guide at: https://chaindl.readthedocs.io/#dune-dune-com")
    query_id = int(parts[1])

    scraper = cloudscraper.create_scraper()

    # Get latest result set ID
    graphql_url = "https://dune.com/public/graphql"
    payload = {
        "operationName": "GetLatestResultSetIds",
        "variables": {"queryId": query_id, "parameters": [], "canRefresh": True},
        "query": """
        query GetLatestResultSetIds($canRefresh: Boolean!, $queryId: Int!, $parameters: [ExecutionParameterInput!]) {
          resultSetForQuery(canRefresh: $canRefresh, queryId: $queryId, parameters: $parameters) {
            completedExecutionId
            failedExecutionId
            pendingExecutionId
            __typename
          }
        }
        """
    }

    response = scraper.post(graphql_url, json=payload)
    if response.status_code != 200:
        raise ConnectionError(f"Failed to get result set ID: {response.status_code}")
    json_data = response.json()
    execution_id = json_data["data"]["resultSetForQuery"]["completedExecutionId"]
    if execution_id is None:
        raise ValueError("No completed execution found for this query.")

    # Fetch all execution data with pagination
    all_data = []
    offset = 0
    limit = 9999999

    execution_url = "https://core-api.dune.com/public/execution"
    # Browser-like headers so the request looks like ordinary dune.com traffic
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0",
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.5",
        "Content-Type": "application/json",
        "Origin": "https://dune.com",
        "Referer": "https://dune.com/",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }

    while True:
        payload = {
            "execution_id": execution_id,
            "query_id": query_id,
            "parameters": [],
            "pagination": {"limit": limit, "offset": offset}
        }
        response = scraper.post(execution_url, headers=headers, json=payload)
        if response.status_code != 200:
            raise ConnectionError(f"Failed to fetch execution data: {response.status_code}")
        json_data = response.json()
        execution_result = json_data.get("execution_succeeded")
        if not execution_result:
            raise ValueError("Execution failed or no data returned.")

        data = execution_result["data"]
        total_row_count = execution_result["total_row_count"]
        all_data.extend(data)

        # Stop once every row has been collected; also bail out on an empty page
        # so an inconsistent total_row_count cannot cause an infinite loop.
        if not data or len(all_data) >= total_row_count:
            break
        offset = len(all_data)

    # Convert to DataFrame
    df = pd.DataFrame(all_data)
    return df
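
A minimal usage sketch, assuming _download is called directly from this module; the query URL below is a placeholder, not a real Dune query:

df = _download("https://dune.com/queries/1234567")  # hypothetical query ID
print(df.shape)
df.to_csv("dune_query_results.csv", index=False)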