Skip to content

WIP: Move OS score_real_collection to async/await and add progressbars #42

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,19 @@ You can install open rarity as a [python package](https://pypi.org/project/open-
```
pip install open-rarity
```

Please refer to the [scripts/](/scripts/) folder for an example of how to use the library.

If you have downloaded the repo, you can use OpenRarity shell tool to generate json or csv outputs of OpenRarity scoring and ranks for any collections:
```
python -m scripts.score_real_collections boredapeyachtclub proof-moonbirds
```
Read [developer documentation](https://openrarity.gitbook.io/developers/) for advanced library usage

This may also be used to generate json or csv outputs of OpenRarity scoring and ranks for any number of collections.

Please note that there are rate limits when fetching data. Unless you have a special API key, it is best to only run one collection at a time.

Read [developer documentation](https://openrarity.gitbook.io/developers/) for advanced library usage

# Contributions guide and governance

Expand Down
4 changes: 2 additions & 2 deletions open_rarity/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
StringAttribute,
Token,
TokenMetadata,
TokenStandard,
TokenRarity,
TokenStandard,
)
from .scoring import Scorer as OpenRarityScorer
from .rarity_ranker import RarityRanker
from .scoring import Scorer as OpenRarityScorer
4 changes: 2 additions & 2 deletions open_rarity/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .collection import Collection
from .token import Token
from .token_identifier import EVMContractTokenIdentifier
from .token_metadata import StringAttribute, TokenMetadata
from .token_rarity import TokenRarity
from .token_metadata import TokenMetadata, StringAttribute
from .token_standard import TokenStandard
from .token_identifier import EVMContractTokenIdentifier
2 changes: 1 addition & 1 deletion open_rarity/models/token_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass, field
import datetime
from dataclasses import dataclass, field
from typing import Any

from open_rarity.models.utils.attribute_utils import normalize_attribute_string
Expand Down
2 changes: 1 addition & 1 deletion open_rarity/rarity_ranker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import math

from open_rarity.models.collection import Collection
from open_rarity.models.token_rarity import TokenRarity

from open_rarity.scoring.scorer import Scorer


Expand Down
1 change: 1 addition & 0 deletions open_rarity/resolver/models/collection_with_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import dataclass

from open_rarity.models.collection import Collection


Expand Down
1 change: 1 addition & 0 deletions open_rarity/resolver/models/token_with_rarity_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from enum import Enum

from open_rarity.models.token import Token


Expand Down
218 changes: 108 additions & 110 deletions open_rarity/resolver/opensea_api_helpers.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import asyncio as aio
import logging
import math
from itertools import chain

import httpx
import requests
from requests.models import HTTPError
from satchel import chunk
from tqdm.asyncio import tqdm_asyncio as tqdm_aio

from open_rarity.models.collection import Collection
from open_rarity.models.token import Token
Expand Down Expand Up @@ -52,70 +56,14 @@ def fetch_opensea_collection_data(slug: str) -> dict:
if response.status_code != 200:
logger.debug(
f"[Opensea] Failed to resolve collection {slug}."
f"Received {response.status_code}: {response.reason}. {response.json()}"
f"Received {response.status_code}: {response.text}. {response.json()}"
)

response.raise_for_status()

return response.json()["collection"]


def fetch_opensea_assets_data(
    slug: str, token_ids: list[int], limit: int = 30
) -> list[dict]:
    """Fetches asset data from Opensea's GET assets endpoint for the given token ids

    Parameters
    ----------
    slug: str
        Opensea collection slug
    token_ids: list[int]
        the token ids to fetch asset data for; at most `limit` per call
    limit: int, optional
        How many to fetch at once. Defaults to 30, with a max of 30, by default 30.

    Returns
    -------
    list[dict]
        list of asset data dictionaries, e.g. the response in "assets" field,
        sorted by token_id asc

    Raises
    ------
    ValueError: If limit exceeds 30 or more than `limit` token ids are requested
    Exception: If api request fails
    """
    # Validate with explicit raises rather than `assert`: asserts are stripped
    # when Python runs with -O, which would silently drop these checks.
    if limit > 30:
        # Max 30 limit enforced on API
        raise ValueError(f"limit must be at most 30, got {limit}")
    if len(token_ids) > limit:
        raise ValueError(
            f"Requested {len(token_ids)} token ids, but the limit is {limit}"
        )

    querystring = {
        "token_ids": token_ids,
        "collection_slug": slug,
        "offset": "0",
        "limit": limit,
    }

    response = requests.get(
        OS_ASSETS_URL,
        headers=HEADERS,
        params=querystring,
    )

    if response.status_code != 200:
        logger.debug(
            f"[Opensea] Failed to resolve assets for {slug}. "
            f"Received {response.status_code}: {response.reason}. {response.json()}"
        )
    # No-op on 2xx; raises requests.HTTPError otherwise (consistent with
    # fetch_opensea_collection_data, which calls this unconditionally).
    response.raise_for_status()

    # The API does not sort return value assets by token ID, so sort then return
    return sorted(
        response.json()["assets"], key=(lambda a: int(a["token_id"]))
    )


def opensea_traits_to_token_metadata(asset_traits: list) -> TokenMetadata:
"""Converts asset traits list returned by opensea assets API and converts
it into a TokenMetadata.
Expand Down Expand Up @@ -163,17 +111,24 @@ def opensea_traits_to_token_metadata(asset_traits: list) -> TokenMetadata:
)


def get_tokens_from_opensea(
opensea_slug: str, token_ids: list[int]
async def get_tokens_from_opensea(
slug: str,
token_ids: list[int],
client: httpx.AsyncClient,
sem: aio.BoundedSemaphore | aio.Semaphore,
) -> list[Token]:
"""Fetches eth nft data from opensea API and stores them into Token objects

Parameters
----------
opensea_slug : str
slug : str
Opensea collection slug
token_ids : list[int]
List of token ids to fetch for
List of token ids to fetch
client : httpx.AsyncClient
Async client used to make api calls
sem : BoundedSemaphore | Semaphore
From the asyncio library and is used to rate limit api calls

Returns
-------
Expand All @@ -187,10 +142,72 @@ def get_tokens_from_opensea(
HTTPError
if request to opensea fails
"""
limit = 30
try:
assets = fetch_opensea_assets_data(
slug=opensea_slug, token_ids=token_ids
)
async with sem:
assert len(token_ids) <= limit
# Max 30 limit enforced on API
assert limit <= 30
querystring = {
"token_ids": token_ids,
"collection_slug": slug,
"offset": "0",
"limit": limit,
}
if client:
r = await client.get(
OS_ASSETS_URL,
headers=HEADERS,
params=querystring,
)
else:
async with httpx.AsyncClient() as client:
r = await client.get(
OS_ASSETS_URL,
headers=HEADERS,
params=querystring,
)

if r.status_code != 200:
logger.debug(
f"[Opensea] Failed to resolve assets for {slug}."
f"Received {r.status_code}: {r.text}. {r.json()}"
)
r.raise_for_status()

# The API does not sort return value assets by token ID, so sort then return
tokens: list[Token] = []
assets = sorted(
r.json()["assets"], key=(lambda a: int(a["token_id"]))
)
for asset in assets:
token_metadata = opensea_traits_to_token_metadata(
asset_traits=asset["traits"]
)
asset_contract_address = asset["asset_contract"]["address"]
asset_contract_type = asset["asset_contract"][
"asset_contract_type"
]
if asset_contract_type == "non-fungible":
token_standard = TokenStandard.ERC721
elif asset_contract_type == "semi-fungible":
token_standard = TokenStandard.ERC1155
else:
raise ValueError(
f"Unexpected asset contrat type: {asset_contract_type}"
)
tokens.append(
Token(
token_identifier=EVMContractTokenIdentifier(
identifier_type="evm_contract",
contract_address=asset_contract_address,
token_id=int(asset["token_id"]),
),
token_standard=token_standard,
metadata=token_metadata,
)
)

except HTTPError as e:
logger.exception(
"FAILED: get_assets: could not fetch opensea assets for %s: %s",
Expand All @@ -200,33 +217,6 @@ def get_tokens_from_opensea(
)
raise

tokens = []
for asset in assets:
token_metadata = opensea_traits_to_token_metadata(
asset_traits=asset["traits"]
)
asset_contract_address = asset["asset_contract"]["address"]
asset_contract_type = asset["asset_contract"]["asset_contract_type"]
if asset_contract_type == "non-fungible":
token_standard = TokenStandard.ERC721
elif asset_contract_type == "semi-fungible":
token_standard = TokenStandard.ERC1155
else:
raise ValueError(
f"Unexpected asset contrat type: {asset_contract_type}"
)
tokens.append(
Token(
token_identifier=EVMContractTokenIdentifier(
identifier_type="evm_contract",
contract_address=asset_contract_address,
token_id=int(asset["token_id"]),
),
token_standard=token_standard,
metadata=token_metadata,
)
)

return tokens


Expand Down Expand Up @@ -274,7 +264,7 @@ def get_collection_with_metadata_from_opensea(
return collection_with_metadata


def get_collection_from_opensea(
async def get_collection_from_opensea(
opensea_collection_slug: str,
) -> Collection:
"""Fetches collection and token data with OpenSea endpoint and API key
Expand Down Expand Up @@ -308,26 +298,34 @@ def get_collection_from_opensea(
# Fetch token metadata
tokens: list[Token] = []
batch_size = 30
num_batches = math.ceil(total_supply / batch_size)
initial_token_id = 0

# Returns a list of `batch_size` token IDs, such that no token ID
# can exceed `max_token_id` (in which case len(return_value) < `batch_size`)
def get_token_ids(
batch_id: int, max_token_id: int = total_supply - 1
) -> list[int]:
token_id_start = initial_token_id + (batch_id * batch_size)
token_id_end = int(min(token_id_start + batch_size - 1, max_token_id))
return list(range(token_id_start, token_id_end + 1))

for batch_id in range(num_batches):
token_ids = get_token_ids(batch_id)
tokens_batch = get_tokens_from_opensea(
opensea_slug=opensea_collection_slug, token_ids=token_ids
max_token_id: int = total_supply - 1

# We need to bound the number of awaitables to avoid hitting the OS rate limit
sem = aio.BoundedSemaphore(4)
async with httpx.AsyncClient(timeout=None) as client:
tasks = [
get_tokens_from_opensea(
slug=opensea_collection_slug,
token_ids=token_ids,
client=client,
sem=sem,
)
for token_ids in chunk(
list(range(initial_token_id, max_token_id)), batch_size
)
]
tokens = list(
chain(
*(
await tqdm_aio.gather(
*tasks,
desc=f"Fetch {opensea_collection_slug} Token Batches from OS",
)
)
)
)

tokens.extend(tokens_batch)

collection = Collection(
name=collection_obj["name"],
attributes_frequency_counts=collection_obj["traits"],
Expand Down
Loading