Skip to content

Hotfix: Missing Monsters/NPCs #46

Open
@imgurbot12

Description

@imgurbot12

Hello! Love this project and I'm sad to see it hasn't been updated in a while, but I managed a pseudo-hotfix to include all of the most recent monsters/npcs if anyone is interested.

I wrote this quick Python script to scrape the contents from aonprd's Elasticsearch instance by reverse engineering their API calls. It should hopefully generate a complete manifest of all available monsters/NPCs. I hope it's useful for future updates that fix the current scraper implementation, and helpful to anyone using this project in the future.

For any users, the generated creatures.json file just needs to be placed within the src/webui/public/ folder.
The output could potentially just be added to the existing gh-pages site in order to include all of the current existing content.

"""
Monsters JSON Scraper
"""
import os
import json
import dataclasses
from dataclasses import dataclass, field
from typing import List, Set

import requests

#** Variables **#

#: user-agent to use when making requests
USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.81 Safari/537.36'

#: cache for raw-npcs results
NPCS_CACHE = 'npcs-raw.json'

#: cache for raw-monsters results
MONSTERS_CACHE = 'monsters-raw.json'

#: aonprd hostname
AEONPRD = '2e.aonprd.com'

#: aonprd elasticsearch search endpoint
AEONPRD_ELASTIC = 'https://elasticsearch.aonprd.com/aon/_search'

#: pre-calculated aonprd elastic lookup query-params
ELASTIC_PARAM = {'track_total_hits': 'true'}

#: pre-calculated headers to include during elasticsearch lookup
#: (every value is a string: requests refuses header values of any other type)
ELASTIC_HEADERS = {
    'user-agent':     USER_AGENT,
    'accept':         '*/*',
    'origin':         f'https://{AEONPRD}',
    'referer':        f'https://{AEONPRD}',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'sec-gpc':        '1',
}

#** Functions **#

def creature_data(npcs: bool = False, timeout: float = 30.0) -> dict:
    """retrieve raw monster-data from aonprd elasticsearch

    The decoded response is cached on disk (`NPCS_CACHE` or `MONSTERS_CACHE`
    depending on `npcs`); when that file already exists it is returned
    as-is and no request is made.

    :param npcs:    query creatures flagged `npc:true` rather than `npc:false`
    :param timeout: seconds to wait on the server before giving up
    :return:        decoded json body of the elasticsearch response
    :raises requests.HTTPError: the server answered with an error status
    """
    # retrieve from file-cache
    cache = NPCS_CACHE if npcs else MONSTERS_CACHE
    if os.path.exists(cache):
        # the cache holds the raw response bytes, so read it back as bytes
        # instead of decoding with whatever the locale encoding happens to be
        with open(cache, 'rb') as f:
            return json.load(f)
    # construct query json
    json_post = {
        "query":{
            "function_score":{
                "query":{
                    "bool":{
                        "filter":[
                            {"query_string":{
                                "query":f"category:creature npc:{str(npcs).lower()} ",
                                "default_operator":"AND",
                                "fields":["name","text^0.1","trait_raw","type"]
                            }}
                        ],
                        "must_not":[{"term":{"exclude_from_search":True}}]
                    }
                },
                "boost_mode":"multiply",
                "functions":[
                    {"filter":{"terms":{"type":["Ancestry","Class"]}},"weight":1.1},
                    {"filter":{"terms":{"type":["Trait"]}},"weight":1.05}
                ]
            }
        },
        "size":    10000,
        "sort":    ["_score","_doc"],
        "_source": {"excludes":["text"]},
        "aggs":    {
            "group1":{
                "composite":{
                    "sources":[{"field1":{"terms":{"field":"type","missing_bucket":True}}}],
                    "size":10000
                }
            }
        }
    }
    # header values are coerced to str because requests rejects anything else
    headers = {key: str(value) for key, value in ELASTIC_HEADERS.items()}
    res = requests.post(
        AEONPRD_ELASTIC,
        params=ELASTIC_PARAM,
        headers=headers,
        json=json_post,
        timeout=timeout,
    )
    # validate before caching: a stored error page or truncated body would
    # otherwise be served from the cache on every later run
    res.raise_for_status()
    payload = res.json()
    with open(cache, 'wb') as f:
        f.write(res.content)
    return payload

def parse_creatures(data: dict) -> List['Creature']:
    """parse through raw monster data to retrieve important details

    :param data: decoded elasticsearch response; every entry of
                 `data['hits']['hits']` is read through its `_source` mapping
    :return:     one `Creature` per hit, in response order
    :raises RuntimeError: a hit's url does not end in `=<digits>`
    """
    # parse creatures from raw data
    creatures = []
    for hit in data['hits']['hits']:
        attrs = hit['_source']
        name  = attrs['name']
        # the id is whatever trails the last '=' of the url; a url without
        # any '=' is reported the same way as a non-numeric id
        _, separator, creature_id = attrs['url'].rpartition('=')
        if not separator or not creature_id.isdigit():
            raise RuntimeError(f'failed to parse {name!r}')
        npc      = attrs.get('npc', False)
        align    = attrs['alignment']
        traits   = set(attrs['trait'])
        images   = attrs.get('image', [])
        creature = 'NPC' if npc else attrs['type']
        # the alignment is stored in its own field, keep it out of the traits
        traits.discard(align)
        creatures.append(Creature(
            id=creature_id,
            name=name,
            level=attrs['level'],
            alignment=align,
            creature_type=creature,
            size=attrs['size'][0],
            rarity=attrs['rarity'],
            lore=attrs.get('summary', ''),
            family=attrs.get('creature_family', ''),
            image_url=images[0] if images else '',
            npc=npc,
            traits=traits,
        ))
    return creatures

def generate_data(creatures: List['Creature']) -> 'Data':
    """generate metadata and pass into data object

    :param creatures: parsed creatures to summarise
    :return:          `Data` holding the same list plus the collected `Metadata`
    """
    # generate meta-data from collected monsters
    meta = Metadata(total=len(creatures))
    if creatures:
        # take the bounds from the creatures themselves; folding them into
        # the default of 0 would report a wrong bound whenever every level
        # sits on the same side of 0
        levels = [creature.level for creature in creatures]
        meta.min_level = min(levels)
        meta.max_level = max(levels)
    for creature in creatures:
        meta.alignments.add(creature.alignment)
        meta.creature_types.add(creature.creature_type)
        meta.rarities.add(creature.rarity)
        meta.sizes.add(creature.size)
        meta.traits |= creature.traits
        # an empty family means "no family" and is left out of the set
        if creature.family:
            meta.families.add(creature.family)
    return Data(creatures, meta)

#** Classes **#

@dataclass
class Metadata:
    """aggregate values describing a whole collection of creatures"""
    # numeric summary, all starting at 0
    min_level: int = 0
    max_level: int = 0
    total: int = 0
    # distinct values per attribute; each instance gets its own empty sets
    families: Set[str] = field(default_factory=set)
    alignments: Set[str] = field(default_factory=set)
    creature_types: Set[str] = field(default_factory=set)
    traits: Set[str] = field(default_factory=set)
    rarities: Set[str] = field(default_factory=set)
    sizes: Set[str] = field(default_factory=set)

@dataclass
class Creature:
    """single creature record as written to the output json"""
    # required fields, in positional order
    id: str
    name: str
    creature_type: str
    level: int
    alignment: str
    size: str
    rarity: str
    lore: str
    family: str
    image_url: str
    # optional fields
    npc: bool = False
    traits: Set[str] = field(default_factory=set)

@dataclass(repr=False)
class Data:
    """top-level output object: the creature list plus its metadata"""
    creatures: List[Creature] = field(default_factory=list)
    metadata: Metadata = field(default_factory=Metadata)

    def __repr__(self) -> str:
        """short summary showing the creature count instead of every record"""
        return f'Creatures(found={len(self.creatures)}, meta={self.metadata!r})'

class JsonEncoder(json.JSONEncoder):
    """custom json encoder to convert sets to supported list-type

    Sets and frozensets are written as sorted lists, and dataclass instances
    as plain dictionaries.
    """
    def default(self, obj):
        """convert `obj` into something the stock encoder can serialise

        :param obj: value the stock encoder could not handle
        :return:    sorted list for a set/frozenset, dict for a dataclass instance
        :raises TypeError: any other type (raised by the base class), or a
                           set whose members cannot be ordered
        """
        if isinstance(obj, (set, frozenset)):
            return sorted(obj)
        # is_dataclass() is also true for dataclass *types*, which asdict()
        # rejects; let those fall through to the regular "not serializable" error
        if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
            return dataclasses.asdict(obj)
        return super().default(obj)

#** Init **#

# pass one: regular monsters (npc flag off)
data = creature_data(npcs=False)
monsters = parse_creatures(data)

# pass two: NPCs (npc flag on)
data = creature_data(npcs=True)
npcs = parse_creatures(data)

# merge both passes, monsters first, and write the combined manifest
collected = generate_data([*monsters, *npcs])
with open('creatures.json', mode='w') as f:
    json.dump(collected, f, cls=JsonEncoder)

Edit: a few quality-of-life changes, like sorting the sets once they are converted back into lists and making NPCs a separate creature type.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions