diff --git a/.gitignore b/.gitignore index 6e80a91..f3398bd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,20 @@ __pycache__/ *.py[cod] *$py.class +.claude/ +data/aminer/ +data/arnetminer/ +data/inspire/ +data/kisti/ +data/medline/ +data/pubmed/ +data/qian/ +data/temp/ +data/s2and-mini/ +data/test/ +data/zbmath/ +data/lid.176.bin +data/LICENSE.txt # C extensions *.so diff --git a/README.md b/README.md index 29513cd..4831667 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,8 @@ python -m pip install --user --upgrade uv ```bash # create the project venv (uv defaults to .venv if you don't give a name) -uv venv --python 3.11 +# note that you can't go past 3.12 for now because of fasttext +uv venv --python 3.11.9 ``` 2. Activate the venv (choose one): diff --git a/docs/normalization_migration.md b/docs/normalization_migration.md new file mode 100644 index 0000000..2946bb5 --- /dev/null +++ b/docs/normalization_migration.md @@ -0,0 +1,45 @@ +Normalization Unification Migration Plan + +Scope +- Unify name normalization for first/middle/last across data preparation, modeling, subblocking, and auxiliary datasets (name counts, name tuples, ORCID prefix counts). + +Current State (post-hyphen fix) +- Canonical fields in runtime (used by featurizer/model/subblocking) preserve hyphenated first names: + - Implemented via `s2and.text.split_first_middle_hyphen_aware`. +- Legacy fields for counts/tuples remain single-token: + - `author_info_first_normalized` stays single-token for compatibility with existing name counts and name tuples. +- ORCID prefix map compatibility fallback: + - Subblocking probes `FIRST_K_LETTER_COUNTS` using the first token when canonical first contains spaces. + +Target State +- Single, unified normalization for names (apostrophes always stripped; hyphen variants normalized; Sinonym wired for Chinese names to keep given names together). 
+- Remove the distinction between `author_info_first_normalized` and `author_info_first_normalized_without_apostrophe` throughout the codebase. + +Steps +1) Decide normalization policy + - Always strip apostrophes to nothing (handle typographic variants). + - Normalize hyphen/dash variants consistently. + - For Chinese names, use Sinonym to keep given-name tokens together; confirm no regression on prod model. + +2) Implement unified normalizer + - Update `s2and.text.normalize_text` and/or replace usages with a single canonical path. + - Deprecate `special_case_apostrophes` and `split_first_middle_hyphen_aware` once a single path exists. + +3) Regenerate data artifacts with the new normalization + - Name counts: rerun `get_name_counts.py`. + - Name tuples: write/adjust a script that rebuilds the normalized name tuples from the raw tuples in `s2and_unnormalized_filtered_name_tuples.txt` using the new normalization. + - ORCID prefix counts: rewrite `scripts/get_orcid_name_prefix_counts.py` to call the unified logic; regenerate `data/first_k_letter_counts_from_orcid.json`. + +4) Code cleanup and renames + - Replace usages of `author_info_first_normalized_without_apostrophe` with the unified canonical field. + - Remove `author_info_first_normalized` or alias it to the canonical field (depending on migration strategy). + - Remove the temporary first-token fallback in `s2and/subblocking.py` for ORCID lookups. + +5) Validation + - Run clustering metrics and pairwise evaluation on representative datasets. + - Check subblock sizes/distributions and merge logs for anomalies. + - Spot-check Chinese and Western hyphenated names for expected behavior. + +Rollback/Compat Notes +- Keep a feature flag or version switch if needed to load legacy datasets during transition. 
+ diff --git a/pyproject.toml b/pyproject.toml index 4053ece..b1d713e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "awscli", "fasttext-wheel>=0.9.2", "pycld2>=0.41", - "scikit-learn>=1.2,<1.5", + "scikit-learn==1.7.1", "text-unidecode==1.3", "requests>=2.28,<3", "hyperopt @ git+https://github.com/hyperopt/hyperopt.git", @@ -32,9 +32,10 @@ dependencies = [ "numpy>=1.24,<2", "orjson>=3.9,<4", "shap", - "sinonym", + "sinonym>=0.2.0", # Backport only for older Pythons; not needed on 3.11+ 'importlib-metadata>=4.13; python_version < "3.10"', + "awscli", ] [project.optional-dependencies] @@ -50,6 +51,7 @@ dev = [ "ruff>=0.4,<0.7", # CLI helpers used in some repos "click>=8,<9", + "ipykernel", ] [tool.setuptools.packages.find] diff --git a/s2and/data.py b/s2and/data.py index e6adf60..70c83f9 100644 --- a/s2and/data.py +++ b/s2and/data.py @@ -3,6 +3,7 @@ import os import re import json +import platform import numpy as np import pandas as pd import logging @@ -31,6 +32,7 @@ compute_block, get_text_ngrams_words, detect_language, + same_prefix_tokens, AFFILIATIONS_STOP_WORDS, VENUE_STOP_WORDS, NAME_PREFIXES, @@ -42,6 +44,9 @@ # Global variable for multiprocessing global_preprocess: bool +# Lazy-initialized global for Sinonym detector within worker processes +_SINONYM_DETECTOR = None # type: ignore +CHUNK_SIZE = 1000 # for multiprocessing imap chunks class NameCounts(NamedTuple): @@ -160,6 +165,10 @@ class ANDData: can be None or "filtered" or a set of name tuples use_orcid_id: whether to use the orcid id for (a) constraints as true if orcids match and (b) subblocking so that any sigs with the same orcid are in the same subblock + use_sinonym_overwrite: if True, run a pre-step that batch-detects Chinese names per paper via + Sinonym and overwrites the corresponding signature name parts with Sinonym's normalized output. 
+ Also applies Sinonym-normalized names to the per-paper author list so co-author features + (coauthor sets/blocks and n-grams) are derived from the normalized names as well. """ def __init__( @@ -200,6 +209,7 @@ def __init__( preprocess: bool = True, name_tuples: Optional[Union[Set[Tuple[str, str]], str]] = "filtered", use_orcid_id: bool = True, + use_sinonym_overwrite: bool = False, ): if mode == "train": if train_blocks is not None and block_type != "original": @@ -291,6 +301,22 @@ def __init__( signature_id=signature["signature_id"], ) logger.info("loaded signatures") + + # Optional Sinonym pre-step: normalize Chinese names from papers and overwrite signatures + # This runs before other preprocessing so downstream steps use updated names + if use_sinonym_overwrite: + sinonym_results = sinonym_preprocess_papers_parallel(self.papers, n_jobs) + # Only allow block overwrites during inference to keep train/val/test splits reproducible + overwrite_count = apply_sinonym_overwrites( + self.signatures, + sinonym_results, + overwrite_blocks=(mode == "inference"), + ) + logger.info(f"Sinonym overwrote {overwrite_count} signature name(s)") + # Update paper-level author strings so co-author features use Sinonym-normalized names + paper_overwrite_count = apply_sinonym_overwrites_to_papers(self.papers, sinonym_results) + logger.info(f"Sinonym overwrote {paper_overwrite_count} paper author name(s)") + self.name = name self.mode = mode logger.info("loading clusters") @@ -470,22 +496,23 @@ def preprocess_signatures(self, load_name_counts: bool): for signature_id, signature in tqdm(self.signatures.items(), desc="Preprocessing signatures"): # our normalization scheme is to normalize first and middle separately, # join them, then take the first token of the combined join - # TODO: a lot of chinese names are not normalized correctly. they are names like Yue-Hua and Ying-Ying. 
- # we need some fix for these - first_normalized = normalize_text(signature.author_info_first or "") - first_normalized_without_apostrophe = normalize_text( - signature.author_info_first or "", special_case_apostrophes=True - ) - - middle_normalized = normalize_text(signature.author_info_middle or "") + # TODO: we now have good normalization for chinese names with dashes in the first and surname + # BUT we currently DO NOT do anything with those dashes. + first_raw = signature.author_info_first or "" + middle_raw = signature.author_info_middle or "" + + # Default normalization (keeps legacy behavior for counts/lookups) + first_normalized = normalize_text(first_raw) + middle_normalized = normalize_text(middle_raw) first_middle_normalized_split = (first_normalized + " " + middle_normalized).split(" ") - if first_middle_normalized_split[0] in NAME_PREFIXES: + if first_middle_normalized_split and first_middle_normalized_split[0] in NAME_PREFIXES: first_middle_normalized_split = first_middle_normalized_split[1:] - first_middle_normalized_split_without_apostrophe = ( - first_normalized_without_apostrophe + " " + middle_normalized - ).split(" ") - if first_middle_normalized_split_without_apostrophe[0] in NAME_PREFIXES: - first_middle_normalized_split_without_apostrophe = first_middle_normalized_split_without_apostrophe[1:] + + # Hyphen-preserving split for the "without_apostrophe" canonical fields + # Centralize in s2and.text for reuse by other scripts + from s2and.text import split_first_middle_hyphen_aware + + first_without_apostrophe, middle_without_apostrophe = split_first_middle_hyphen_aware(first_raw, middle_raw) coauthors: Optional[List[str]] = None if len(self.papers) != 0: @@ -495,13 +522,11 @@ def preprocess_signatures(self, load_name_counts: bool): ] signature = signature._replace( - # need this for name counts - author_info_first_normalized=first_middle_normalized_split[0], - # need this for featurization - 
author_info_first_normalized_without_apostrophe=first_middle_normalized_split_without_apostrophe[0], - author_info_middle_normalized_without_apostrophe=" ".join( - first_middle_normalized_split_without_apostrophe[1:] - ), + # need this for name counts (legacy single-token behavior) + author_info_first_normalized=first_middle_normalized_split[0] if first_middle_normalized_split else "", + # canonical fields used across featurization, prediction, etc. + author_info_first_normalized_without_apostrophe=first_without_apostrophe, + author_info_middle_normalized_without_apostrophe=middle_without_apostrophe, author_info_last_normalized=normalize_text(signature.author_info_last), author_info_suffix_normalized=normalize_text(signature.author_info_suffix or ""), author_info_coauthors=set(coauthors) if coauthors is not None else None, @@ -518,25 +543,30 @@ def preprocess_signatures(self, load_name_counts: bool): ) if load_name_counts: - first_last_for_count = ( - signature.author_info_first_normalized + " " + signature.author_info_last_normalized - ).strip() - first_initial = ( - signature.author_info_first_normalized - if len(signature.author_info_first_normalized) > 0 - else "" - ) + # Backward-compatibility for name count keys: + # - Historically, counts used the legacy single-token `author_info_first_normalized`. + # - With Sinonym, `author_info_first_normalized_without_apostrophe` can contain multiple tokens + # for hyphenated Chinese given names (e.g., "qi xin"). For counts only, we heuristically + # join internal spaces to form a single token ("qixin") IF the raw first contained a hyphen. + # - This preserves old behavior for most names while improving lookups for hyphenated cases. + # TODO: revisit once we re-extract name_counts using Sinonym-aware canonicalization. 
+ first_for_counts = signature.author_info_first_normalized or "" + raw_first = signature.author_info_first or "" + if "-" in raw_first: + joined = (signature.author_info_first_normalized_without_apostrophe or "").replace(" ", "") + if joined: + first_for_counts = joined + + first_last_for_count = (first_for_counts + " " + signature.author_info_last_normalized).strip() + first_initial = first_for_counts if len(first_for_counts) > 0 else "" last_first_initial_for_count = (signature.author_info_last_normalized + " " + first_initial).strip() + counts = NameCounts( - first=( - self.first_dict.get(signature.author_info_first_normalized, 1) # type: ignore - if len(signature.author_info_first_normalized) > 1 - else np.nan - ), + first=(self.first_dict.get(first_for_counts, 1) if len(first_for_counts) > 1 else np.nan), # type: ignore last=self.last_dict.get(signature.author_info_last_normalized, 1), first_last=( self.first_last_dict.get(first_last_for_count, 1) # type: ignore - if len(signature.author_info_first_normalized) > 1 + if len(first_for_counts) > 1 else np.nan ), last_first_initial=self.last_first_initial_dict.get(last_first_initial_for_count, 1), @@ -792,8 +822,20 @@ def get_constraint( signature_2 = self.signatures[signature_id_2] # either a known alias or a prefix of the other # if neither, then we'll say it's impossible to be the same person - known_alias = (first_1, first_2) in self.name_tuples - prefix = first_1.startswith(first_2) or first_2.startswith(first_1) + # Backward-compatibility: `first_1`/`first_2` can now be multi-token (Sinonym output). + # Legacy name_tuples were curated over single-token first names. To remain compatible, + # try multiple forms for alias membership: exact, joined-without-spaces, and first-token only. + # TODO: revisit once we re-extract name_tuples aligned with Sinonym canonicalization. 
# ------------------------ Sinonym integration helpers ------------------------


def _ensure_sinonym_detector():
    """Return the process-level Sinonym detector, creating it on first use.

    The import is deferred so that Sinonym is only required when this
    feature is actually used. The instance is cached in the module-level
    ``_SINONYM_DETECTOR`` global, so each process builds it at most once.

    Raises:
        ImportError: if ``sinonym.detector`` cannot be imported.
    """
    global _SINONYM_DETECTOR  # type: ignore
    if _SINONYM_DETECTOR is None:
        try:
            from sinonym.detector import ChineseNameDetector  # type: ignore
        except Exception as e:  # pragma: no cover - optional dependency
            raise ImportError(
                "Sinonym is not installed or failed to import. Install 'sinonym' to enable this feature."
            ) from e
        _SINONYM_DETECTOR = ChineseNameDetector()
    return _SINONYM_DETECTOR


def _clean_tokens(tokens: Any) -> List[str]:
    """Return the non-empty string entries of a token list/tuple ([] for anything else)."""
    if not isinstance(tokens, (list, tuple)):
        return []
    return [t for t in tokens if isinstance(t, str) and t]


def _parse_sinonym_name(name_or_struct: Any) -> Tuple[str, str, str]:
    """Extract (first, middle, last) from a Sinonym parsed-name structure.

    Accepted inputs:
    - a dict with keys 'given_tokens' and 'surname_tokens', plus optional
      'original_compound_surname', 'middle_tokens' and 'middle_name'
    - an object exposing the same names as attributes

    Both forms go through the same code path and therefore produce the same
    strings:
    - first: given-name tokens joined with "-". The hyphen matters: the
      signature preprocessing in this module keeps multi-token given names
      together only when the raw first name contains "-".
    - middle: 'middle_name' (stripped) when it is a non-blank string,
      otherwise 'middle_tokens' joined with spaces, otherwise "".
    - last: 'original_compound_surname' (stripped) when it is a non-blank
      string, otherwise surname tokens joined with "-".

    Returns ("", "", "") when the input is not a recognized structure or its
    token fields are not lists/tuples.
    """
    if isinstance(name_or_struct, dict):
        read = name_or_struct.get
    elif hasattr(name_or_struct, "given_tokens") and hasattr(name_or_struct, "surname_tokens"):

        def read(field: str) -> Any:
            return getattr(name_or_struct, field, None)

    else:
        return "", "", ""

    given_tokens = read("given_tokens")
    surname_tokens = read("surname_tokens")
    if not isinstance(given_tokens, (list, tuple)) or not isinstance(surname_tokens, (list, tuple)):
        return "", "", ""

    first = "-".join(_clean_tokens(given_tokens))

    # Prefer an explicit middle_name string; otherwise fall back to tokens
    middle_name = read("middle_name")
    if isinstance(middle_name, str) and middle_name.strip():
        middle = middle_name.strip()
    else:
        middle = " ".join(_clean_tokens(read("middle_tokens")))

    # Prefer the original compound surname formatting when it is provided
    original_compound = read("original_compound_surname")
    if isinstance(original_compound, str) and original_compound.strip():
        last = original_compound.strip()
    else:
        last = "-".join(_clean_tokens(surname_tokens))

    return first, middle, last


def sinonym_preprocess_papers_parallel(papers_dict: "Dict[str, Paper]", n_jobs: int) -> Dict[str, Dict[int, Any]]:
    """Run Sinonym name detection over the author list of every paper.

    Args:
        papers_dict: paper_id -> Paper
        n_jobs: when > 1 the work is distributed over a UniversalPool with
            that many processes; otherwise it runs serially in this process.

    Returns:
        paper_id -> { position -> structured result }, where each structured
        result is the dict built by `_sinonym_preprocess_paper_light`
        ('surname_tokens', 'given_tokens', 'original_compound_surname' and,
        when available, 'middle_tokens'). Every paper gets an entry; it is
        an empty dict when no author name was parsed successfully.
    """
    output: Dict[str, Dict[int, Any]] = {}
    description = "Sinonym: analyzing author batches"

    # Ship only (position, author_name) pairs to the workers to minimize
    # serialization overhead; authors without a name are dropped here.
    light_iter = (
        (
            key,
            [(a.position, a.author_name) for a in paper.authors if a.author_name is not None],
        )
        for key, paper in papers_dict.items()
    )

    if n_jobs > 1:
        # Always use processes: threads were considered (for Windows) but are
        # not used at the moment because they do not parallelize this work well.
        with UniversalPool(processes=n_jobs, use_threads=False) as p:  # type: ignore
            with tqdm(total=len(papers_dict), desc=description) as pbar:
                for key, value in p.imap(_sinonym_preprocess_paper_light, light_iter, CHUNK_SIZE):
                    output[key] = value
                    pbar.update()
    else:
        for item in tqdm(light_iter, total=len(papers_dict), desc=description):
            key, value = _sinonym_preprocess_paper_light(item)
            output[key] = value
    return output


def _sinonym_preprocess_paper_light(item: Tuple[str, List[Tuple[int, str]]]) -> Tuple[str, Dict[int, Any]]:
    """Run Sinonym on the author names of a single paper.

    Args:
        item: (paper_id, [(position, author_name), ...])

    Returns:
        (paper_id, { position -> structured result }). Only names whose
        Sinonym result reports ``success`` and carries a ``parsed`` object
        with surname/given tokens are included. Each structured result is:
        {
            'surname_tokens': [...],
            'given_tokens': [...],
            'original_compound_surname': Optional[str],
            'middle_tokens': [...]   # only present when non-empty
        }
    """
    key, pos_names = item

    # Collect positions and names, skipping None defensively
    positions: List[int] = []
    names: List[str] = []
    for pos, name in pos_names:
        if name is not None:
            positions.append(pos)
            names.append(name)

    if not names:
        return key, {}

    detector = _ensure_sinonym_detector()
    results = detector.process_name_batch(names)  # type: ignore[attr-defined]

    pos_to_norm: Dict[int, Any] = {}

    # Results are aligned with the input names; zip stops at the shorter
    # sequence, so a short or missing result list cannot misalign positions.
    for pos, res in zip(positions, (results or [])):
        if not getattr(res, "success", False):
            continue

        parsed = getattr(res, "parsed", None)
        if parsed is None or not (hasattr(parsed, "surname_tokens") and hasattr(parsed, "given_tokens")):
            continue

        entry = {
            "surname_tokens": getattr(parsed, "surname_tokens", []),
            "given_tokens": getattr(parsed, "given_tokens", []),
            # read from the result object itself, not from `parsed`
            "original_compound_surname": getattr(res, "original_compound_surname", None),
        }
        middle_tokens = getattr(parsed, "middle_tokens", None)
        if middle_tokens:
            entry["middle_tokens"] = middle_tokens
        pos_to_norm[pos] = entry

    return key, pos_to_norm


def apply_sinonym_overwrites(
    signatures: "Dict[str, Signature]",
    per_paper_results: Dict[str, Dict[int, Any]],
    *,
    overwrite_blocks: bool = False,
) -> int:
    """Overwrite signature name parts with Sinonym-normalized names where applicable.

    `signatures` is updated in place. A signature is rewritten when Sinonym
    produced a structure for its (paper_id, author position) and that
    structure yields a non-empty first or last name.

    Args:
        signatures: signature_id -> Signature
        per_paper_results: paper_id(str) -> { position -> parsed_struct }
        overwrite_blocks: if True, also overwrite author_info_block with the new
            block derived from normalized first/last. Use only in inference to
            avoid changing dataset splits.

    Returns:
        Number of signatures updated.
    """
    overwrite_count = 0
    for sig_id, sig in list(signatures.items()):
        by_pos = per_paper_results.get(str(sig.paper_id))
        if not by_pos:
            continue
        norm_struct = by_pos.get(sig.author_info_position)
        if not norm_struct:
            continue
        first, middle, last = _parse_sinonym_name(norm_struct)
        if not (first or last):
            continue

        # Always update first/middle/last
        new_sig = sig._replace(
            author_info_first=first,
            author_info_middle=middle,
            author_info_last=last,
        )
        # Rebuild the block only when BOTH first and last are present;
        # otherwise the existing block value is kept.
        # Note: changing blocks will affect clustering; only do this in inference
        if overwrite_blocks and first and last:
            new_block = f"{first[:1].lower()} {last.lower()}".strip()
            new_sig = new_sig._replace(author_info_block=new_block)
        signatures[sig_id] = new_sig
        overwrite_count += 1
    return overwrite_count


def apply_sinonym_overwrites_to_papers(
    papers: "Dict[str, Paper]",
    per_paper_results: Dict[str, Dict[int, Any]],
) -> int:
    """Apply Sinonym-normalized names to Paper.authors for co-author features.

    For each paper and author position that has a Sinonym structure, replace
    the Author.author_name with the full name rebuilt from the non-empty
    (first, middle, last) parts joined by spaces. Authors whose rebuilt name
    is empty or equal to the current name are left untouched. `papers` is
    updated in place, and only for papers with at least one changed author.

    Returns number of author entries updated.
    """
    updates = 0
    for key, paper in papers.items():
        by_pos = per_paper_results.get(str(key))
        if not by_pos or not isinstance(by_pos, dict):
            continue

        new_authors = []
        changed = False
        for a in paper.authors:
            repl = by_pos.get(a.position)
            new_name = ""
            if repl:
                # _parse_sinonym_name returns plain strings; drop empty parts
                parts = [p for p in _parse_sinonym_name(repl) if p]
                new_name = " ".join(parts).strip()
            if new_name and new_name != a.author_name:
                new_authors.append(Author(author_name=new_name, position=a.position))
                updates += 1
                changed = True
            else:
                new_authors.append(a)

        # Rebinding an existing key does not resize the dict, so this is
        # safe while iterating over papers.items().
        if changed:
            papers[key] = paper._replace(authors=new_authors)
    return updates
Any, Union, List, Tuple, cast from collections import defaultdict @@ -1131,9 +1132,7 @@ def predict_incremental_helper( ].author_info_first_normalized_without_apostrophe match_found = False for first_assigned in all_firsts: - prefix = first_assigned.startswith(first_unassigned) or first_unassigned.startswith( - first_assigned - ) + prefix = same_prefix_tokens(first_assigned, first_unassigned) known_alias = (first_assigned, first_unassigned) in dataset.name_tuples if prefix or known_alias: diff --git a/s2and/subblocking.py b/s2and/subblocking.py index c3a5b73..b637925 100644 --- a/s2and/subblocking.py +++ b/s2and/subblocking.py @@ -11,6 +11,7 @@ from sklearn.decomposition import TruncatedSVD import genieclust from s2and.consts import SPECTER_DIM, PROJECT_ROOT_PATH +from s2and.text import same_prefix_tokens logger = logging.getLogger("s2and") @@ -348,18 +349,20 @@ def make_subblocks(signature_ids, anddata, maximum_size=7500, first_k_letter_cou else: score = 0 small_enough_pairs_counts.append((pair, 1e10 + score)) - # the name tuples allow the situation where a.startswith(b) or b.startswith(b) - elif name_for_splits_1.startswith(name_for_splits_2) or name_for_splits_2.startswith(name_for_splits_1): + # the name tuples allow the situation where prefixes match in either direction + elif same_prefix_tokens(name_for_splits_1, name_for_splits_2): score = min(len(name_for_splits_1), len(name_for_splits_2)) small_enough_pairs_counts.append((pair, 1e5 + score)) # the other option is that the names are different but we have counts - elif ( - name_for_splits_1 in first_k_letter_counts_sorted - and name_for_splits_2 in first_k_letter_counts_sorted[name_for_splits_1] - ): - small_enough_pairs_counts.append( - (pair, first_k_letter_counts_sorted[name_for_splits_1][name_for_splits_2]) - ) + else: + # TODO(s2and): Temporary compatibility tweak for hyphen-preserving first names. + # The ORCID-derived first_k_letter_counts were generated with legacy normalization. 
def split_first_middle_hyphen_aware(first_raw: Optional[str], middle_raw: Optional[str]) -> Tuple[str, str]:
    """Split raw first/middle names into the canonical normalized (first, middle) pair.

    The raw first name is normalized with
    ``normalize_text(..., special_case_apostrophes=True)`` and the raw middle
    name with plain ``normalize_text``; both are then split on whitespace.

    Rules:
    - A single leading first-name token found in NAME_PREFIXES is dropped.
    - If no first-name token remains, first is "" and middle is the
      normalized middle name.
    - If the raw first name contains "-", all first-name tokens stay in
      first (nothing spills into middle).
    - Otherwise only the first token stays in first; the remaining
      first-name tokens are prepended to the middle tokens.

    Returns (first_without_apostrophe, middle_without_apostrophe), each a
    single-space-joined string.
    """
    raw_first = first_raw or ""
    raw_middle = middle_raw or ""

    first_tokens = normalize_text(raw_first, special_case_apostrophes=True).split()
    middle_tokens = normalize_text(raw_middle).split()

    # Drop at most one leading prefix token
    if first_tokens and first_tokens[0] in NAME_PREFIXES:
        first_tokens = first_tokens[1:]

    if not first_tokens:
        first, spill = "", []
    elif "-" in raw_first:
        # Hyphenated in the raw input: keep every first-name token together
        first, spill = " ".join(first_tokens), []
    else:
        # Legacy spill behavior
        first, spill = first_tokens[0], first_tokens[1:]

    return first, " ".join(spill + middle_tokens)
dataset_name + "_signatures.json"), + papers=os.path.join(data_root, dataset_name, dataset_name + "_papers.json"), + name=dataset_name, + mode="train", + specter_embeddings=os.path.join(data_root, dataset_name, dataset_name + "_specter.pickle"), + clusters=os.path.join(data_root, dataset_name, dataset_name + "_clusters.json"), + block_type="s2", + train_pairs=None, + val_pairs=None, + test_pairs=None, + train_pairs_size=100000, + val_pairs_size=10000, + test_pairs_size=10000, + n_jobs=n_jobs, + load_name_counts=True, + preprocess=True, + random_seed=42, + name_tuples="filtered", + use_orcid_id=True, + use_sinonym_overwrite=use_sinonym_overwrite, + ) + return anddata + + +def main() -> None: + os.environ["OMP_NUM_THREADS"] = str(N_JOBS) + + datasets = [ + # "arnetminer", + "kisti", + ] + + for dataset_name in datasets: + print(f"\n=== Dataset: {dataset_name} ===") + + # With Sinonym overwrites + anddata_yes = build_anddata(dataset_name, use_sinonym_overwrite=True, n_jobs=N_JOBS) + fm_yes = collect_normalized_first_middle(anddata_yes.signatures) + + # Without Sinonym overwrites + anddata_no = build_anddata(dataset_name, use_sinonym_overwrite=False, n_jobs=N_JOBS) + fm_no = collect_normalized_first_middle(anddata_no.signatures) + + # Compute differences on intersection of signature ids + def only_space_change(a: str, b: str) -> bool: + """True if a != b but equal after removing spaces.""" + if a is None or b is None: + return False + return a != b and a.replace(" ", "") == b.replace(" ", "") + + changed = [] + for sig_id in sorted(set(fm_no.keys()) & set(fm_yes.keys())): + first_no, middle_no = fm_no[sig_id] + first_yes, middle_yes = fm_yes[sig_id] + if first_no != first_yes or middle_no != middle_yes: + # Exclude trivial diffs where only first changed by adding/removing spaces + if only_space_change(first_no or "", first_yes or "") and (middle_no == middle_yes): + continue + changed.append((sig_id, first_no, middle_no, first_yes, middle_yes)) + + print(f"Total 
signatures compared: {len(set(fm_no) & set(fm_yes))}") + print(f"Changed normalized first/middle: {len(changed)}") + for sig_id, first_no, middle_no, first_yes, middle_yes in changed: + print(f"sig={sig_id} | first: '{first_no}' -> '{first_yes}' | middle: '{middle_no}' -> '{middle_yes}'") + # Provide extra context: paper, authors, and Sinonym parsed tokens for this signature + try: + sig_obj_no = anddata_no.signatures[sig_id] + paper_id = str(sig_obj_no.paper_id) + paper_no = anddata_no.papers[paper_id] + paper_yes = anddata_yes.papers[paper_id] + print(f" paper_id={paper_id} | title={paper_no.title!r}") + print( + " authors no:", + [f"{a.position}:{a.author_name}" for a in paper_no.authors], + ) + print( + " authors yes:", + [f"{a.position}:{a.author_name}" for a in paper_yes.authors], + ) + # Run Sinonym preprocessing directly to show parsed tokens + parsed = sinonym_preprocess_papers_parallel({paper_id: paper_no}, n_jobs=1).get(paper_id, {}) + if parsed: + print(" sinonym parsed per position (given_tokens | surname_tokens | middle_tokens?):") + for pos, obj in sorted(parsed.items()): + try: + gt = obj.get("given_tokens") if isinstance(obj, dict) else getattr(obj, "given_tokens", []) + st = ( + obj.get("surname_tokens") + if isinstance(obj, dict) + else getattr(obj, "surname_tokens", []) + ) + mt = None + if isinstance(obj, dict): + mt = obj.get("middle_tokens") + elif hasattr(obj, "middle_tokens"): + mt = getattr(obj, "middle_tokens") + print(f" pos={pos}: {gt} | {st} | {mt}") + except Exception as e: + print(f" pos={pos}: ") + else: + print(" sinonym parsed: ") + except Exception as e: + print(f" ") + + # -------- Feature comparison on test split -------- + # Load production clusterer to reuse its featurizer settings + with open(os.path.join(PROJECT_ROOT_PATH, "data", "production_model_v1.1.pickle"), "rb") as fh: + prod = pickle.load(fh) + clusterer = prod["clusterer"] + clusterer.use_cache = False + clusterer.n_jobs = N_JOBS + + # Use identical test blocks 
from the build WITHOUT overwrites (blocks are stable in train mode) + _, _, test_blocks = anddata_no.split_blocks_helper(anddata_no.get_blocks()) + + # Build the exact pair list used by prediction (all unordered pairs per block) + pairs: List[Tuple[str, str, float]] = [] + pair_blocks: List[str] = [] + for block_key, sigs in test_blocks.items(): + n = len(sigs) + for i in range(n): + for j in range(i + 1, n): + pairs.append((sigs[i], sigs[j], np.nan)) + pair_blocks.append(block_key) + + if len(pairs) == 0: + print("No test pairs to compare features.") + continue + + # Compute features for both datasets with identical featurizer config + feats_no, _, _ = many_pairs_featurize( + pairs, + anddata_no, + clusterer.featurizer_info, + n_jobs=clusterer.n_jobs, + use_cache=False, + chunk_size=DEFAULT_CHUNK_SIZE, + nameless_featurizer_info=clusterer.nameless_featurizer_info, + ) + feats_yes, _, _ = many_pairs_featurize( + pairs, + anddata_yes, + clusterer.featurizer_info, + n_jobs=clusterer.n_jobs, + use_cache=False, + chunk_size=DEFAULT_CHUNK_SIZE, + nameless_featurizer_info=clusterer.nameless_featurizer_info, + ) + + fnames = clusterer.featurizer_info.get_feature_names() + assert feats_no.shape == feats_yes.shape, "Feature shapes differ between runs" + + # Compare with NaN-safe equality + a = feats_no + b = feats_yes + same = (a == b) | (np.isnan(a) & np.isnan(b)) + row_changed = ~np.all(same, axis=1) + changed_indices = np.where(row_changed)[0] + print(f"Total pairwise comparisons: {a.shape[0]}") + print(f"Pairs with any feature change: {len(changed_indices)}") + + # Summarize which features changed most + feat_change_counts = np.sum(~same, axis=0) + any_changed = np.where(feat_change_counts > 0)[0] + if len(any_changed) == 0: + print("No feature differences detected.") + else: + print("Changed feature counts:") + for idx in any_changed: + print(f" {idx:02d} {fnames[idx]}: {int(feat_change_counts[idx])}") + + # Show a few example diffs + max_examples = 25 + if 
len(changed_indices) > 0: + print(f"\nExample diffs (up to {max_examples}):") + for k, ridx in enumerate(changed_indices[:max_examples]): + sig1, sig2, _ = pairs[ridx] + sig1_no = anddata_no.signatures[sig1] + sig2_no = anddata_no.signatures[sig2] + sig1_yes = anddata_yes.signatures[sig1] + sig2_yes = anddata_yes.signatures[sig2] + name1_no = ANDData.get_full_name_for_features(sig1_no) + name2_no = ANDData.get_full_name_for_features(sig2_no) + name1_yes = ANDData.get_full_name_for_features(sig1_yes) + name2_yes = ANDData.get_full_name_for_features(sig2_yes) + block = pair_blocks[ridx] + + print(f"Pair {k+1}: block={block} | {sig1} [{name1_no}] <-> {sig2} [{name2_no}]") + print( + " NAME no/yes:", + f"[{name1_no}] | [{name1_yes}]", + "||", + f"[{name2_no}] | [{name2_yes}]", + ) + print( + " BLOCK no/yes:", + repr(sig1_no.author_info_block), + "|", + repr(sig1_yes.author_info_block), + "||", + repr(sig2_no.author_info_block), + "|", + repr(sig2_yes.author_info_block), + ) + # Show raw first/middle (as stored on signatures) for both runs + print( + " RAW first_no/yes:", + repr(sig1_no.author_info_first), + "|", + repr(sig1_yes.author_info_first), + "||", + repr(sig2_no.author_info_first), + "|", + repr(sig2_yes.author_info_first), + ) + print( + " RAW middle_no/yes:", + repr(sig1_no.author_info_middle), + "|", + repr(sig1_yes.author_info_middle), + "||", + repr(sig2_no.author_info_middle), + "|", + repr(sig2_yes.author_info_middle), + ) + print( + " first_no/yes:", + repr(sig1_no.author_info_first_normalized_without_apostrophe), + "|", + repr(sig1_yes.author_info_first_normalized_without_apostrophe), + "||", + repr(sig2_no.author_info_first_normalized_without_apostrophe), + "|", + repr(sig2_yes.author_info_first_normalized_without_apostrophe), + ) + print( + " middle_no/yes:", + repr(sig1_no.author_info_middle_normalized_without_apostrophe), + "|", + repr(sig1_yes.author_info_middle_normalized_without_apostrophe), + "||", + 
repr(sig2_no.author_info_middle_normalized_without_apostrophe), + "|", + repr(sig2_yes.author_info_middle_normalized_without_apostrophe), + ) + diff_cols = np.where(~same[ridx])[0] + for ci in diff_cols: + v0 = a[ridx, ci] + v1 = b[ridx, ci] + print(f" {ci:02d} {fnames[ci]}: {v0} -> {v1}") + + # Also compare pairwise classifier probabilities and crossings vs eps + print("\n--- Pairwise probability comparison ---") + # p(not same) from the classifier(s) + if clusterer.nameless_classifier is not None: + p_no = ( + clusterer.classifier.predict_proba(feats_no)[:, 0] + + clusterer.nameless_classifier.predict_proba( + many_pairs_featurize( + pairs, + anddata_no, + clusterer.featurizer_info, + n_jobs=clusterer.n_jobs, + use_cache=False, + chunk_size=DEFAULT_CHUNK_SIZE, + nameless_featurizer_info=clusterer.nameless_featurizer_info, + )[ + 2 + ] # nameless features + )[:, 0] + ) / 2 + p_yes = ( + clusterer.classifier.predict_proba(feats_yes)[:, 0] + + clusterer.nameless_classifier.predict_proba( + many_pairs_featurize( + pairs, + anddata_yes, + clusterer.featurizer_info, + n_jobs=clusterer.n_jobs, + use_cache=False, + chunk_size=DEFAULT_CHUNK_SIZE, + nameless_featurizer_info=clusterer.nameless_featurizer_info, + )[2] + )[:, 0] + ) / 2 + else: + p_no = clusterer.classifier.predict_proba(feats_no)[:, 0] + p_yes = clusterer.classifier.predict_proba(feats_yes)[:, 0] + + eps = getattr(clusterer.cluster_model, "eps", 0.5) + crossed = (p_no < eps) ^ (p_yes < eps) + print( + f"Prob delta summary: mean|std={float(np.mean(p_yes-p_no)):.4f}|{float(np.std(p_yes-p_no)):.4f}; " + f"crossed eps ({eps:.3f}): {int(np.sum(crossed))} of {len(p_no)}" + ) + + # Show top crossing examples by absolute delta + idx_sorted = np.argsort(-np.abs(p_yes - p_no)) + shown = 0 + for ridx in idx_sorted: + if not crossed[ridx]: + continue + sig1, sig2, _ = pairs[ridx] + print( + f" pair {sig1} <-> {sig2}: p_no={p_no[ridx]:.3f} -> p_yes={p_yes[ridx]:.3f} (Δ={p_yes[ridx]-p_no[ridx]:+.3f})" + ) + shown += 1 + 
if shown >= 10: + break + + # -------- End-to-end clustering comparison on test split -------- + print("\n--- Clustering comparison (test split) ---") + + # Metrics for both runs + m_no, _ = cluster_eval(anddata_no, clusterer, split="test", use_s2_clusters=False) + m_yes, _ = cluster_eval(anddata_yes, clusterer, split="test", use_s2_clusters=False) + print("Metrics no-overwrite:", m_no) + print("Metrics yes-overwrite:", m_yes) + + # Predicted clusters for both runs using identical test blocks (from no-overwrite) + pred_no, _ = clusterer.predict(test_blocks, anddata_no, use_s2_clusters=False) + pred_yes, _ = clusterer.predict(test_blocks, anddata_yes, use_s2_clusters=False) + + # Helper: build signature->cluster map and same-cluster pair set + def invert_clusters(pred_clusters): + sig_to_cid = {} + for cid, sigs in pred_clusters.items(): + for s in sigs: + sig_to_cid[s] = cid + return sig_to_cid + + def same_cluster_pairs(pred_clusters): + pairs_local = set() + for sigs in pred_clusters.values(): + n = len(sigs) + for i in range(n): + for j in range(i + 1, n): + a, b = sigs[i], sigs[j] + if a <= b: + pairs_local.add((a, b)) + else: + pairs_local.add((b, a)) + return pairs_local + + sig_to_cid_no = invert_clusters(pred_no) + sig_to_cid_yes = invert_clusters(pred_yes) + pairs_no = same_cluster_pairs(pred_no) + pairs_yes = same_cluster_pairs(pred_yes) + + only_no = pairs_no - pairs_yes + only_yes = pairs_yes - pairs_no + inter = pairs_no & pairs_yes + + denom = max(1, len(pairs_no | pairs_yes)) + jacc = len(inter) / denom + print( + f"Same-cluster pair counts | no={len(pairs_no)} yes={len(pairs_yes)} inter={len(inter)} jaccard={jacc:.3f}" + ) + print(f"Pairs only in no (splits in yes): {len(only_no)}") + print(f"Pairs only in yes (merges in yes): {len(only_yes)}") + + # Show a few illustrative differences + def describe_pair(sig_a: str, sig_b: str): + a_no = anddata_no.signatures[sig_a] + b_no = anddata_no.signatures[sig_b] + a_yes = anddata_yes.signatures[sig_a] + 
b_yes = anddata_yes.signatures[sig_b] + name_a_no = ANDData.get_full_name_for_features(a_no) + name_b_no = ANDData.get_full_name_for_features(b_no) + name_a_yes = ANDData.get_full_name_for_features(a_yes) + name_b_yes = ANDData.get_full_name_for_features(b_yes) + print(f" {sig_a}: [{name_a_no}] |yes-> [{name_a_yes}] || {sig_b}: [{name_b_no}] |yes-> [{name_b_yes}]") + # Also show first/middle normalized for both + print( + " first_no/yes:", + repr(a_no.author_info_first_normalized_without_apostrophe), + "|", + repr(a_yes.author_info_first_normalized_without_apostrophe), + "||", + repr(b_no.author_info_first_normalized_without_apostrophe), + "|", + repr(b_yes.author_info_first_normalized_without_apostrophe), + ) + print( + " middle_no/yes:", + repr(a_no.author_info_middle_normalized_without_apostrophe), + "|", + repr(a_yes.author_info_middle_normalized_without_apostrophe), + "||", + repr(b_no.author_info_middle_normalized_without_apostrophe), + "|", + repr(b_yes.author_info_middle_normalized_without_apostrophe), + ) + + max_show = 10 + if only_no: + print(f"\nExamples only in no (split in yes): showing up to {max_show}") + for i, (a, b) in enumerate(list(only_no)[:max_show]): + describe_pair(a, b) + if only_yes: + print(f"\nExamples only in yes (merged in yes): showing up to {max_show}") + for i, (a, b) in enumerate(list(only_yes)[:max_show]): + describe_pair(a, b) + + +if __name__ == "__main__": + main() diff --git a/scripts/get_orcid_name_prefix_counts.py b/scripts/get_orcid_name_prefix_counts.py index a0fb427..2cdcb01 100644 --- a/scripts/get_orcid_name_prefix_counts.py +++ b/scripts/get_orcid_name_prefix_counts.py @@ -2,16 +2,21 @@ Note: This script won't run because it relies on an internal Semantic Scholar package called pys2, and is here for documentation of how the prefix counts for subblocking were built. 
-TODO: rerun this when we update how names are normalized +TODO(s2and): This JSON was generated with legacy normalization (single-token first, apostrophes handled via + special_case_apostrophes=True for first). When we finalize the new unified normalization + (hyphen-aware, consistent apostrophe handling), rewrite this script to call + s2and.text.split_first_middle_hyphen_aware (or its eventual unified equivalent) and regenerate + data/first_k_letter_counts_from_orcid.json. Until then, runtime lookups use a first-token fallback + for compatibility. """ import os import json from collections import Counter from itertools import combinations -from pys2.pys2 import _evaluate_redshift_query -from s2and.text import normalize_text, NAME_PREFIXES +from s2and.text import normalize_text, NAME_PREFIXES, same_prefix_tokens from s2and.consts import PROJECT_ROOT_PATH +from pys2.pys2 import _evaluate_redshift_query """ Step 1: Get orcid name pairs from our internal databases @@ -39,8 +44,10 @@ def normalize_names(row): - """This is basically the same as what's in row 456 and on in s2and/data.py - TODO: if that changes due to, say, how dashes are treated, we have to rerun this + """Legacy normalization used when building ORCID prefix counts. + + TODO(s2and): Align with s2and.text.split_first_middle_hyphen_aware when regenerating counts. + Currently kept to document how the existing JSON was produced. 
""" first = row["first_name"] middle = row["middle"] @@ -118,7 +125,7 @@ def group_update(group, k_values=k_values): for k in k_values: for j in k_values: pair = (name1[:k], name2[:j]) - if pair[0] != pair[1] and not (pair[0].startswith(pair[1]) or pair[1].startswith(pair[0])): + if pair[0] != pair[1] and not same_prefix_tokens(pair[0], pair[1]): pairs.add(pair) name_tuples_first_k_letter_counts.update(list(pairs)) @@ -127,7 +134,7 @@ def group_update(group, k_values=k_values): # to save space orcid_first_k_letter_counts_filtered = {} for (name1, name2), count in orcid_first_k_letter_counts.items(): - if not (name1.startswith(name2) or name2.startswith(name1)): + if not same_prefix_tokens(name1, name2): # we also have a filter on this one where count has to be greater than 10 if count >= 10: orcid_first_k_letter_counts_filtered[(name1, name2)] = count diff --git a/scripts/tutorial_for_predicting_with_the_prod_model.py b/scripts/tutorial_for_predicting_with_the_prod_model.py index 7f6372d..263a044 100644 --- a/scripts/tutorial_for_predicting_with_the_prod_model.py +++ b/scripts/tutorial_for_predicting_with_the_prod_model.py @@ -7,12 +7,10 @@ import os import pickle -import numpy as np from s2and.data import ANDData from s2and.eval import cluster_eval -from s2and.consts import FEATURIZER_VERSION, DEFAULT_CHUNK_SIZE, PROJECT_ROOT_PATH -from s2and.featurizer import FeaturizationInfo, featurize -from s2and.model import PairwiseModeler, Clusterer +from s2and.consts import FEATURIZER_VERSION, PROJECT_ROOT_PATH +from s2and.featurizer import FeaturizationInfo def main() -> None: @@ -22,13 +20,17 @@ def main() -> None: # Limit BLAS threads to keep things responsive os.environ["OMP_NUM_THREADS"] = f"{n_jobs}" - data_original = os.path.join(PROJECT_ROOT_PATH, "data") + data_original = os.path.join(PROJECT_ROOT_PATH, "data", "s2and_mini") random_seed = 42 datasets = [ "arnetminer", + "inspire", + "kisti", "pubmed", + "qian", + "zbmath", ] features_to_use = [ @@ -90,6 +92,8 @@ 
def main() -> None: preprocess=True, random_seed=random_seed, name_tuples="filtered", + use_orcid_id=True, + use_sinonym_overwrite=False, ) train_block_dict, val_block_dict, test_block_dict = anddata.split_blocks_helper(anddata.get_blocks()) num_test_blocks[dataset_name] = len(test_block_dict) @@ -103,6 +107,15 @@ def main() -> None: print(cluster_metrics) cluster_metrics_all.append(cluster_metrics) + # cluster_to_signatures = anddata.construct_cluster_to_signatures(test_block_dict) + + # # now we need to print out the unique tuples of anddata(get_full_name_for_features(signature)) for the signatures that were clustered together + # for cluster_id, signatures in cluster_to_signatures.items(): + # full_names = [anddata.get_full_name_for_features(anddata.signatures[sig]) for sig in signatures] + # # also get the BLOCK author_info_block + # blocks = [anddata.signatures[sig].author_info_block for sig in signatures] + # print(f"Cluster {cluster_id}: {set(list(zip(full_names, blocks)))}") + b3s = [i["B3 (P, R, F1)"][-1] for i in cluster_metrics_all] print(b3s, sum(b3s) / len(b3s)) diff --git a/uv.lock b/uv.lock index a395826..836f39a 100644 --- a/uv.lock +++ b/uv.lock @@ -6,6 +6,24 @@ resolution-markers = [ "python_full_version < '3.12'", ] +[[package]] +name = "appnope" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170, upload-time = "2024-02-06T09:43:11.258Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321, upload-time = "2024-02-06T09:43:09.663Z" }, +] + +[[package]] +name = "asttokens" +version = 
"3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978, upload-time = "2024-11-30T04:30:14.439Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918, upload-time = "2024-11-30T04:30:10.946Z" }, +] + [[package]] name = "awscli" version = "1.42.25" @@ -70,6 +88,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" }, ] +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621, upload-time = "2024-09-04T20:45:21.852Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264, upload-time = "2024-09-04T20:43:51.124Z" }, + { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651, upload-time = "2024-09-04T20:43:52.872Z" }, + { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259, upload-time = "2024-09-04T20:43:56.123Z" }, + { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200, upload-time = "2024-09-04T20:43:57.891Z" }, + { url = "https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235, upload-time = "2024-09-04T20:44:00.18Z" }, + { url = "https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721, upload-time = "2024-09-04T20:44:01.585Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242, upload-time = "2024-09-04T20:44:03.467Z" }, + { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", 
hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999, upload-time = "2024-09-04T20:44:05.023Z" }, + { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242, upload-time = "2024-09-04T20:44:06.444Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604, upload-time = "2024-09-04T20:44:08.206Z" }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727, upload-time = "2024-09-04T20:44:09.481Z" }, + { url = "https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400, upload-time = "2024-09-04T20:44:10.873Z" }, + { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178, upload-time = "2024-09-04T20:44:12.232Z" }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840, upload-time = "2024-09-04T20:44:13.739Z" }, + { url = 
"https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803, upload-time = "2024-09-04T20:44:15.231Z" }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850, upload-time = "2024-09-04T20:44:17.188Z" }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729, upload-time = "2024-09-04T20:44:18.688Z" }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256, upload-time = "2024-09-04T20:44:20.248Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424, upload-time = "2024-09-04T20:44:21.673Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568, upload-time = "2024-09-04T20:44:23.245Z" }, + 
{ url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736, upload-time = "2024-09-04T20:44:24.757Z" }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448, upload-time = "2024-09-04T20:44:26.208Z" }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976, upload-time = "2024-09-04T20:44:27.578Z" }, + { url = "https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989, upload-time = "2024-09-04T20:44:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802, upload-time = "2024-09-04T20:44:30.289Z" }, + { url = "https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792, upload-time = "2024-09-04T20:44:32.01Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893, upload-time = "2024-09-04T20:44:33.606Z" }, + { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810, upload-time = "2024-09-04T20:44:35.191Z" }, + { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200, upload-time = "2024-09-04T20:44:36.743Z" }, + { url = "https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447, upload-time = "2024-09-04T20:44:38.492Z" }, + { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358, upload-time = "2024-09-04T20:44:40.046Z" }, + { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469, upload-time = "2024-09-04T20:44:41.616Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475, upload-time = "2024-09-04T20:44:43.733Z" }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.3" @@ -153,6 +216,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] +[[package]] +name = "comm" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/13/7d740c5849255756bc17888787313b61fd38a0a8304fc4f073dfc46122aa/comm-0.2.3.tar.gz", hash = "sha256:2dc8048c10962d55d7ad693be1e7045d891b7ce8d999c97963a5e3e99c055971", size = 6319, upload-time = "2025-07-25T14:02:04.452Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/97/891a0971e1e4a8c5d2b20bbe0e524dc04548d2307fee33cdeba148fd4fc7/comm-0.2.3-py3-none-any.whl", hash = "sha256:c615d91d75f7f04f095b30d1c1711babd43bdc6419c1be9886a85f2f4e489417", size = 7294, upload-time = "2025-07-25T14:02:02.896Z" }, +] + [[package]] name = "contourpy" version = "1.3.3" @@ -373,6 +445,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/c8/46ac27096684f33e27dab749ef43c6b0119c6a0d852971eaefb73256dc4c/cython-3.1.3-py3-none-any.whl", hash = "sha256:d13025b34f72f77bf7f65c1cd628914763e6c285f4deb934314c922b91e6be5a", size = 1225725, upload-time 
= "2025-08-13T06:19:09.593Z" }, ] +[[package]] +name = "debugpy" +version = "1.8.16" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/d4/722d0bcc7986172ac2ef3c979ad56a1030e3afd44ced136d45f8142b1f4a/debugpy-1.8.16.tar.gz", hash = "sha256:31e69a1feb1cf6b51efbed3f6c9b0ef03bc46ff050679c4be7ea6d2e23540870", size = 1643809, upload-time = "2025-08-06T18:00:02.647Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/d6/ad70ba8b49b23fa286fb21081cf732232cc19374af362051da9c7537ae52/debugpy-1.8.16-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:67371b28b79a6a12bcc027d94a06158f2fde223e35b5c4e0783b6f9d3b39274a", size = 2184063, upload-time = "2025-08-06T18:00:11.885Z" }, + { url = "https://files.pythonhosted.org/packages/aa/49/7b03e88dea9759a4c7910143f87f92beb494daaae25560184ff4ae883f9e/debugpy-1.8.16-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2abae6dd02523bec2dee16bd6b0781cccb53fd4995e5c71cc659b5f45581898", size = 3134837, upload-time = "2025-08-06T18:00:13.782Z" }, + { url = "https://files.pythonhosted.org/packages/5d/52/b348930316921de7565fbe37a487d15409041713004f3d74d03eb077dbd4/debugpy-1.8.16-cp311-cp311-win32.whl", hash = "sha256:f8340a3ac2ed4f5da59e064aa92e39edd52729a88fbde7bbaa54e08249a04493", size = 5159142, upload-time = "2025-08-06T18:00:15.391Z" }, + { url = "https://files.pythonhosted.org/packages/d8/ef/9aa9549ce1e10cea696d980292e71672a91ee4a6a691ce5f8629e8f48c49/debugpy-1.8.16-cp311-cp311-win_amd64.whl", hash = "sha256:70f5fcd6d4d0c150a878d2aa37391c52de788c3dc680b97bdb5e529cb80df87a", size = 5183117, upload-time = "2025-08-06T18:00:17.251Z" }, + { url = "https://files.pythonhosted.org/packages/61/fb/0387c0e108d842c902801bc65ccc53e5b91d8c169702a9bbf4f7efcedf0c/debugpy-1.8.16-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:b202e2843e32e80b3b584bcebfe0e65e0392920dc70df11b2bfe1afcb7a085e4", size = 
2511822, upload-time = "2025-08-06T18:00:18.526Z" }, + { url = "https://files.pythonhosted.org/packages/37/44/19e02745cae22bf96440141f94e15a69a1afaa3a64ddfc38004668fcdebf/debugpy-1.8.16-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64473c4a306ba11a99fe0bb14622ba4fbd943eb004847d9b69b107bde45aa9ea", size = 4230135, upload-time = "2025-08-06T18:00:19.997Z" }, + { url = "https://files.pythonhosted.org/packages/f3/0b/19b1ba5ee4412f303475a2c7ad5858efb99c90eae5ec627aa6275c439957/debugpy-1.8.16-cp312-cp312-win32.whl", hash = "sha256:833a61ed446426e38b0dd8be3e9d45ae285d424f5bf6cd5b2b559c8f12305508", size = 5281271, upload-time = "2025-08-06T18:00:21.281Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e0/bc62e2dc141de53bd03e2c7cb9d7011de2e65e8bdcdaa26703e4d28656ba/debugpy-1.8.16-cp312-cp312-win_amd64.whl", hash = "sha256:75f204684581e9ef3dc2f67687c3c8c183fde2d6675ab131d94084baf8084121", size = 5323149, upload-time = "2025-08-06T18:00:23.033Z" }, + { url = "https://files.pythonhosted.org/packages/62/66/607ab45cc79e60624df386e233ab64a6d8d39ea02e7f80e19c1d451345bb/debugpy-1.8.16-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:85df3adb1de5258dca910ae0bb185e48c98801ec15018a263a92bb06be1c8787", size = 2496157, upload-time = "2025-08-06T18:00:24.361Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a0/c95baae08a75bceabb79868d663a0736655e427ab9c81fb848da29edaeac/debugpy-1.8.16-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee89e948bc236a5c43c4214ac62d28b29388453f5fd328d739035e205365f0b", size = 4222491, upload-time = "2025-08-06T18:00:25.806Z" }, + { url = "https://files.pythonhosted.org/packages/5b/2f/1c8db6ddd8a257c3cd2c46413b267f1d5fa3df910401c899513ce30392d6/debugpy-1.8.16-cp313-cp313-win32.whl", hash = "sha256:cf358066650439847ec5ff3dae1da98b5461ea5da0173d93d5e10f477c94609a", size = 5281126, upload-time = 
"2025-08-06T18:00:27.207Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ba/c3e154ab307366d6c5a9c1b68de04914e2ce7fa2f50d578311d8cc5074b2/debugpy-1.8.16-cp313-cp313-win_amd64.whl", hash = "sha256:b5aea1083f6f50023e8509399d7dc6535a351cc9f2e8827d1e093175e4d9fa4c", size = 5323094, upload-time = "2025-08-06T18:00:29.03Z" }, + { url = "https://files.pythonhosted.org/packages/52/57/ecc9ae29fa5b2d90107cd1d9bf8ed19aacb74b2264d986ae9d44fe9bdf87/debugpy-1.8.16-py2.py3-none-any.whl", hash = "sha256:19c9521962475b87da6f673514f7fd610328757ec993bf7ec0d8c96f9a325f9e", size = 5287700, upload-time = "2025-08-06T18:00:42.333Z" }, +] + +[[package]] +name = "decorator" +version = "5.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, +] + [[package]] name = "docutils" version = "0.19" @@ -382,6 +484,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/69/e391bd51bc08ed9141ecd899a0ddb61ab6465309f1eb470905c0c8868081/docutils-0.19-py3-none-any.whl", hash = "sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc", size = 570472, upload-time = "2022-07-05T20:17:26.388Z" }, ] +[[package]] +name = "executing" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/28/c14e053b6762b1044f34a13aab6859bbf40456d37d23aa286ac24cfd9a5d/executing-2.2.1.tar.gz", hash = 
"sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4", size = 1129488, upload-time = "2025-09-01T09:48:10.866Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, +] + [[package]] name = "fastcluster" version = "1.2.6" @@ -574,6 +685,76 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] +[[package]] +name = "ipykernel" +version = "6.30.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "appnope", marker = "sys_platform == 'darwin'" }, + { name = "comm" }, + { name = "debugpy" }, + { name = "ipython" }, + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "matplotlib-inline" }, + { name = "nest-asyncio" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pyzmq" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bb/76/11082e338e0daadc89c8ff866185de11daf67d181901038f9e139d109761/ipykernel-6.30.1.tar.gz", hash = "sha256:6abb270161896402e76b91394fcdce5d1be5d45f456671e5080572f8505be39b", size = 166260, upload-time = "2025-08-04T15:47:35.018Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/c7/b445faca8deb954fe536abebff4ece5b097b923de482b26e78448c89d1dd/ipykernel-6.30.1-py3-none-any.whl", hash = "sha256:aa6b9fb93dca949069d8b85b6c79b2518e32ac583ae9c7d37c51d119e18b3fb4", size = 117484, upload-time = "2025-08-04T15:47:32.622Z" }, +] + +[[package]] +name = "ipython" +version = "9.5.0" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "decorator" }, + { name = "ipython-pygments-lexers" }, + { name = "jedi" }, + { name = "matplotlib-inline" }, + { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" }, + { name = "prompt-toolkit" }, + { name = "pygments" }, + { name = "stack-data" }, + { name = "traitlets" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/71/a86262bf5a68bf211bcc71fe302af7e05f18a2852fdc610a854d20d085e6/ipython-9.5.0.tar.gz", hash = "sha256:129c44b941fe6d9b82d36fc7a7c18127ddb1d6f02f78f867f402e2e3adde3113", size = 4389137, upload-time = "2025-08-29T12:15:21.519Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/2a/5628a99d04acb2d2f2e749cdf4ea571d2575e898df0528a090948018b726/ipython-9.5.0-py3-none-any.whl", hash = "sha256:88369ffa1d5817d609120daa523a6da06d02518e582347c29f8451732a9c5e72", size = 612426, upload-time = "2025-08-29T12:15:18.866Z" }, +] + +[[package]] +name = "ipython-pygments-lexers" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393, upload-time = "2025-01-17T11:24:34.505Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074, upload-time = "2025-01-17T11:24:33.271Z" }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287, upload-time = "2024-11-11T01:41:42.873Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278, upload-time = "2024-11-11T01:41:40.175Z" }, +] + [[package]] name = "jellyfish" version = "1.2.0" @@ -638,6 +819,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/e8/685f47e0d754320684db4425a0967f7d3fa70126bffd76110b7009a0090f/joblib-1.5.2-py3-none-any.whl", hash = "sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241", size = 308396, upload-time = "2025-08-27T12:15:45.188Z" }, ] +[[package]] +name = "jupyter-client" +version = "8.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-core" }, + { name = "python-dateutil" }, + { name = "pyzmq" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/22/bf9f12fdaeae18019a468b68952a60fe6dbab5d67cd2a103cac7659b41ca/jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419", size = 342019, upload-time = "2024-09-17T10:44:17.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/85/b0394e0b6fcccd2c1eeefc230978a6f8cb0c5df1e4cd3e7625735a0d7d1e/jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f", size = 106105, upload-time = "2024-09-17T10:44:15.218Z" }, +] + +[[package]] +name = "jupyter-core" +version = "5.8.1" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "platformdirs" }, + { name = "pywin32", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/1b/72906d554acfeb588332eaaa6f61577705e9ec752ddb486f302dafa292d9/jupyter_core-5.8.1.tar.gz", hash = "sha256:0a5f9706f70e64786b75acba995988915ebd4601c8a52e534a40b51c95f59941", size = 88923, upload-time = "2025-05-27T07:38:16.655Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/57/6bffd4b20b88da3800c5d691e0337761576ee688eb01299eae865689d2df/jupyter_core-5.8.1-py3-none-any.whl", hash = "sha256:c28d268fc90fb53f1338ded2eb410704c5449a358406e8a948b75706e24863d0", size = 28880, upload-time = "2025-05-27T07:38:15.137Z" }, +] + [[package]] name = "kiwisolver" version = "1.4.9" @@ -800,6 +1011,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/ca/e7bd1876a341ed8c456095962a582696cac1691cb6e55bd5ead15a755c5d/matplotlib-3.8.4-cp312-cp312-win_amd64.whl", hash = "sha256:7a6769f58ce51791b4cb8b4d7642489df347697cd3e23d88266aaaee93b41d9a", size = 7659712, upload-time = "2024-04-04T01:50:26.938Z" }, ] +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159, upload-time = "2024-04-15T13:44:44.803Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899, upload-time = "2024-04-15T13:44:43.265Z" }, +] + [[package]] name = "mccabe" version = "0.7.0" @@ 
-856,6 +1079,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "nest-asyncio" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, +] + [[package]] name = "networkx" version = "3.5" @@ -1015,6 +1247,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/d9/3741b344f57484b423cd22194025a8489992ad9962196a62721ef9980045/pandas-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:f69b0c9bb174a2342818d3e2778584e18c740d56857fc5cdb944ec8bbe4082cf", size = 10498689, upload-time = "2023-12-08T15:38:05.834Z" }, ] +[[package]] +name = "parso" +version = "0.8.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d4/de/53e0bcf53d13e005bd8c92e7855142494f41171b34c2536b86187474184d/parso-0.8.5.tar.gz", hash = "sha256:034d7354a9a018bdce352f48b2a8a450f05e9d6ee85db84764e9b6bd96dafe5a", size = 401205, upload-time = "2025-08-23T15:15:28.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/32/f8e3c85d1d5250232a5d3477a2a28cc291968ff175caeadaf3cc19ce0e4a/parso-0.8.5-py2.py3-none-any.whl", hash = 
"sha256:646204b5ee239c396d040b90f9e272e9a8017c630092bf59980beb62fd033887", size = 106668, upload-time = "2025-08-23T15:15:25.663Z" }, +] + [[package]] name = "pathspec" version = "0.12.1" @@ -1024,6 +1265,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, ] +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, +] + [[package]] name = "pillow" version = "11.3.0" @@ -1126,6 +1379,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "prettytable" +version = "3.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/b1/85e18ac92afd08c533603e3393977b6bc1443043115a47bb094f3b98f94f/prettytable-3.16.0.tar.gz", hash = 
"sha256:3c64b31719d961bf69c9a7e03d0c1e477320906a98da63952bc6698d6164ff57", size = 66276, upload-time = "2025-03-24T19:39:04.008Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/c7/5613524e606ea1688b3bdbf48aa64bafb6d0a4ac3750274c43b6158a390f/prettytable-3.16.0-py3-none-any.whl", hash = "sha256:b5eccfabb82222f5aa46b798ff02a8452cf530a352c31bddfa29be41242863aa", size = 33863, upload-time = "2025-03-24T19:39:02.359Z" }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.52" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, +] + +[[package]] +name = "psutil" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003, upload-time = "2025-02-13T21:54:07.946Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051, upload-time = "2025-02-13T21:54:12.36Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535, upload-time = "2025-02-13T21:54:16.07Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004, upload-time = "2025-02-13T21:54:18.662Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986, upload-time = "2025-02-13T21:54:21.811Z" }, + { url = "https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544, upload-time = "2025-02-13T21:54:24.68Z" }, + { url = "https://files.pythonhosted.org/packages/50/e6/eecf58810b9d12e6427369784efe814a1eec0f492084ce8eb8f4d89d6d61/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99", size = 241053, upload-time = "2025-02-13T21:54:34.31Z" }, + { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885, upload-time = "2025-02-13T21:54:37.486Z" }, +] + +[[package]] +name = "ptyprocess" 
+version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" }, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752, upload-time = "2024-07-21T12:58:21.801Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, +] + [[package]] name = "pyasn1" version = "0.6.1" @@ -1182,6 +1492,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/27/a58ddaf8c588a3ef080db9d0b7e0b97215cee3a45df74f3a94dbbf5c893a/pycodestyle-2.14.0-py2.py3-none-any.whl", hash = "sha256:dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d", size = 31594, upload-time = "2025-06-20T18:49:47.491Z" }, ] +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736, upload-time = "2024-03-30T13:22:22.564Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" }, +] + [[package]] name = "pyflakes" version = "3.4.0" @@ -1268,6 +1587,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/af/449a6a91e5d6db51420875c54f6aff7c97a86a3b13a0b4f1a5c13b988de3/pywin32-311-cp311-cp311-win32.whl", hash = "sha256:184eb5e436dea364dcd3d2316d577d625c0351bf237c4e9a5fabbcfa5a58b151", size = 8697031, upload-time = "2025-07-14T20:13:13.266Z" }, + { url = "https://files.pythonhosted.org/packages/51/8f/9bb81dd5bb77d22243d33c8397f09377056d5c687aa6d4042bea7fbf8364/pywin32-311-cp311-cp311-win_amd64.whl", hash = "sha256:3ce80b34b22b17ccbd937a6e78e7225d80c52f5ab9940fe0506a1a16f3dab503", size = 9508308, upload-time = "2025-07-14T20:13:15.147Z" }, + { url = "https://files.pythonhosted.org/packages/44/7b/9c2ab54f74a138c491aba1b1cd0795ba61f144c711daea84a88b63dc0f6c/pywin32-311-cp311-cp311-win_arm64.whl", hash = "sha256:a733f1388e1a842abb67ffa8e7aad0e70ac519e09b0f6a784e65a136ec7cefd2", size = 8703930, upload-time = "2025-07-14T20:13:16.945Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = 
"sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -1303,6 +1641,64 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, ] +[[package]] +name = "pyzmq" +version = "27.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "implementation_name == 'pypy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/66/159f38d184f08b5f971b467f87b1ab142ab1320d5200825c824b32b84b66/pyzmq-27.0.2.tar.gz", hash = "sha256:b398dd713b18de89730447347e96a0240225e154db56e35b6bb8447ffdb07798", size = 281440, upload-time = "2025-08-21T04:23:26.334Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/73/034429ab0f4316bf433eb6c20c3f49d1dc13b2ed4e4d951b283d300a0f35/pyzmq-27.0.2-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:063845960df76599ad4fad69fa4d884b3ba38304272104fdcd7e3af33faeeb1d", size = 1333169, upload-time = "2025-08-21T04:21:12.483Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/02/c42b3b526eb03a570c889eea85a5602797f800a50ba8b09ddbf7db568b78/pyzmq-27.0.2-cp311-cp311-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:845a35fb21b88786aeb38af8b271d41ab0967985410f35411a27eebdc578a076", size = 909176, upload-time = "2025-08-21T04:21:13.835Z" }, + { url = "https://files.pythonhosted.org/packages/1b/35/a1c0b988fabbdf2dc5fe94b7c2bcfd61e3533e5109297b8e0daf1d7a8d2d/pyzmq-27.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:515d20b5c3c86db95503faa989853a8ab692aab1e5336db011cd6d35626c4cb1", size = 668972, upload-time = "2025-08-21T04:21:15.315Z" }, + { url = "https://files.pythonhosted.org/packages/a0/63/908ac865da32ceaeecea72adceadad28ca25b23a2ca5ff018e5bff30116f/pyzmq-27.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:862aedec0b0684a5050cdb5ec13c2da96d2f8dffda48657ed35e312a4e31553b", size = 856962, upload-time = "2025-08-21T04:21:16.652Z" }, + { url = "https://files.pythonhosted.org/packages/2f/5a/90b3cc20b65cdf9391896fcfc15d8db21182eab810b7ea05a2986912fbe2/pyzmq-27.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cb5bcfc51c7a4fce335d3bc974fd1d6a916abbcdd2b25f6e89d37b8def25f57", size = 1657712, upload-time = "2025-08-21T04:21:18.666Z" }, + { url = "https://files.pythonhosted.org/packages/c4/3c/32a5a80f9be4759325b8d7b22ce674bb87e586b4c80c6a9d77598b60d6f0/pyzmq-27.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:38ff75b2a36e3a032e9fef29a5871e3e1301a37464e09ba364e3c3193f62982a", size = 2035054, upload-time = "2025-08-21T04:21:20.073Z" }, + { url = "https://files.pythonhosted.org/packages/13/61/71084fe2ff2d7dc5713f8740d735336e87544845dae1207a8e2e16d9af90/pyzmq-27.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7a5709abe8d23ca158a9d0a18c037f4193f5b6afeb53be37173a41e9fb885792", size = 1894010, upload-time = "2025-08-21T04:21:21.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/6b/77169cfb13b696e50112ca496b2ed23c4b7d8860a1ec0ff3e4b9f9926221/pyzmq-27.0.2-cp311-cp311-win32.whl", hash = "sha256:47c5dda2018c35d87be9b83de0890cb92ac0791fd59498847fc4eca6ff56671d", size = 566819, upload-time = "2025-08-21T04:21:23.31Z" }, + { url = "https://files.pythonhosted.org/packages/37/cd/86c4083e0f811f48f11bc0ddf1e7d13ef37adfd2fd4f78f2445f1cc5dec0/pyzmq-27.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:f54ca3e98f8f4d23e989c7d0edcf9da7a514ff261edaf64d1d8653dd5feb0a8b", size = 633264, upload-time = "2025-08-21T04:21:24.761Z" }, + { url = "https://files.pythonhosted.org/packages/a0/69/5b8bb6a19a36a569fac02153a9e083738785892636270f5f68a915956aea/pyzmq-27.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:2ef3067cb5b51b090fb853f423ad7ed63836ec154374282780a62eb866bf5768", size = 559316, upload-time = "2025-08-21T04:21:26.1Z" }, + { url = "https://files.pythonhosted.org/packages/68/69/b3a729e7b03e412bee2b1823ab8d22e20a92593634f664afd04c6c9d9ac0/pyzmq-27.0.2-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:5da05e3c22c95e23bfc4afeee6ff7d4be9ff2233ad6cb171a0e8257cd46b169a", size = 1305910, upload-time = "2025-08-21T04:21:27.609Z" }, + { url = "https://files.pythonhosted.org/packages/15/b7/f6a6a285193d489b223c340b38ee03a673467cb54914da21c3d7849f1b10/pyzmq-27.0.2-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4e4520577971d01d47e2559bb3175fce1be9103b18621bf0b241abe0a933d040", size = 895507, upload-time = "2025-08-21T04:21:29.005Z" }, + { url = "https://files.pythonhosted.org/packages/17/e6/c4ed2da5ef9182cde1b1f5d0051a986e76339d71720ec1a00be0b49275ad/pyzmq-27.0.2-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56d7de7bf73165b90bd25a8668659ccb134dd28449116bf3c7e9bab5cf8a8ec9", size = 652670, upload-time = "2025-08-21T04:21:30.71Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/66/d781ab0636570d32c745c4e389b1c6b713115905cca69ab6233508622edd/pyzmq-27.0.2-cp312-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:340e7cddc32f147c6c00d116a3f284ab07ee63dbd26c52be13b590520434533c", size = 840581, upload-time = "2025-08-21T04:21:32.008Z" }, + { url = "https://files.pythonhosted.org/packages/a6/df/f24790caf565d72544f5c8d8500960b9562c1dc848d6f22f3c7e122e73d4/pyzmq-27.0.2-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ba95693f9df8bb4a9826464fb0fe89033936f35fd4a8ff1edff09a473570afa0", size = 1641931, upload-time = "2025-08-21T04:21:33.371Z" }, + { url = "https://files.pythonhosted.org/packages/65/65/77d27b19fc5e845367f9100db90b9fce924f611b14770db480615944c9c9/pyzmq-27.0.2-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:ca42a6ce2d697537da34f77a1960d21476c6a4af3e539eddb2b114c3cf65a78c", size = 2021226, upload-time = "2025-08-21T04:21:35.301Z" }, + { url = "https://files.pythonhosted.org/packages/5b/65/1ed14421ba27a4207fa694772003a311d1142b7f543179e4d1099b7eb746/pyzmq-27.0.2-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3e44e665d78a07214b2772ccbd4b9bcc6d848d7895f1b2d7653f047b6318a4f6", size = 1878047, upload-time = "2025-08-21T04:21:36.749Z" }, + { url = "https://files.pythonhosted.org/packages/dd/dc/e578549b89b40dc78a387ec471c2a360766690c0a045cd8d1877d401012d/pyzmq-27.0.2-cp312-abi3-win32.whl", hash = "sha256:272d772d116615397d2be2b1417b3b8c8bc8671f93728c2f2c25002a4530e8f6", size = 558757, upload-time = "2025-08-21T04:21:38.2Z" }, + { url = "https://files.pythonhosted.org/packages/b5/89/06600980aefcc535c758414da969f37a5194ea4cdb73b745223f6af3acfb/pyzmq-27.0.2-cp312-abi3-win_amd64.whl", hash = "sha256:734be4f44efba0aa69bf5f015ed13eb69ff29bf0d17ea1e21588b095a3147b8e", size = 619281, upload-time = "2025-08-21T04:21:39.909Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/84/df8a5c089552d17c9941d1aea4314b606edf1b1622361dae89aacedc6467/pyzmq-27.0.2-cp312-abi3-win_arm64.whl", hash = "sha256:41f0bd56d9279392810950feb2785a419c2920bbf007fdaaa7f4a07332ae492d", size = 552680, upload-time = "2025-08-21T04:21:41.571Z" }, + { url = "https://files.pythonhosted.org/packages/b4/7b/b79e976508517ab80dc800f7021ef1fb602a6d55e4caa2d47fb3dca5d8b6/pyzmq-27.0.2-cp313-cp313-android_24_arm64_v8a.whl", hash = "sha256:7f01118133427cd7f34ee133b5098e2af5f70303fa7519785c007bca5aa6f96a", size = 1122259, upload-time = "2025-08-21T04:21:43.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/1c/777217b9940ebcb7e71c924184ca5f31e410580a58d9fd93798589f0d31c/pyzmq-27.0.2-cp313-cp313-android_24_x86_64.whl", hash = "sha256:e4b860edf6379a7234ccbb19b4ed2c57e3ff569c3414fadfb49ae72b61a8ef07", size = 1156113, upload-time = "2025-08-21T04:21:44.566Z" }, + { url = "https://files.pythonhosted.org/packages/59/7d/654657a4c6435f41538182e71b61eac386a789a2bbb6f30171915253a9a7/pyzmq-27.0.2-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:cb77923ea163156da14295c941930bd525df0d29c96c1ec2fe3c3806b1e17cb3", size = 1341437, upload-time = "2025-08-21T04:21:46.019Z" }, + { url = "https://files.pythonhosted.org/packages/20/a0/5ed7710037f9c096017adc748bcb1698674a2d297f8b9422d38816f7b56a/pyzmq-27.0.2-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:61678b7407b04df8f9423f188156355dc94d0fb52d360ae79d02ed7e0d431eea", size = 897888, upload-time = "2025-08-21T04:21:47.362Z" }, + { url = "https://files.pythonhosted.org/packages/2c/8a/6e4699a60931c17e7406641d201d7f2c121e2a38979bc83226a6d8f1ba32/pyzmq-27.0.2-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3c824b70925963bdc8e39a642672c15ffaa67e7d4b491f64662dd56d6271263", size = 660727, upload-time = "2025-08-21T04:21:48.734Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/d8/d761e438c186451bd89ce63a665cde5690c084b61cd8f5d7b51e966e875a/pyzmq-27.0.2-cp313-cp313t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c4833e02fcf2751975457be1dfa2f744d4d09901a8cc106acaa519d868232175", size = 848136, upload-time = "2025-08-21T04:21:50.416Z" }, + { url = "https://files.pythonhosted.org/packages/43/f1/a0f31684efdf3eb92f46b7dd2117e752208115e89d278f8ca5f413c5bb85/pyzmq-27.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b18045668d09cf0faa44918af2a67f0dbbef738c96f61c2f1b975b1ddb92ccfc", size = 1650402, upload-time = "2025-08-21T04:21:52.235Z" }, + { url = "https://files.pythonhosted.org/packages/41/fd/0d7f2a1732812df02c85002770da4a7864c79b210084bcdab01ea57e8d92/pyzmq-27.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:bbbb7e2f3ac5a22901324e7b086f398b8e16d343879a77b15ca3312e8cd8e6d5", size = 2024587, upload-time = "2025-08-21T04:21:54.07Z" }, + { url = "https://files.pythonhosted.org/packages/f1/73/358be69e279a382dd09e46dda29df8446365cddee4f79ef214e71e5b2b5a/pyzmq-27.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:b751914a73604d40d88a061bab042a11d4511b3ddbb7624cd83c39c8a498564c", size = 1885493, upload-time = "2025-08-21T04:21:55.588Z" }, + { url = "https://files.pythonhosted.org/packages/c5/7b/e9951ad53b3dfed8cfb4c2cfd6e0097c9b454e5c0d0e6df5f2b60d7c8c3d/pyzmq-27.0.2-cp313-cp313t-win32.whl", hash = "sha256:3e8f833dd82af11db5321c414638045c70f61009f72dd61c88db4a713c1fb1d2", size = 574934, upload-time = "2025-08-21T04:21:57.52Z" }, + { url = "https://files.pythonhosted.org/packages/55/33/1a7fc3a92f2124a63e6e2a6afa0af471a5c0c713e776b476d4eda5111b13/pyzmq-27.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:5b45153cb8eadcab14139970643a84f7a7b08dda541fbc1f6f4855c49334b549", size = 640932, upload-time = "2025-08-21T04:21:59.527Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/52/2598a94ac251a7c83f3887866225eea1952b0d4463a68df5032eb00ff052/pyzmq-27.0.2-cp313-cp313t-win_arm64.whl", hash = "sha256:86898f5c9730df23427c1ee0097d8aa41aa5f89539a79e48cd0d2c22d059f1b7", size = 561315, upload-time = "2025-08-21T04:22:01.295Z" }, + { url = "https://files.pythonhosted.org/packages/42/7d/10ef02ea36590b29d48ef88eb0831f0af3eb240cccca2752556faec55f59/pyzmq-27.0.2-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:d2b4b261dce10762be5c116b6ad1f267a9429765b493c454f049f33791dd8b8a", size = 1341463, upload-time = "2025-08-21T04:22:02.712Z" }, + { url = "https://files.pythonhosted.org/packages/94/36/115d18dade9a3d4d3d08dd8bfe5459561b8e02815f99df040555fdd7768e/pyzmq-27.0.2-cp314-cp314t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:4e4d88b6cff156fed468903006b24bbd85322612f9c2f7b96e72d5016fd3f543", size = 897840, upload-time = "2025-08-21T04:22:04.845Z" }, + { url = "https://files.pythonhosted.org/packages/39/66/083b37839b95c386a95f1537bb41bdbf0c002b7c55b75ee737949cecb11f/pyzmq-27.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8426c0ebbc11ed8416a6e9409c194142d677c2c5c688595f2743664e356d9e9b", size = 660704, upload-time = "2025-08-21T04:22:06.389Z" }, + { url = "https://files.pythonhosted.org/packages/76/5a/196ab46e549ba35bf3268f575e10cfac0dc86b78dcaa7a3e36407ecda752/pyzmq-27.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:565bee96a155fe6452caed5fb5f60c9862038e6b51a59f4f632562081cdb4004", size = 848037, upload-time = "2025-08-21T04:22:07.817Z" }, + { url = "https://files.pythonhosted.org/packages/70/ea/a27b9eb44b2e615a9ecb8510ebb023cc1d2d251181e4a1e50366bfbf94d6/pyzmq-27.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5de735c745ca5cefe9c2d1547d8f28cfe1b1926aecb7483ab1102fd0a746c093", size = 1650278, upload-time = "2025-08-21T04:22:09.269Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/ac/3e9af036bfaf718ab5e69ded8f6332da392c5450ad43e8e3ca66797f145a/pyzmq-27.0.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:ea4f498f8115fd90d7bf03a3e83ae3e9898e43362f8e8e8faec93597206e15cc", size = 2024504, upload-time = "2025-08-21T04:22:10.778Z" }, + { url = "https://files.pythonhosted.org/packages/ae/e9/3202d31788df8ebaa176b23d846335eb9c768d8b43c0506bbd6265ad36a0/pyzmq-27.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d00e81cb0afd672915257a3927124ee2ad117ace3c256d39cd97ca3f190152ad", size = 1885381, upload-time = "2025-08-21T04:22:12.718Z" }, + { url = "https://files.pythonhosted.org/packages/4b/ed/42de80b7ab4e8fcf13376f81206cf8041740672ac1fd2e1c598d63f595bf/pyzmq-27.0.2-cp314-cp314t-win32.whl", hash = "sha256:0f6e9b00d81b58f859fffc112365d50413954e02aefe36c5b4c8fb4af79f8cc3", size = 587526, upload-time = "2025-08-21T04:22:14.18Z" }, + { url = "https://files.pythonhosted.org/packages/ed/c8/8f3c72d6f0bfbf090aa5e283576073ca5c59839b85a5cc8c66ddb9b59801/pyzmq-27.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:2e73cf3b127a437fef4100eb3ac2ebe6b49e655bb721329f667f59eca0a26221", size = 661368, upload-time = "2025-08-21T04:22:15.677Z" }, + { url = "https://files.pythonhosted.org/packages/69/a4/7ee652ea1c77d872f5d99ed937fa8bbd1f6f4b7a39a6d3a0076c286e0c3e/pyzmq-27.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:4108785f2e5ac865d06f678a07a1901e3465611356df21a545eeea8b45f56265", size = 574901, upload-time = "2025-08-21T04:22:17.423Z" }, + { url = "https://files.pythonhosted.org/packages/c7/60/027d0032a1e3b1aabcef0e309b9ff8a4099bdd5a60ab38b36a676ff2bd7b/pyzmq-27.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e297784aea724294fe95e442e39a4376c2f08aa4fae4161c669f047051e31b02", size = 836007, upload-time = "2025-08-21T04:23:00.447Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/20/2ed1e6168aaea323df9bb2c451309291f53ba3af372ffc16edd4ce15b9e5/pyzmq-27.0.2-pp311-pypy311_pp73-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:e3659a79ded9745bc9c2aef5b444ac8805606e7bc50d2d2eb16dc3ab5483d91f", size = 799932, upload-time = "2025-08-21T04:23:02.052Z" }, + { url = "https://files.pythonhosted.org/packages/fd/25/5c147307de546b502c9373688ce5b25dc22288d23a1ebebe5d587bf77610/pyzmq-27.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3dba49ff037d02373a9306b58d6c1e0be031438f822044e8767afccfdac4c6b", size = 567459, upload-time = "2025-08-21T04:23:03.593Z" }, + { url = "https://files.pythonhosted.org/packages/71/06/0dc56ffc615c8095cd089c9b98ce5c733e990f09ce4e8eea4aaf1041a532/pyzmq-27.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de84e1694f9507b29e7b263453a2255a73e3d099d258db0f14539bad258abe41", size = 747088, upload-time = "2025-08-21T04:23:05.334Z" }, + { url = "https://files.pythonhosted.org/packages/06/f6/4a50187e023b8848edd3f0a8e197b1a7fb08d261d8c60aae7cb6c3d71612/pyzmq-27.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f0944d65ba2b872b9fcece08411d6347f15a874c775b4c3baae7f278550da0fb", size = 544639, upload-time = "2025-08-21T04:23:07.279Z" }, +] + [[package]] name = "quitefastmst" version = "0.9.0" @@ -1424,6 +1820,7 @@ dev = [ { name = "black" }, { name = "click" }, { name = "flake8" }, + { name = "ipykernel" }, { name = "mypy" }, { name = "pytest" }, { name = "pytest-cov" }, @@ -1441,6 +1838,7 @@ requires-dist = [ { name = "genieclust", specifier = ">=1.1.4,<2" }, { name = "hyperopt", git = "https://github.com/hyperopt/hyperopt.git" }, { name = "importlib-metadata", marker = "python_full_version < '3.10'", specifier = ">=4.13" }, + { name = "ipykernel", marker = "extra == 'dev'" }, { name = "jellyfish", specifier = ">=0.9,<2" }, { name = "lightgbm", specifier = "==3.2.1" }, { name = "matplotlib", specifier = 
">=3.7,<3.9" }, @@ -1453,10 +1851,10 @@ requires-dist = [ { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4,<6" }, { name = "requests", specifier = ">=2.28,<3" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4,<0.7" }, - { name = "scikit-learn", specifier = ">=1.2,<1.5" }, + { name = "scikit-learn", specifier = "==1.7.1" }, { name = "seaborn", specifier = ">=0.12,<0.14" }, { name = "shap" }, - { name = "sinonym" }, + { name = "sinonym", specifier = ">=0.2.0" }, { name = "strsimpy", specifier = ">=0.2,<0.3" }, { name = "text-unidecode", specifier = "==1.3" }, { name = "tqdm", specifier = ">=4.64,<5" }, @@ -1477,7 +1875,7 @@ wheels = [ [[package]] name = "scikit-learn" -version = "1.4.2" +version = "1.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "joblib" }, @@ -1485,18 +1883,28 @@ dependencies = [ { name = "scipy" }, { name = "threadpoolctl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ef/e5/c09d20723bfd91315f6f4ddc77912b0dcc09588b4ca7ad2ffa204607ad7f/scikit-learn-1.4.2.tar.gz", hash = "sha256:daa1c471d95bad080c6e44b4946c9390a4842adc3082572c20e4f8884e39e959", size = 7763055, upload-time = "2024-04-09T19:54:06.726Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/59/11/63de36e6933b03490fdfe5cbc9b5a68870a1281d8e705a23b33076dc82fb/scikit_learn-1.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:45dee87ac5309bb82e3ea633955030df9bbcb8d2cdb30383c6cd483691c546cc", size = 11558461, upload-time = "2024-04-09T19:53:22.402Z" }, - { url = "https://files.pythonhosted.org/packages/f2/30/1299e84d2ba3bc735baf17cebbf5b9d55144243c41b3ec6559ce3cf61e23/scikit_learn-1.4.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1d0b25d9c651fd050555aadd57431b53d4cf664e749069da77f3d52c5ad14b3b", size = 10451621, upload-time = "2024-04-09T19:53:25.577Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/6d/2b03edb51e688db0dc2958ab18edf71c8cc313172636cbdc0b1fc7670777/scikit_learn-1.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0203c368058ab92efc6168a1507d388d41469c873e96ec220ca8e74079bf62e", size = 11523470, upload-time = "2024-04-09T19:53:29.433Z" }, - { url = "https://files.pythonhosted.org/packages/4e/53/14405a47292b59235d811a2af8634aba188ccfd1a38ef4b8042f3447d79a/scikit_learn-1.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44c62f2b124848a28fd695db5bc4da019287abf390bfce602ddc8aa1ec186aae", size = 12146964, upload-time = "2024-04-09T19:53:32.662Z" }, - { url = "https://files.pythonhosted.org/packages/79/3d/02d5d3ed359498fec3abdf65407d3c07e3b8765af17464969055aaec5171/scikit_learn-1.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:5cd7b524115499b18b63f0c96f4224eb885564937a0b3477531b2b63ce331904", size = 10602955, upload-time = "2024-04-09T19:53:35.147Z" }, - { url = "https://files.pythonhosted.org/packages/81/3f/bdd6c812eb5356410ed26a673f80670138c24eea1ea7c484da022783cc28/scikit_learn-1.4.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:90378e1747949f90c8f385898fff35d73193dfcaec3dd75d6b542f90c4e89755", size = 11555151, upload-time = "2024-04-09T19:53:37.797Z" }, - { url = "https://files.pythonhosted.org/packages/fc/f1/7028da970a41c542a0f3a2234f78040c820dae87ed7e949cec9f585f2b1a/scikit_learn-1.4.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ff4effe5a1d4e8fed260a83a163f7dbf4f6087b54528d8880bab1d1377bd78be", size = 10462894, upload-time = "2024-04-09T19:53:41.271Z" }, - { url = "https://files.pythonhosted.org/packages/5d/ce/8937a0c6afd79f3486f39361ff58dd299ca1b19deb6b9deb59fe510d212f/scikit_learn-1.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:671e2f0c3f2c15409dae4f282a3a619601fa824d2c820e5b608d9d775f91780c", size = 11507077, upload-time = "2024-04-09T19:53:43.937Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/f6/761881cb1cec60874be76831571c76d596bcf3d13959390e73f4c745086f/scikit_learn-1.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d36d0bc983336bbc1be22f9b686b50c964f593c8a9a913a792442af9bf4f5e68", size = 12247981, upload-time = "2024-04-09T19:53:46.531Z" }, - { url = "https://files.pythonhosted.org/packages/40/77/91f92b2fddbd14201bf36cd0c0e7279f1501a88e7a00ef11261c4b95bb7a/scikit_learn-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:d762070980c17ba3e9a4a1e043ba0518ce4c55152032f1af0ca6f39b376b5928", size = 10600450, upload-time = "2024-04-09T19:53:49.637Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/41/84/5f4af978fff619706b8961accac84780a6d298d82a8873446f72edb4ead0/scikit_learn-1.7.1.tar.gz", hash = "sha256:24b3f1e976a4665aa74ee0fcaac2b8fccc6ae77c8e07ab25da3ba6d3292b9802", size = 7190445, upload-time = "2025-07-18T08:01:54.5Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/bd/a23177930abd81b96daffa30ef9c54ddbf544d3226b8788ce4c3ef1067b4/scikit_learn-1.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90c8494ea23e24c0fb371afc474618c1019dc152ce4a10e4607e62196113851b", size = 9334838, upload-time = "2025-07-18T08:01:11.239Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a1/d3a7628630a711e2ac0d1a482910da174b629f44e7dd8cfcd6924a4ef81a/scikit_learn-1.7.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:bb870c0daf3bf3be145ec51df8ac84720d9972170786601039f024bf6d61a518", size = 8651241, upload-time = "2025-07-18T08:01:13.234Z" }, + { url = "https://files.pythonhosted.org/packages/26/92/85ec172418f39474c1cd0221d611345d4f433fc4ee2fc68e01f524ccc4e4/scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:40daccd1b5623f39e8943ab39735cadf0bdce80e67cdca2adcb5426e987320a8", size = 9718677, upload-time = "2025-07-18T08:01:15.649Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/ce/abdb1dcbb1d2b66168ec43b23ee0cee356b4cc4100ddee3943934ebf1480/scikit_learn-1.7.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:30d1f413cfc0aa5a99132a554f1d80517563c34a9d3e7c118fde2d273c6fe0f7", size = 9511189, upload-time = "2025-07-18T08:01:18.013Z" }, + { url = "https://files.pythonhosted.org/packages/b2/3b/47b5eaee01ef2b5a80ba3f7f6ecf79587cb458690857d4777bfd77371c6f/scikit_learn-1.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:c711d652829a1805a95d7fe96654604a8f16eab5a9e9ad87b3e60173415cb650", size = 8914794, upload-time = "2025-07-18T08:01:20.357Z" }, + { url = "https://files.pythonhosted.org/packages/cb/16/57f176585b35ed865f51b04117947fe20f130f78940c6477b6d66279c9c2/scikit_learn-1.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3cee419b49b5bbae8796ecd690f97aa412ef1674410c23fc3257c6b8b85b8087", size = 9260431, upload-time = "2025-07-18T08:01:22.77Z" }, + { url = "https://files.pythonhosted.org/packages/67/4e/899317092f5efcab0e9bc929e3391341cec8fb0e816c4789686770024580/scikit_learn-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2fd8b8d35817b0d9ebf0b576f7d5ffbbabdb55536b0655a8aaae629d7ffd2e1f", size = 8637191, upload-time = "2025-07-18T08:01:24.731Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1b/998312db6d361ded1dd56b457ada371a8d8d77ca2195a7d18fd8a1736f21/scikit_learn-1.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:588410fa19a96a69763202f1d6b7b91d5d7a5d73be36e189bc6396bfb355bd87", size = 9486346, upload-time = "2025-07-18T08:01:26.713Z" }, + { url = "https://files.pythonhosted.org/packages/ad/09/a2aa0b4e644e5c4ede7006748f24e72863ba2ae71897fecfd832afea01b4/scikit_learn-1.7.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3142f0abe1ad1d1c31a2ae987621e41f6b578144a911ff4ac94781a583adad7", size = 9290988, upload-time = "2025-07-18T08:01:28.938Z" }, + { url = 
"https://files.pythonhosted.org/packages/15/fa/c61a787e35f05f17fc10523f567677ec4eeee5f95aa4798dbbbcd9625617/scikit_learn-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3ddd9092c1bd469acab337d87930067c87eac6bd544f8d5027430983f1e1ae88", size = 8735568, upload-time = "2025-07-18T08:01:30.936Z" }, + { url = "https://files.pythonhosted.org/packages/52/f8/e0533303f318a0f37b88300d21f79b6ac067188d4824f1047a37214ab718/scikit_learn-1.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b7839687fa46d02e01035ad775982f2470be2668e13ddd151f0f55a5bf123bae", size = 9213143, upload-time = "2025-07-18T08:01:32.942Z" }, + { url = "https://files.pythonhosted.org/packages/71/f3/f1df377d1bdfc3e3e2adc9c119c238b182293e6740df4cbeac6de2cc3e23/scikit_learn-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a10f276639195a96c86aa572ee0698ad64ee939a7b042060b98bd1930c261d10", size = 8591977, upload-time = "2025-07-18T08:01:34.967Z" }, + { url = "https://files.pythonhosted.org/packages/99/72/c86a4cd867816350fe8dee13f30222340b9cd6b96173955819a5561810c5/scikit_learn-1.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:13679981fdaebc10cc4c13c43344416a86fcbc61449cb3e6517e1df9d12c8309", size = 9436142, upload-time = "2025-07-18T08:01:37.397Z" }, + { url = "https://files.pythonhosted.org/packages/e8/66/277967b29bd297538dc7a6ecfb1a7dce751beabd0d7f7a2233be7a4f7832/scikit_learn-1.7.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f1262883c6a63f067a980a8cdd2d2e7f2513dddcef6a9eaada6416a7a7cbe43", size = 9282996, upload-time = "2025-07-18T08:01:39.721Z" }, + { url = "https://files.pythonhosted.org/packages/e2/47/9291cfa1db1dae9880420d1e07dbc7e8dd4a7cdbc42eaba22512e6bde958/scikit_learn-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:ca6d31fb10e04d50bfd2b50d66744729dbb512d4efd0223b864e2fdbfc4cee11", size = 8707418, upload-time = "2025-07-18T08:01:42.124Z" }, + { url = 
"https://files.pythonhosted.org/packages/61/95/45726819beccdaa34d3362ea9b2ff9f2b5d3b8bf721bd632675870308ceb/scikit_learn-1.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:781674d096303cfe3d351ae6963ff7c958db61cde3421cd490e3a5a58f2a94ae", size = 9561466, upload-time = "2025-07-18T08:01:44.195Z" }, + { url = "https://files.pythonhosted.org/packages/ee/1c/6f4b3344805de783d20a51eb24d4c9ad4b11a7f75c1801e6ec6d777361fd/scikit_learn-1.7.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:10679f7f125fe7ecd5fad37dd1aa2daae7e3ad8df7f3eefa08901b8254b3e12c", size = 9040467, upload-time = "2025-07-18T08:01:46.671Z" }, + { url = "https://files.pythonhosted.org/packages/6f/80/abe18fe471af9f1d181904203d62697998b27d9b62124cd281d740ded2f9/scikit_learn-1.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1f812729e38c8cb37f760dce71a9b83ccfb04f59b3dca7c6079dcdc60544fa9e", size = 9532052, upload-time = "2025-07-18T08:01:48.676Z" }, + { url = "https://files.pythonhosted.org/packages/14/82/b21aa1e0c4cee7e74864d3a5a721ab8fcae5ca55033cb6263dca297ed35b/scikit_learn-1.7.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:88e1a20131cf741b84b89567e1717f27a2ced228e0f29103426102bc2e3b8ef7", size = 9361575, upload-time = "2025-07-18T08:01:50.639Z" }, + { url = "https://files.pythonhosted.org/packages/f2/20/f4777fcd5627dc6695fa6b92179d0edb7a3ac1b91bcd9a1c7f64fa7ade23/scikit_learn-1.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b1bd1d919210b6a10b7554b717c9000b5485aa95a1d0f177ae0d7ee8ec750da5", size = 9277310, upload-time = "2025-07-18T08:01:52.547Z" }, ] [[package]] @@ -1627,7 +2035,7 @@ wheels = [ [[package]] name = "sinonym" -version = "0.1.1" +version = "0.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "joblib" }, @@ -1636,10 +2044,11 @@ dependencies = [ { name = "requests" }, { name = "scikit-learn" }, { name = "scipy" }, + { name = "skops" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/d8/14/1536a93f700f219930cd799222f1a797c4f5fd6ad30a9246012b5ad5e1b7/sinonym-0.1.1.tar.gz", hash = "sha256:bf50113968d109a490de94837eac5422d76b5c333060ff18a5ec5741cb51fc24", size = 265739, upload-time = "2025-09-01T00:28:42.682Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/54/72167a80d79867baaba0769523293a829853855d7025b9747d3088302630/sinonym-0.2.0.tar.gz", hash = "sha256:0f3ad3c9be5d18d9f493d5cc197a743848ec50eabf46793d877dad280ce4407a", size = 555811, upload-time = "2025-09-08T16:34:19.268Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/72/69258cafa6e22eecdcab461476df979889fbd904e964482546bb2e30a7ea/sinonym-0.1.1-py3-none-any.whl", hash = "sha256:dcbd3261e744896f768088524c279d4436624121961e8197d75b8ce69bdb0bea", size = 245679, upload-time = "2025-09-01T00:28:40.789Z" }, + { url = "https://files.pythonhosted.org/packages/20/9c/f326c80d2ae15982c323088c2a991f0dc0605ff20d7a20f78b40b4450162/sinonym-0.2.0-py3-none-any.whl", hash = "sha256:82f59c11ddc08f04d207454dd9560fe1fa58ba30d538359dbc2cc3065b7f322e", size = 549274, upload-time = "2025-09-08T16:34:17.546Z" }, ] [[package]] @@ -1651,6 +2060,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "skops" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "prettytable" }, + { name = "scikit-learn" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/0c/5ec987633e077dd0076178ea6ade2d6e57780b34afea0b497fb507d7a1ed/skops-0.13.0.tar.gz", hash = "sha256:66949fd3c95cbb5c80270fbe40293c0fe1e46cb4a921860e42584dd9c20ebeb1", size = 581312, upload-time = 
"2025-08-06T09:48:14.916Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/e8/6a2b2030f0689f894432b9c2f0357f2f3286b2a00474827e04b8fe9eea13/skops-0.13.0-py3-none-any.whl", hash = "sha256:55e2cccb18c86f5916e4cfe5acf55ed7b0eecddf08a151906414c092fa5926dc", size = 131200, upload-time = "2025-08-06T09:48:13.356Z" }, +] + [[package]] name = "slicer" version = "0.0.8" @@ -1660,6 +2085,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/63/81/9ef641ff4e12cbcca30e54e72fb0951a2ba195d0cda0ba4100e532d929db/slicer-0.0.8-py3-none-any.whl", hash = "sha256:6c206258543aecd010d497dc2eca9d2805860a0b3758673903456b7df7934dc3", size = 15251, upload-time = "2024-03-09T07:03:07.708Z" }, ] +[[package]] +name = "stack-data" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pure-eval" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707, upload-time = "2023-09-30T13:58:05.479Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, +] + [[package]] name = "strsimpy" version = "0.2.1" @@ -1726,6 +2165,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] +[[package]] +name = "tornado" +version = "6.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/09/ce/1eb500eae19f4648281bb2186927bb062d2438c2e5093d1360391afd2f90/tornado-6.5.2.tar.gz", hash = "sha256:ab53c8f9a0fa351e2c0741284e06c7a45da86afb544133201c5cc8578eb076a0", size = 510821, upload-time = "2025-08-08T18:27:00.78Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/48/6a7529df2c9cc12efd2e8f5dd219516184d703b34c06786809670df5b3bd/tornado-6.5.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2436822940d37cde62771cff8774f4f00b3c8024fe482e16ca8387b8a2724db6", size = 442563, upload-time = "2025-08-08T18:26:42.945Z" }, + { url = "https://files.pythonhosted.org/packages/f2/b5/9b575a0ed3e50b00c40b08cbce82eb618229091d09f6d14bce80fc01cb0b/tornado-6.5.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:583a52c7aa94ee046854ba81d9ebb6c81ec0fd30386d96f7640c96dad45a03ef", size = 440729, upload-time = "2025-08-08T18:26:44.473Z" }, + { url = "https://files.pythonhosted.org/packages/1b/4e/619174f52b120efcf23633c817fd3fed867c30bff785e2cd5a53a70e483c/tornado-6.5.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0fe179f28d597deab2842b86ed4060deec7388f1fd9c1b4a41adf8af058907e", size = 444295, upload-time = "2025-08-08T18:26:46.021Z" }, + { url = "https://files.pythonhosted.org/packages/95/fa/87b41709552bbd393c85dd18e4e3499dcd8983f66e7972926db8d96aa065/tornado-6.5.2-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b186e85d1e3536d69583d2298423744740986018e393d0321df7340e71898882", size = 443644, upload-time = "2025-08-08T18:26:47.625Z" }, + { url = "https://files.pythonhosted.org/packages/f9/41/fb15f06e33d7430ca89420283a8762a4e6b8025b800ea51796ab5e6d9559/tornado-6.5.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e792706668c87709709c18b353da1f7662317b563ff69f00bab83595940c7108", size = 443878, upload-time = "2025-08-08T18:26:50.599Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/92/fe6d57da897776ad2e01e279170ea8ae726755b045fe5ac73b75357a5a3f/tornado-6.5.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:06ceb1300fd70cb20e43b1ad8aaee0266e69e7ced38fa910ad2e03285009ce7c", size = 444549, upload-time = "2025-08-08T18:26:51.864Z" }, + { url = "https://files.pythonhosted.org/packages/9b/02/c8f4f6c9204526daf3d760f4aa555a7a33ad0e60843eac025ccfd6ff4a93/tornado-6.5.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:74db443e0f5251be86cbf37929f84d8c20c27a355dd452a5cfa2aada0d001ec4", size = 443973, upload-time = "2025-08-08T18:26:53.625Z" }, + { url = "https://files.pythonhosted.org/packages/ae/2d/f5f5707b655ce2317190183868cd0f6822a1121b4baeae509ceb9590d0bd/tornado-6.5.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b5e735ab2889d7ed33b32a459cac490eda71a1ba6857b0118de476ab6c366c04", size = 443954, upload-time = "2025-08-08T18:26:55.072Z" }, + { url = "https://files.pythonhosted.org/packages/e8/59/593bd0f40f7355806bf6573b47b8c22f8e1374c9b6fd03114bd6b7a3dcfd/tornado-6.5.2-cp39-abi3-win32.whl", hash = "sha256:c6f29e94d9b37a95013bb669616352ddb82e3bfe8326fccee50583caebc8a5f0", size = 445023, upload-time = "2025-08-08T18:26:56.677Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2a/f609b420c2f564a748a2d80ebfb2ee02a73ca80223af712fca591386cafb/tornado-6.5.2-cp39-abi3-win_amd64.whl", hash = "sha256:e56a5af51cc30dd2cae649429af65ca2f6571da29504a07995175df14c18f35f", size = 445427, upload-time = "2025-08-08T18:26:57.91Z" }, + { url = "https://files.pythonhosted.org/packages/5e/4f/e1f65e8f8c76d73658b33d33b81eed4322fb5085350e4328d5c956f0c8f9/tornado-6.5.2-cp39-abi3-win_arm64.whl", hash = "sha256:d6c33dc3672e3a1f3618eb63b7ef4683a7688e7b9e6e8f0d9aa5726360a004af", size = 444456, upload-time = "2025-08-08T18:26:59.207Z" }, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -1738,6 +2196,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, ] +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1765,6 +2232,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, ] +[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = 
"sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, +] + [[package]] name = "wheel" version = "0.45.1"