Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/claude-docs/bingo-elastic-python.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ The standalone Python library at `bingo/bingo-elastic/python/` that indexes Indi
- `bingo_elastic/elastic.py` — `ElasticRepository` and `AsyncElasticRepository` (parallel sync/async classes, both take a `tau_search: bool` flag), `IndexName` enum (`BINGO_MOLECULE`, `BINGO_REACTION`, `BINGO_CUSTOM`), `build_index_body(tau_search)` builder for the index mapping, and `compile_query` (dispatches a query subject + kwargs to the right query class; reroutes substructure to the tautomer path when `options` contains `TAU`).
- `bingo_elastic/queries.py` — `CompilableQuery` hierarchy: `SubstructureQuery`, `TautomerSubstructureQuery` (subclass swapping in the `sub-tau` fingerprint and `tau_fingerprint` field), `ExactMatch`, similarity matches (`TanimotoSimilarityMatch`, `EuclidSimilarityMatch`, `TverskySimilarityMatch`), plus `KeywordQuery`, `RangeQuery`, `WildcardQuery` for non-chemical fields. `query_factory` maps kwarg keys (`"substructure"`, `"tautomer"`, `"exact"`, …) to a class.
- `bingo_elastic/model/record.py` — `IndigoRecord` (abstract), `IndigoRecordMolecule`, `IndigoRecordReaction`, and the `WithIndigoObject` descriptor that extracts fingerprints + `cmf` + `hash` from an `IndigoObject` at construction time. The descriptor also computes the `sub-tau` fingerprint when the record was built with `tau_search=True`.
- `bingo_elastic/model/helpers.py` — file iterators (`iterate_file`, `load_reaction`).
- `bingo_elastic/model/helpers.py` — file iterators (`iterate_file` generic dispatcher plus format-specific wrappers `iterate_sdf` / `iterate_smiles` / `iterate_cml`) and single-file loaders (`load_molecule`, `load_reaction`).
- `tests/` — its own pytest suite with `conftest.py` fixtures that connect to `localhost:9200`.

## Core flow (the non-obvious bit)
Expand Down
33 changes: 32 additions & 1 deletion bingo/bingo-elastic/python/bingo_elastic/model/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,27 @@

# Internal-type strings of the Indigo objects handled as molecules.
# Presumably compared against IndigoObject.dbgInternalType() -- confirm at
# the use site. Type #03 is the query *molecule*; the query reaction is
# type #05 and belongs to REAC_TYPES below.
MOL_TYPES = ["#02: <molecule>", "#03: <query molecule>", "#12: <RDFMolecule>"]
# Internal-type strings of the Indigo objects handled as reactions.
REAC_TYPES = ["#04: <reaction>", "#05: <query reaction>"]
# Attribute names that a structure property (e.g. an SDF tag) must never
# overwrite: properties with one of these names are skipped when properties
# are copied onto a record.
RESERVED_FIELDS = frozenset(
    {
        "cmf",
        "name",
        "hash",
        "has_error",
        "rawData",
        "sim_fingerprint",
        "sim_fingerprint_len",
        "sub_fingerprint",
        "sub_fingerprint_len",
        "tau_fingerprint",
        "tau_fingerprint_len",
        "record_id",
        "error_handler",
        "skip_errors",
        "tau_search",
        "indigo_object",
        "elastic_response",
    }
)


# pylint: disable=unused-argument
Expand All @@ -31,7 +52,7 @@ def __set__(self, instance: IndigoRecord, value: Dict):


class WithIndigoObject:
def __set__( # pylint: disable=too-many-branches
def __set__( # pylint: disable=too-many-statements, too-many-branches
self, instance: IndigoRecord, value: IndigoObject
) -> None:
try:
Expand Down Expand Up @@ -92,6 +113,15 @@ def __set__( # pylint: disable=too-many-branches
except IndigoException as err_:
check_error(instance, err_)

try:
for prop in value_dup.iterateProperties():
prop_name = prop.name()
if prop_name in RESERVED_FIELDS:
continue
setattr(instance, prop_name, prop.rawData())
except IndigoException as err_:
check_error(instance, err_)


class IndigoRecord:
"""
Expand Down Expand Up @@ -143,6 +173,7 @@ def __init__(self, **kwargs) -> None:
:param skip_errors: if True, all errors will be skipped,
no error_handler is required
:type skip_errors: bool
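SDF tags (structure properties) are automatically populated as record
attributes; tags whose names collide with reserved record fields are skipped.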
"""

# First check if skip_errors flag passed
Expand Down
35 changes: 34 additions & 1 deletion bingo/bingo-elastic/python/tests/test_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
ElasticRepository,
IndexName,
)
from bingo_elastic.model.helpers import iterate_file
from bingo_elastic.model.helpers import iterate_file, iterate_sdf
from bingo_elastic.model.record import (
IndigoRecord,
IndigoRecordMolecule,
Expand Down Expand Up @@ -527,6 +527,39 @@ async def test_a_custom_fields(
assert iupac_inch == "RDHQFKQIGNGIED-UHFFFAOYSA-N"


def test_sdf_custom_properties(
    elastic_repository_molecule: ElasticRepository,
    resource_loader,
):
    """Index every record of an SDF file, then filter on the SDF tag ``n``.

    Checks that at least one hit comes back for ``n == "1"`` and that the
    tag value is readable as an attribute of the returned record.
    """
    sdf_records = iterate_sdf(
        resource_loader("molecules/rand_queries_small.sdf")
    )
    for record in sdf_records:
        elastic_repository_molecule.index_record(record)

    # Pause before querying -- presumably so the freshly indexed documents
    # become searchable; confirm against the repository's refresh settings.
    time.sleep(1)

    matches = list(elastic_repository_molecule.filter(n="1"))
    assert len(matches) >= 1
    assert matches[0].n == "1"  # type: ignore


@pytest.mark.asyncio
async def test_a_sdf_custom_properties(
    a_elastic_repository_molecule: AsyncRepositoryT,
    resource_loader,
):
    """Index an SDF file through the async repository and filter on tag ``n``.

    Records are written in one repository context and queried in a second
    one; at least one hit with ``n == "1"`` must be returned.
    """
    async with a_elastic_repository_molecule() as writer:
        sdf_records = iterate_sdf(
            resource_loader("molecules/rand_queries_small.sdf")
        )
        for record in sdf_records:
            await writer.index_record(record)

    async with a_elastic_repository_molecule() as reader:
        matches = []
        async for found in reader.filter(n="1"):
            matches.append(found)
        assert len(matches) >= 1
        assert matches[0].n == "1"  # type: ignore


def test_search_empty_fingerprint(
elastic_repository_molecule: ElasticRepository,
indigo_fixture: Indigo,
Expand Down
Loading