Skip to content

Commit 04c1572

Browse files
committed
fix: render >1 result in csv/tsv/json value-search
- stream pages as lists, not dicts with colliding keys
- tidy types
- move `iter_unique` to `trove.util.iter` and add tests
1 parent bed1b2a commit 04c1572

File tree

7 files changed

+64
-52
lines changed

7 files changed

+64
-52
lines changed

tests/trove/test_doctest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import trove.util.chainmap
44
import trove.util.frozen
55
import trove.util.iris
6+
import trove.util.iter
67
import trove.util.propertypath
78
import trove.vocab.mediatypes
89

@@ -15,6 +16,7 @@
1516
trove.util.chainmap,
1617
trove.util.frozen,
1718
trove.util.iris,
19+
trove.util.iter,
1820
trove.util.propertypath,
1921
trove.vocab.mediatypes,
2022
)

trove/render/_simple_trovesearch.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import annotations
2-
from collections.abc import Generator, Iterator
2+
from collections.abc import Generator, Iterator, Sequence
3+
import itertools
34
import json
5+
import logging
46
from typing import Any, TYPE_CHECKING
57

68
from primitive_metadata import primitive_rdf as rdf
@@ -13,6 +15,8 @@
1315
if TYPE_CHECKING:
1416
from trove.util.json import JsonObject
1517

18+
_logger = logging.getLogger(__name__)
19+
1620

1721
class SimpleTrovesearchRenderer(BaseRenderer):
1822
'''for "simple" search api responses (including only result metadata)
@@ -35,12 +39,8 @@ def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRend
3539
rendered_content=self.simple_unicard_rendering(card_iri, osfmap_json),
3640
)
3741

38-
def multicard_rendering(self, card_pages: Iterator[dict[str, JsonObject]]) -> ProtoRendering:
39-
_cards = (
40-
(_card_iri, _card_contents)
41-
for _page in card_pages
42-
for _card_iri, _card_contents in _page.items()
43-
)
42+
def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering:
43+
_cards = itertools.chain.from_iterable(card_pages)
4444
return SimpleRendering(
4545
mediatype=self.MEDIATYPE,
4646
rendered_content=self.simple_multicard_rendering(_cards),
@@ -57,7 +57,7 @@ def render_document(self) -> ProtoRendering:
5757
)
5858
raise trove_exceptions.UnsupportedRdfType(_focustypes)
5959

60-
def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]:
60+
def _iter_card_pages(self) -> Generator[list[tuple[str, JsonObject]]]:
6161
assert not self.__already_iterated_cards
6262
self.__already_iterated_cards = True
6363
self._page_links = set()
@@ -67,22 +67,22 @@ def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]:
6767
if (RDF.type, JSONAPI_LINK_OBJECT) in _page:
6868
self._page_links.add(_page)
6969
elif rdf.is_container(_page):
70-
_cardpage = []
71-
for _search_result in rdf.container_objects(_page):
70+
_cardpage: list[tuple[str, JsonObject]] = []
71+
for _search_result_blanknode in rdf.container_objects(_page):
7272
try:
7373
_card = next(
7474
_obj
75-
for _pred, _obj in _search_result
75+
for _pred, _obj in _search_result_blanknode
7676
if _pred == TROVE.indexCard
7777
)
7878
except StopIteration:
7979
pass # skip malformed
8080
else:
81-
_cardpage.append(_card)
82-
yield {
83-
self._get_card_iri(_card): self._get_card_content(_card, _page_graph)
84-
for _card in _cardpage
85-
}
81+
_cardpage.append((
82+
self._get_card_iri(_card),
83+
self._get_card_content(_card, _page_graph),
84+
))
85+
yield _cardpage
8686

8787
def _get_card_iri(self, card: str | rdf.RdfBlanknode) -> str:
8888
return card if isinstance(card, str) else ''

trove/render/simple_csv.py

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,20 @@
22
from collections.abc import (
33
Generator,
44
Iterator,
5-
Iterable,
65
Sequence,
76
)
87
import csv
8+
import dataclasses
99
import functools
1010
import itertools
11-
import dataclasses
11+
import logging
1212
from typing import TYPE_CHECKING, ClassVar
1313

1414
from trove.trovesearch.search_params import (
1515
CardsearchParams,
1616
ValuesearchParams,
1717
)
18+
from trove.util.iter import iter_unique
1819
from trove.util.propertypath import Propertypath, GLOB_PATHSTEP
1920
from trove.vocab import mediatypes
2021
from trove.vocab import osfmap
@@ -26,6 +27,7 @@
2627
from trove.util.trove_params import BasicTroveParams
2728
from trove.util.json import JsonValue, JsonObject
2829

30+
_logger = logging.getLogger(__name__)
2931

3032
type Jsonpath = Sequence[str] # path of json keys
3133
type CsvValue = str | int | float | None
@@ -41,9 +43,10 @@ class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer):
4143
CSV_DIALECT: ClassVar[type[csv.Dialect]] = csv.excel
4244

4345
def unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> ProtoRendering:
44-
return self.multicard_rendering(card_pages=iter([{card_iri: osfmap_json}]))
46+
_page = [(card_iri, osfmap_json)]
47+
return self.multicard_rendering(card_pages=iter([_page]))
4548

46-
def multicard_rendering(self, card_pages: Iterator[dict[str, JsonObject]]) -> ProtoRendering:
49+
def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering:
4750
_doc = TabularDoc(
4851
card_pages,
4952
trove_params=getattr(self.response_focus, 'search_params', None),
@@ -67,7 +70,7 @@ def csv_stream(
6770

6871
@dataclasses.dataclass
6972
class TabularDoc:
70-
card_pages: Iterator[dict[str, JsonObject]]
73+
card_pages: Iterator[Sequence[tuple[str, JsonObject]]]
7174
trove_params: BasicTroveParams | None = None
7275
_started: bool = False
7376

@@ -79,10 +82,6 @@ def column_jsonpaths(self) -> tuple[Jsonpath, ...]:
7982
)
8083
return (_ID_JSONPATH, *_column_jsonpaths)
8184

82-
@functools.cached_property
83-
def first_page(self) -> dict[str, JsonObject]:
84-
return next(self.card_pages, {})
85-
8685
def _column_paths(self) -> Iterator[Propertypath]:
8786
_pathlists: list[Sequence[Propertypath]] = []
8887
if self.trove_params is not None: # hacks
@@ -103,29 +102,16 @@ def _column_paths(self) -> Iterator[Propertypath]:
103102
_pathlists.append(_pathlist)
104103
if not _pathlists:
105104
_pathlists.append(osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS)
106-
return self.iter_unique(itertools.chain.from_iterable(_pathlists))
107-
108-
@staticmethod
109-
def iter_unique[T](iterable: Iterable[T]) -> Generator[T]:
110-
_seen = set()
111-
for _item in iterable:
112-
if _item not in _seen:
113-
_seen.add(_item)
114-
yield _item
115-
116-
def _iter_card_pages(self) -> Generator[dict[str, JsonObject]]:
117-
assert not self._started
118-
self._started = True
119-
if self.first_page:
120-
yield self.first_page
121-
yield from self.card_pages
105+
return iter_unique(itertools.chain.from_iterable(_pathlists))
122106

123107
def header(self) -> list[CsvValue]:
124108
return ['.'.join(_path) for _path in self.column_jsonpaths]
125109

126110
def rows(self) -> Generator[list[CsvValue]]:
127-
for _page in self._iter_card_pages():
128-
for _card_iri, _osfmap_json in _page.items():
111+
assert not self._started
112+
self._started = True
113+
for _page in self.card_pages:
114+
for _card_iri, _osfmap_json in _page:
129115
yield self._row_values(_osfmap_json)
130116

131117
def _row_values(self, osfmap_json: JsonObject) -> list[CsvValue]:

trove/render/simple_json.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@
1515
from .rendering.streamable import StreamableRendering
1616
from ._simple_trovesearch import SimpleTrovesearchRenderer
1717
if typing.TYPE_CHECKING:
18+
from collections.abc import (
19+
Generator,
20+
Iterator,
21+
Sequence,
22+
)
1823
from trove.util.json import JsonObject
1924

2025

@@ -24,25 +29,25 @@ class TrovesearchSimpleJsonRenderer(SimpleTrovesearchRenderer):
2429
MEDIATYPE = mediatypes.JSON
2530
INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json']
2631

27-
def simple_unicard_rendering(self, card_iri: str, osfmap_json: dict[str, typing.Any]) -> str:
32+
def simple_unicard_rendering(self, card_iri: str, osfmap_json: JsonObject) -> str:
2833
return json.dumps({
2934
'data': self._render_card_content(card_iri, osfmap_json),
3035
'links': self._render_links(),
3136
'meta': self._render_meta(),
3237
}, indent=2)
3338

34-
def multicard_rendering(self, card_pages: typing.Iterator[dict[str, dict[str, typing.Any]]]) -> ProtoRendering:
39+
def multicard_rendering(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> ProtoRendering:
3540
return StreamableRendering(
3641
mediatype=self.MEDIATYPE,
3742
content_stream=self._stream_json(card_pages),
3843
)
3944

40-
def _stream_json(self, card_pages: typing.Iterator[dict[str, typing.Any]]) -> typing.Generator[str]:
45+
def _stream_json(self, card_pages: Iterator[Sequence[tuple[str, JsonObject]]]) -> Generator[str]:
4146
_prefix = '{"data": ['
4247
yield _prefix
4348
_datum_prefix = None
4449
for _page in card_pages:
45-
for _card_iri, _osfmap_json in _page.items():
50+
for _card_iri, _osfmap_json in _page:
4651
if _datum_prefix is not None:
4752
yield _datum_prefix
4853
yield json.dumps(self._render_card_content(_card_iri, _osfmap_json), indent=2)
@@ -79,7 +84,7 @@ def _render_meta(self) -> dict[str, int | str]:
7984
pass
8085
return _meta
8186

82-
def _render_links(self) -> dict[str, typing.Any]:
87+
def _render_links(self) -> JsonObject:
8388
_links = {}
8489
for _pagelink in self._page_links:
8590
_twopledict = rdf.twopledict_from_twopleset(_pagelink)
@@ -89,8 +94,8 @@ def _render_links(self) -> dict[str, typing.Any]:
8994
_links[_membername.unicode_value] = _link_url
9095
return _links
9196

92-
def _add_twople(self, json_dict: dict[str, typing.Any], predicate_iri: str, object_iri: str) -> None:
93-
_obj_ref = {'@id': object_iri}
97+
def _add_twople(self, json_dict: JsonObject, predicate_iri: str, object_iri: str) -> None:
98+
_obj_ref: JsonObject = {'@id': object_iri}
9499
_obj_list = json_dict.setdefault(predicate_iri, [])
95100
if isinstance(_obj_list, list):
96101
_obj_list.append(_obj_ref)

trove/trovebrowse_gathering.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def gather_cards_focused_on(focus: gather.Focus, *, blend_cards: bool) -> Gather
4646
)
4747
if blend_cards:
4848
for _resource_description in _lrd_qs:
49-
yield from rdf.iter_tripleset(_resource_description.as_rdf_tripledict())
49+
yield from rdf.iter_tripleset(_resource_description.as_rdfdoc_with_supplements().tripledict)
5050
yield (ns.FOAF.isPrimaryTopicOf, _resource_description.indexcard.get_iri())
5151
else:
5252
for _resource_description in _lrd_qs:

trove/trovesearch/trovesearch_gathering.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
)
4141

4242

43-
logger = logging.getLogger(__name__)
43+
_logger = logging.getLogger(__name__)
4444

4545

4646
type GathererGenerator = Generator[rdf.RdfTriple | rdf.RdfTwople]

trove/util/iter.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from collections.abc import (
2+
Generator,
3+
Hashable,
4+
Iterable,
5+
)
6+
7+
8+
def iter_unique[T: Hashable](iterable: Iterable[T]) -> Generator[T]:
9+
'''
10+
>>> list(iter_unique([1,1,1]))
11+
[1]
12+
>>> list(iter_unique([1,2,3,2,4,2,1,5]))
13+
[1, 2, 3, 4, 5]
14+
'''
15+
_seen = set()
16+
for _item in iterable:
17+
if _item not in _seen:
18+
_seen.add(_item)
19+
yield _item

0 commit comments

Comments
 (0)