Skip to content

Store the dataset featured field as a bool #51

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ authors = [{name = "Opendata Team", email = "[email protected]"}]
readme = "README.md"
dynamic = ["version", "description"]
dependencies = [
"dependency-injector==4.41.0",
"dependency-injector==4.42.0b1",
"elasticsearch==7.15.0",
"elasticsearch_dsl==7.4.0",
"factory-boy==3.2.1",
Expand All @@ -20,7 +20,7 @@ dependencies = [
"pydantic==1.9.0",
"pytest==6.2.5",
"pytest-flask==1.2.0",
"markdown==3.3.3",
"markdown==3.7",
"beautifulsoup4==4.10.0",
# werkzeug is pinned to a known working version, since werkzeug 3.1.3 fails while running the tests
"werkzeug==3.0.4",
Expand Down
4 changes: 2 additions & 2 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_api_dataset_index_unindex(app, client, faker):
'views': faker.random_int(),
'followers': faker.random_int(),
'reuses': faker.random_int(),
'featured': faker.random_int(min=0, max=1),
'featured': faker.pybool(),
'resources_count': faker.random_int(min=1, max=15),
'organization': {
'id': faker.md5(),
Expand Down Expand Up @@ -104,7 +104,7 @@ def test_api_dataset_index_on_another_index(app, client, search_client, faker):
'views': faker.random_int(),
'followers': faker.random_int(),
'reuses': faker.random_int(),
'featured': faker.random_int(min=0, max=1),
'featured': faker.pybool(),
'resources_count': faker.random_int(min=1, max=15),
'organization': {
'id': faker.md5(),
Expand Down
2 changes: 1 addition & 1 deletion tests/test_consumers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_parse_dataset_obj():
'views': 7806,
'followers': 72,
'reuses': 45,
'featured': 0,
'featured': False,
'resources_count': 10,
'organization': {
'id': '534fff8ea3a7292c64a77f02',
Expand Down
2 changes: 1 addition & 1 deletion udata_search_service/domain/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Dataset(EntityBase):
views: int
followers: int
reuses: int
featured: int
featured: bool
resources_count: int
concat_title_org: str
description: str
Expand Down
1 change: 0 additions & 1 deletion udata_search_service/infrastructure/consumers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def load_from_dict(cls, data):
data["reuses"] = log2p(data.get("reuses", 0))
data["orga_followers"] = log2p(data.get("orga_followers", 0))
data["orga_sp"] = 4 if data.get("orga_sp", 0) else 1
data["featured"] = 4 if data.get("featured", 0) else 1

return super().load_from_dict(data)

Expand Down
14 changes: 11 additions & 3 deletions udata_search_service/infrastructure/search_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from elasticsearch import Elasticsearch
from elasticsearch.exceptions import NotFoundError
from elasticsearch_dsl import Date, Document, Float, Integer, Keyword, Text, tokenizer, token_filter, analyzer, query
from elasticsearch_dsl import Boolean, Date, Document, Float, Integer, Keyword, Text, tokenizer, token_filter, analyzer, query
from elasticsearch_dsl.connections import connections
from udata_search_service.domain.entities import Dataset, Organization, Reuse, Dataservice
from udata_search_service.config import Config
Expand Down Expand Up @@ -139,7 +139,7 @@ class SearchableDataset(IndexDocument):
views = Float()
followers = Float()
reuses = Float()
featured = Integer()
featured = Boolean()
resources_count = Integer()
concat_title_org = Text(analyzer=dgv_analyzer)
temporal_coverage_start = Date()
Expand Down Expand Up @@ -272,7 +272,15 @@ def query_datasets(self, query_text: str, offset: int, page_size: int, filters:
query.SF("field_value_factor", field="views", factor=4, modifier='sqrt', missing=1),
query.SF("field_value_factor", field="followers", factor=4, modifier='sqrt', missing=1),
query.SF("field_value_factor", field="orga_followers", factor=1, modifier='sqrt', missing=1),
query.SF("field_value_factor", field="featured", factor=1, modifier='sqrt', missing=1),
query.SF("script_score",
script={
"source": """
if (doc['featured'].value) {
return 2
} else {
return 1
}
"""})
]

if query_text:
Expand Down
2 changes: 1 addition & 1 deletion udata_search_service/presentation/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class DatasetToIndex(BaseModel):
followers: int
reuses: int
resources_count: Optional[int] = 0
featured: Optional[int] = 0
featured: Optional[bool] = False
format: Optional[list] = []
schema_: Optional[list] = Field([], alias="schema")
extras: Optional[dict] = {}
Expand Down