Skip to content

Facet terms grouping + Production homepage stats #342

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,091 changes: 1,614 additions & 1,477 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ certifi = ">=2021.5.30"
chardet = "^5.0.0"
codeguru-profiler-agent = "^1.2.4"
colorama = "0.3.3"
dcicsnovault = "^11.24.0"
dcicsnovault = "^11.26.0"
dcicutils = "^8.18.3"
docutils = ">=0.16,<1"
encoded-core = "^0.9.6"
Expand Down
83 changes: 69 additions & 14 deletions src/encoded/homepage.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,33 @@ class SearchBase:
ALL_RELEASED_FILES_SEARCH_PARAMS = {
'type': 'File',
'status': ['released', 'restricted', 'public'],
'additional_facet': [
'file_sets.libraries.assay.display_title'
]
}
COLO829_RELEASED_FILES_SEARCH_PARAMS = {
'type': 'File',
'status': ['released', 'restricted', 'public'],
'dataset': ['colo829blt_50to1', 'colo829t', 'colo829bl']
'dataset': ['colo829blt_50to1', 'colo829t', 'colo829bl'],
'additional_facet': [
'file_sets.libraries.assay.display_title'
]
}
HAPMAP_RELEASED_FILES_SEARCH_PARAMS = {
'type': 'File',
'status': ['released', 'restricted', 'public'],
'dataset': ['hapmap']
'dataset': ['hapmap'],
'additional_facet': [
'file_sets.libraries.assay.display_title'
]
}
IPSC_RELEASED_FILES_SEARCH_PARAMS = {
'type': 'File',
'status': ['released', 'restricted', 'public'],
'dataset': ['lb_fibroblast', 'lb_ipsc_1', 'lb_ipsc_2', 'lb_ipsc_4', 'lb_ipsc_52', 'lb_ipsc_60']
'dataset': ['lb_fibroblast', 'lb_ipsc_1', 'lb_ipsc_2', 'lb_ipsc_4', 'lb_ipsc_52', 'lb_ipsc_60'],
'additional_facet': [
'file_sets.libraries.assay.display_title'
]
}
TISSUES_RELEASED_FILES_SEARCH_PARAMS = {
'type': 'File',
Expand All @@ -49,7 +61,18 @@ class SearchBase:
'ST003-1Q',
'ST004-1Q'
],
'additional_facet': 'donors.display_title' # required since this is default_hidden for now
'additional_facet': [
'donors.display_title', 'file_sets.libraries.assay.display_title'
] # required since this is default_hidden for now
}
PRODUCTION_TISSUES_FILES_SEARCH_PARAMS = {
'type': 'File',
'status': ['released', 'restricted', 'public'],
'sample_summary.studies': ['Production'],
'additional_facet': [
'file_sets.libraries.assay.display_title',
'file_sets.libraries.analytes.samples.sample_sources.uberon_id'
]
}


def extract_desired_facet_from_search(facets, desired_facet_name):
    """ Grabs a single facet from a search response facets block.

    :param facets: list of facet dicts from a search response, each carrying
        at least a 'field' key (and 'terms' / optional 'original_terms')
    :param desired_facet_name: value of 'field' to look for
    :returns: the matching facet dict, with 'terms' replaced by
        'original_terms' when grouping was applied, or an empty dict when
        the facet is not present
    """
    for facet in facets:
        if facet['field'] == desired_facet_name:
            if 'original_terms' in facet:
                # discard group_by_field information so callers always see
                # the flat (ungrouped) term list
                facet['terms'] = facet['original_terms']
            return facet
    log.error(f'Did not locate specified facet on homepage: {desired_facet_name}')
    # NOTE(review): {} has no 'terms' key — callers that immediately index
    # facet['terms'] (e.g. generate_unique_facet_count) will raise KeyError
    # on a missing facet; confirm that is the intended failure mode.
    return {}


def generate_unique_facet_count(context, request, search_param, desired_fact):
def generate_unique_facet_count(context, request, search_param, desired_facet):
""" Helper function that extracts the number of unique facet terms """
search_param['limit'] = 0 # we do not care about search results, just facet counts
result = generate_admin_search_given_params(context, request, search_param)
facet = extract_desired_facet_from_search(result['facets'], desired_fact)
facet = extract_desired_facet_from_search(result['facets'], desired_facet)
# correct for no value, worst case we check the whole list of facet terms
# but this is usually a manageable sized list - Will 28 March 2024
for term in facet['terms']:
Expand Down Expand Up @@ -139,23 +164,48 @@ def generate_ipsc_assay_count(context, request):


def generate_tissue_file_count(context, request):
    """ Get total file count for benchmarking tissues.

    :param context: pyramid context passed through from the home view
    :param request: current pyramid request
    :returns: total number of released benchmarking-tissue files
    """
    # Note: the rendered diff carried both the old and updated docstring;
    # only the updated one is kept — a second bare string would be a stray
    # no-op statement, not a docstring.
    search_param = SearchBase.TISSUES_RELEASED_FILES_SEARCH_PARAMS
    return generate_search_total(context, request, search_param)


def generate_tissue_donor_count(context, request):
    """ Get benchmarking tissue donor count by aggregating on donor.

    :param context: pyramid context passed through from the home view
    :param request: current pyramid request
    :returns: number of unique donors across released benchmarking-tissue files
    """
    # Only the updated docstring is kept; the diff-rendered duplicate (old
    # wording) would otherwise remain as a stray string statement.
    search_param = SearchBase.TISSUES_RELEASED_FILES_SEARCH_PARAMS
    return generate_unique_facet_count(context, request, search_param, 'donors.display_title')


def generate_tissue_assay_count(context, request):
    """ Get total assay count for benchmarking tissues.

    :param context: pyramid context passed through from the home view
    :param request: current pyramid request
    :returns: number of unique assays across released benchmarking-tissue files
    """
    # Only the updated docstring is kept; the diff-rendered duplicate (old
    # wording) would otherwise remain as a stray string statement.
    search_param = SearchBase.TISSUES_RELEASED_FILES_SEARCH_PARAMS
    return generate_unique_facet_count(context, request, search_param, 'file_sets.libraries.assay.display_title')


def generate_production_file_count(context, request):
    """ Get total file count for production tissues """
    params = SearchBase.PRODUCTION_TISSUES_FILES_SEARCH_PARAMS
    return generate_search_total(context, request, params)


def generate_production_tissue_donor_count(context, request):
    """ Get production tissue donor count """
    return generate_unique_facet_count(
        context, request,
        SearchBase.PRODUCTION_TISSUES_FILES_SEARCH_PARAMS,
        'donors.display_title'
    )


def generate_production_tissue_assay_count(context, request):
    """ Get production tissue assay counts """
    return generate_unique_facet_count(
        context, request,
        SearchBase.PRODUCTION_TISSUES_FILES_SEARCH_PARAMS,
        'file_sets.libraries.assay.display_title'
    )


def generate_production_tissue_type_count(context, request):
    """ Get production tissue type counts """
    # Tissue types are distinguished by their Uberon ontology ID.
    uberon_facet = 'file_sets.libraries.analytes.samples.sample_sources.uberon_id'
    params = SearchBase.PRODUCTION_TISSUES_FILES_SEARCH_PARAMS
    return generate_unique_facet_count(context, request, params, uberon_facet)


@view_config(route_name='home', request_method=['GET'])
@debug_log
def home(context, request):
Expand All @@ -181,6 +231,11 @@ def home(context, request):
(generate_tissue_donor_count, {'context': context, 'request': request}), # 7
(generate_tissue_assay_count, {'context': context, 'request': request}), # 8

# Production stats
(generate_production_file_count, {'context': context, 'request': request}), # 9
(generate_production_tissue_donor_count, {'context': context, 'request': request}), # 10
(generate_production_tissue_assay_count, {'context': context, 'request': request}), # 11
(generate_production_tissue_type_count, {'context': context, 'request': request}), # 12
])
time = datetime.now(timezone('EST'))
response = {
Expand Down Expand Up @@ -244,10 +299,10 @@ def home(context, request):
"title": "Primary Tissues",
"link": "/browse",
"figures": [
{ "value": 0, "unit": "Donors" },
{ "value": 0, "unit": "Tissue Types" },
{ "value": 0, "unit": "Assays" },
{ "value": 0, "unit": "Files Generated" }
{ "value": search_results[10], "unit": "Donors" },
{ "value": search_results[12], "unit": "Tissue Types" },
{ "value": search_results[11], "unit": "Assays" },
{ "value": search_results[9], "unit": "Files Generated" }
]
}
]
Expand Down
5 changes: 5 additions & 0 deletions src/encoded/static/scss/encoded/modules/_search.scss
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,11 @@ $facetlist-excluding: #450000;
& > .facet-item {
color: #727272;
font-size: 0.813rem;

&.facet-item-group-header {
color: #3B3A3A;
font-weight: 500;
}
}
& > .facet-count {
color: #727272;
Expand Down
4 changes: 2 additions & 2 deletions src/encoded/tests/test_homepage.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ def test_extract_desired_facet():
{'field': 'type', 'title': 'Data Type', 'total': 0, 'hide_from_view': True, 'aggregation_type': 'terms',
'terms': [{'key': 'File', 'doc_count': 7}, {'key': 'Item', 'doc_count': 7}]}
]
assert extract_desired_facet_from_search(example_facets, 'type') is not None
assert extract_desired_facet_from_search(example_facets, 'not-found') is None
assert extract_desired_facet_from_search(example_facets, 'type') is not {}
assert extract_desired_facet_from_search(example_facets, 'not-found') is {}


@pytest.mark.workbook
Expand Down
8 changes: 4 additions & 4 deletions src/encoded/types/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ def meta_workflow_run_outputs(self, request: Request) -> Union[List[str], None]:
result = self.rev_link_atids(request, "meta_workflow_run_outputs")
if result:
request_handler = RequestHandler(request = request)
mwfrs=[
mwfrs=[
mwfr for mwfr in result
if get_property_value_from_identifier(
request_handler,
Expand Down Expand Up @@ -786,7 +786,7 @@ def release_tracker_title(
request_handler,
file_properties=self.properties
)
return result
return result

def _get_libraries(
self, request: Request, file_sets: Optional[List[str]] = None
Expand Down Expand Up @@ -968,7 +968,7 @@ def _get_group_coverage(
self, request_handler: Request, file_properties: Optional[List[str]] = None
) -> Union[List[str], None]:
""""Get group coverage for display on file overview page.

Use override_group_coverage if present, otherwise grab target_coverage from sequencing."""
if (override_group_coverage := file_utils.get_override_group_coverage(file_properties)):
return [override_group_coverage]
Expand Down Expand Up @@ -1092,7 +1092,7 @@ def _get_release_tracker_title(
) -> Union[str, None]:
"""Get release tracker title for display on the home page."""
to_include = None
if "file_sets" in file_properties:
if "file_sets" in file_properties:
if (cell_culture_mixture_title := get_unique_values(
request_handler.get_items(
file_utils.get_cell_culture_mixtures(file_properties, request_handler)),
Expand Down
8 changes: 4 additions & 4 deletions src/encoded/types/file_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ def _build_file_set_embedded_list():
"libraries.analytes.samples.sample_sources.code",
"libraries.analytes.samples.sample_sources.uberon_id",
"libraries.analytes.samples.sample_sources.cell_line.code",
"libraries.analytes.samples.sample_sources.uberon_id",
"libraries.analytes.samples.sample_sources.donor.display_title",


# Sequencing/Sequencer LinkTo - used in file_merge_group
"sequencing.submitted_id",
"sequencing.target_coverage",
Expand All @@ -84,7 +84,7 @@ def _build_file_set_embedded_list():
"files.file_format.display_title",
"files.file_status_tracking",
"files.quality_metrics.overall_quality_status",

"meta_workflow_runs.meta_workflow.display_title",
"meta_workflow_runs.meta_workflow.category",
"meta_workflow_runs.accession",
Expand Down Expand Up @@ -337,8 +337,8 @@ def validate_compatible_assay_and_sequencer_on_edit(context, request):

def check_compatible_assay_and_sequencer(request, libraries: List[str], sequencing: str):
"""Checks that if library.assay has a valid_sequencer property, that sequencing.sequencer is among them.
The assays with `valid_sequencers` property may need to be updated as new techologies come out

The assays with `valid_sequencers` property may need to be updated as new technologies come out
or are added to the portal.
"""
assays = []
Expand Down
Loading