-
Notifications
You must be signed in to change notification settings - Fork 52
Export generalizations #1387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Export generalizations #1387
Changes from 22 commits
ded2544
2d57e64
02fb80c
f34a079
eab1f33
c809db7
342bc97
346f17a
e9d0b34
e9051a5
8d2929b
0363267
f6fe1d0
4dad0c2
cb03db4
8bf01ff
4f9dc92
78f3af5
3fef2b0
41d6177
3b958e6
e58f2bf
e6fca26
9947282
ac8ce65
11d7349
8dfe2c6
126a028
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -6,10 +6,13 @@ | |||||
| plan future development of Spyglass. | ||||||
| """ | ||||||
|
|
||||||
| from multiprocessing import Pool, cpu_count | ||||||
| from typing import List, Union | ||||||
|
|
||||||
| import datajoint as dj | ||||||
| from datajoint.condition import make_condition | ||||||
| from pynwb import NWBHDF5IO | ||||||
| from tqdm import tqdm | ||||||
|
|
||||||
| from spyglass.common.common_nwbfile import AnalysisNwbfile, Nwbfile | ||||||
| from spyglass.settings import test_mode | ||||||
|
|
@@ -24,6 +27,9 @@ | |||||
| from spyglass.utils.sql_helper_fn import SQLDumpHelper | ||||||
|
|
||||||
| schema = dj.schema("common_usage") | ||||||
| INCLUDED_NWB_FILES = ( | ||||||
| None # global variable to temporarily hold included nwb files | ||||||
| ) | ||||||
|
|
||||||
|
|
||||||
| @schema | ||||||
|
|
@@ -203,7 +209,11 @@ def _externals(self) -> dj.external.ExternalMapping: | |||||
| return dj.external.ExternalMapping(schema=AnalysisNwbfile) | ||||||
|
|
||||||
| def _add_externals_to_restr_graph( | ||||||
| self, restr_graph: RestrGraph, key: dict | ||||||
| self, | ||||||
| restr_graph: RestrGraph, | ||||||
| key: dict, | ||||||
| raw_files=None, | ||||||
| analysis_files=None, | ||||||
| ) -> RestrGraph: | ||||||
| """Add external tables to a RestrGraph for a given restriction/key. | ||||||
|
|
||||||
|
|
@@ -221,21 +231,32 @@ def _add_externals_to_restr_graph( | |||||
| A RestrGraph object to add external tables to. | ||||||
| key : dict | ||||||
| Any valid restriction key for ExportSelection.Table | ||||||
| raw_files : list, optional | ||||||
| A list of raw nwb file names to add. Default None, which retrieves | ||||||
| from ExportSelection._list_raw_files. | ||||||
| analysis_files : list, optional | ||||||
| A list of analysis nwb file names to add. Default None, which retrieves | ||||||
| from ExportSelection._list_analysis_files. | ||||||
|
|
||||||
| Returns | ||||||
| ------- | ||||||
| restr_graph : RestrGraph | ||||||
| The updated RestrGraph | ||||||
| """ | ||||||
| if raw_files is None: | ||||||
samuelbray32 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| raw_files = self._list_raw_files(key) | ||||||
| if analysis_files is None: | ||||||
| analysis_files = self._list_analysis_files(key) | ||||||
|
|
||||||
| # only add items if found respective file types | ||||||
| if raw_files := self._list_raw_files(key): | ||||||
| if raw_files: | ||||||
| raw_tbl = self._externals["raw"] | ||||||
| raw_name = raw_tbl.full_table_name | ||||||
| raw_restr = "filepath in ('" + "','".join(raw_files) + "')" | ||||||
| restr_graph.graph.add_node(raw_name, ft=raw_tbl, restr=raw_restr) | ||||||
| restr_graph.visited.add(raw_name) | ||||||
|
|
||||||
| if analysis_files := self._list_analysis_files(key): | ||||||
| if analysis_files: | ||||||
| analysis_tbl = self._externals["analysis"] | ||||||
| analysis_name = analysis_tbl.full_table_name | ||||||
| # to avoid issues with analysis subdir, we use REGEXP | ||||||
|
|
@@ -251,10 +272,13 @@ def _add_externals_to_restr_graph( | |||||
| return restr_graph | ||||||
|
|
||||||
| def get_restr_graph( | ||||||
| self, key: dict, verbose=False, cascade=True | ||||||
| self, key: dict, verbose=False, cascade=True, included_nwb_files=None | ||||||
| ) -> RestrGraph: | ||||||
| """Return a RestrGraph for a restriction/key's tables/restrictions. | ||||||
|
|
||||||
| Restriction graph limits to entries stemming from the raw nwb_files | ||||||
| listed in included_nwb_files, if provided. | ||||||
|
|
||||||
| Ignores duplicate entries. | ||||||
|
|
||||||
| Parameters | ||||||
|
|
@@ -265,12 +289,25 @@ def get_restr_graph( | |||||
| Turn on RestrGraph verbosity. Default False. | ||||||
| cascade : bool, optional | ||||||
| Propagate restrictions to upstream tables. Default True. | ||||||
| included_nwb_files : list, optional | ||||||
| A whitelist of nwb files to include in the export. Default None applies | ||||||
| no whitelist restriction. | ||||||
| """ | ||||||
| leaves = unique_dicts( | ||||||
| (self * self.Table & key).fetch( | ||||||
| "table_name", "restriction", as_dict=True | ||||||
| selection_tables = self * self.Table & key | ||||||
| tracked_tables = set(selection_tables.fetch("table_name")) | ||||||
| leaves = [] | ||||||
| # Condense to single restriction per table (OR of all restrictions). | ||||||
| # Large performance boost for large exports with many logged entries | ||||||
| for table_name in tracked_tables: | ||||||
| restr_list = (selection_tables & dict(table_name=table_name)).fetch( | ||||||
| "restriction" | ||||||
| ) | ||||||
| restriction = make_condition( | ||||||
| dj.FreeTable(dj.conn(), table_name), restr_list, set() | ||||||
| ) | ||||||
| leaves.append( | ||||||
| {"table_name": table_name, "restriction": restriction} | ||||||
| ) | ||||||
| ) | ||||||
|
|
||||||
| restr_graph = RestrGraph( | ||||||
| seed_table=self, | ||||||
|
|
@@ -279,7 +316,55 @@ def get_restr_graph( | |||||
| cascade=False, | ||||||
| include_files=True, | ||||||
| ) | ||||||
| restr_graph = self._add_externals_to_restr_graph(restr_graph, key) | ||||||
|
|
||||||
| if included_nwb_files is None: | ||||||
| restr_graph = self._add_externals_to_restr_graph(restr_graph, key) | ||||||
| if cascade: | ||||||
| restr_graph.cascade() | ||||||
| return restr_graph | ||||||
|
|
||||||
| # Restrict the graph to only include entries stemming from the | ||||||
| # included nwb files | ||||||
| logger.info("Generating restriction graph of included nwb files") | ||||||
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| nwb_restr = make_condition( | ||||||
| Nwbfile(), | ||||||
| [f"nwb_file_name = '{f}'" for f in included_nwb_files], | ||||||
| set(), | ||||||
| ) | ||||||
| whitelist_graph = RestrGraph( | ||||||
| seed_table=Nwbfile, | ||||||
| leaves={ | ||||||
| "table_name": Nwbfile.full_table_name, | ||||||
| "restriction": nwb_restr, | ||||||
| }, | ||||||
| verbose=verbose, | ||||||
| cascade=True, | ||||||
| include_files=True, | ||||||
| direction="down", | ||||||
| ) | ||||||
| logger.info("Intersecting with export restriction graph") | ||||||
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| restr_graph = restr_graph & whitelist_graph | ||||||
| raw_files_to_add = [ | ||||||
| f | ||||||
| for f in ExportSelection()._list_raw_files(key) | ||||||
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| if f in included_nwb_files | ||||||
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| ] | ||||||
| analysis_files_to_add = [ | ||||||
| f | ||||||
| for f in ExportSelection()._list_analysis_files(key) | ||||||
| if any( | ||||||
| [ | ||||||
| nwb_file_name.split("_.nwb")[0] in f | ||||||
| for nwb_file_name in included_nwb_files | ||||||
| ] | ||||||
| ) | ||||||
| ] | ||||||
| restr_graph = self._add_externals_to_restr_graph( | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. At minimum, it's not a rate-limiting point in the export I ran, but good to keep in mind |
||||||
| restr_graph, | ||||||
| key, | ||||||
| raw_files=raw_files_to_add, | ||||||
| analysis_files=analysis_files_to_add, | ||||||
| ) | ||||||
|
|
||||||
| if cascade: | ||||||
| restr_graph.cascade() | ||||||
|
|
@@ -326,6 +411,7 @@ class Export(SpyglassMixin, dj.Computed): | |||||
| -> ExportSelection | ||||||
| --- | ||||||
| paper_id: varchar(32) | ||||||
| included_nwb_file_names = null: mediumblob # list of nwb files included in export | ||||||
| """ | ||||||
|
|
||||||
| # In order to get a many-to-one relationship btwn Selection and Export, | ||||||
|
|
@@ -350,18 +436,40 @@ class File(SpyglassMixin, dj.Part): | |||||
| file_path: varchar(255) | ||||||
| """ | ||||||
|
|
||||||
| def populate_paper(self, paper_id: Union[str, dict]): | ||||||
| def populate_paper( | ||||||
| self, | ||||||
| paper_id: Union[str, dict], | ||||||
| included_nwb_files=None, | ||||||
| n_processes=1, | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add mention of new args in docstring. I think anything with more than one arg needs the You're adding this new feature, and then disabling it by default?
Suggested change
|
||||||
| ): | ||||||
| """Populate Export for a given paper_id.""" | ||||||
| self.load_shared_schemas() | ||||||
| if isinstance(paper_id, dict): | ||||||
| paper_id = paper_id.get("paper_id") | ||||||
| self.populate(ExportSelection().paper_export_id(paper_id)) | ||||||
| global INCLUDED_NWB_FILES | ||||||
| INCLUDED_NWB_FILES = included_nwb_files # store in global variable | ||||||
|
||||||
| global N_PROCESSES | ||||||
| if n_processes < 1: | ||||||
| n_processes = 1 | ||||||
| elif n_processes > cpu_count(): | ||||||
| n_processes = cpu_count() | ||||||
| N_PROCESSES = n_processes | ||||||
|
||||||
| self.populate( | ||||||
| { | ||||||
| **ExportSelection().paper_export_id(paper_id), | ||||||
| } | ||||||
| ) | ||||||
|
|
||||||
| def make(self, key): | ||||||
| """Populate Export table with the latest export for a given paper.""" | ||||||
| logger.info(f"Populating Export for {key}") | ||||||
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| paper_key = (ExportSelection & key).fetch("paper_id", as_dict=True)[0] | ||||||
| query = ExportSelection & paper_key | ||||||
|
|
||||||
| included_nwb_files = INCLUDED_NWB_FILES | ||||||
| # included_nwb_files = INCLUDED_NWB_FILES.copy() | ||||||
| # INCLUDED_NWB_FILES = None # reset global variable | ||||||
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
|
|
||||||
| # Null insertion if export_id is not the maximum for the paper | ||||||
| all_export_ids = ExportSelection()._max_export_id(paper_key, True) | ||||||
| max_export_id = max(all_export_ids) | ||||||
|
|
@@ -384,27 +492,47 @@ def make(self, key): | |||||
| (self.Table & id_dict).delete_quick() | ||||||
| (self.Table & id_dict).delete_quick() | ||||||
|
|
||||||
| restr_graph = ExportSelection().get_restr_graph(paper_key) | ||||||
| logger.info(f"Generating export_id {key['export_id']}") | ||||||
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||
| restr_graph = ExportSelection().get_restr_graph( | ||||||
| paper_key, included_nwb_files=included_nwb_files, verbose=True | ||||||
|
||||||
| paper_key, included_nwb_files=included_nwb_files, verbose=True | |
| paper_key, included_nwb_files=included_nwb_files, verbose=False |
samuelbray32 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would this warning show multiple times? Repeated warnings can have the opposite effect of having the user ignore the statement altogether. Flaw in the original design, but something that only occurred to me with the multithreading.
Uh oh!
There was an error while loading. Please reload this page.