Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ded2544
fallback to chunked entry insert in _log_fetch
samuelbray32 Aug 27, 2025
2d57e64
fallback to chunked entry insert in _log_fetch
samuelbray32 Aug 27, 2025
02fb80c
cleanup code
samuelbray32 Aug 27, 2025
f34a079
cleanup and performance fix
samuelbray32 Aug 28, 2025
eab1f33
Apply suggestion from @CBroz1
samuelbray32 Aug 28, 2025
c809db7
fix restr_str in _log_fetch_nwb
samuelbray32 Sep 5, 2025
342bc97
log proper restrictions for projected table
samuelbray32 Sep 8, 2025
346f17a
handle case where no entries and key recording
samuelbray32 Sep 8, 2025
e9d0b34
update tests
samuelbray32 Sep 8, 2025
e9051a5
methods for graph intersection
samuelbray32 Sep 10, 2025
8d2929b
add nwb list restriction to export
samuelbray32 Sep 11, 2025
0363267
clear export cache in fixture to ensure logged in each analysis
samuelbray32 Sep 12, 2025
f6fe1d0
test results of compound restriction logging
samuelbray32 Sep 12, 2025
4dad0c2
spelling and reduce log calls
samuelbray32 Sep 12, 2025
cb03db4
Merge branch 'master' into export_barriers
samuelbray32 Sep 12, 2025
8bf01ff
multiprocessing for linked file scanning
samuelbray32 Sep 12, 2025
4f9dc92
add test for export nwb file intersection
samuelbray32 Sep 12, 2025
78f3af5
make chunking of restriction key entries recursive to prevent error
samuelbray32 Sep 12, 2025
3fef2b0
condense selection table restrictions prior to restriction graph
samuelbray32 Sep 12, 2025
41d6177
allow intersection of un-cascaded self graph
samuelbray32 Sep 15, 2025
3b958e6
efficiency improvements
samuelbray32 Sep 15, 2025
e58f2bf
enforce string restrictions on intersect results
samuelbray32 Sep 16, 2025
e6fca26
utility function for editing existing hdmf dataset type
samuelbray32 Sep 18, 2025
9947282
add parallelization
samuelbray32 Sep 19, 2025
ac8ce65
add parallelization
samuelbray32 Sep 19, 2025
11d7349
fix pandas table id issue in dandi updater
samuelbray32 Sep 23, 2025
8dfe2c6
remove franklab specific note from RawPosition
samuelbray32 Sep 24, 2025
126a028
suggestions from review
samuelbray32 Oct 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions src/spyglass/common/common_behav.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,16 +169,6 @@ def get_epoch_num(name: str) -> int:

@schema
class RawPosition(SpyglassMixin, dj.Imported):
"""

Notes
-----
The position timestamps come from: .pos_cameraHWSync.dat.
If PTP is not used, the position timestamps are inferred by finding the
closest timestamps from the neural recording via the trodes time.

"""

definition = """
-> PositionSource
"""
Expand Down
91 changes: 77 additions & 14 deletions src/spyglass/common/common_dandi.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,18 @@ def compile_dandiset(
dandi_api_key: Optional[str] = None,
dandi_instance: Optional[str] = "dandi",
skip_raw_files: Optional[bool] = False,
n_compile_processes: Optional[int] = 1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like it's safe to assume a user will want n_processes to match across these cases, no? No harm in parsing them out, but I think tighter signatures are more likely to be leveraged

n_upload_processes: Optional[int] = None,
n_organize_processes: Optional[int] = None,
n_validate_processes: Optional[int] = 1,
):
"""Compile a Dandiset from the export.
Parameters
----------
key : dict
ExportSelection key
dandiset_id : str
Dandiset ID generated by the user on the dadndi server
Dandiset ID generated by the user on the dandi server
dandi_api_key : str, optional
API key for the dandi server. Optional if the environment variable
DANDI_API_KEY is set.
Expand Down Expand Up @@ -162,19 +166,44 @@ def compile_dandiset(
)

os.makedirs(destination_dir, exist_ok=False)
for file in source_files:
if os.path.exists(f"{destination_dir}/{os.path.basename(file)}"):
continue
if skip_raw_files and raw_dir in file:
continue
# copy the file if it has external links so can be safely edited
if nwb_has_external_links(file):
shutil.copy(file, f"{destination_dir}/{os.path.basename(file)}")
else:
os.symlink(file, f"{destination_dir}/{os.path.basename(file)}")
# for file in source_files:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can remove?

# if os.path.exists(f"{destination_dir}/{os.path.basename(file)}"):
# continue
# if skip_raw_files and raw_dir in file:
# continue
# # copy the file if it has external links so can be safely edited
# if nwb_has_external_links(file):
# shutil.copy(file, f"{destination_dir}/{os.path.basename(file)}")
# else:
# os.symlink(file, f"{destination_dir}/{os.path.basename(file)}")
logger.info(
f"Compiling dandiset in {destination_dir} from {len(source_files)} files"
)
if n_compile_processes == 1:
for file in source_files:
_make_file_in_dandi_dir(file, destination_dir, skip_raw_files)
else:
from multiprocessing import Pool

print(
f"Using multiprocessing to compile dandi export. {n_compile_processes} processes"
)
with Pool(processes=n_compile_processes) as pool:
pool.starmap(
_make_file_in_dandi_dir,
[
(file, destination_dir, skip_raw_files)
for file in source_files
],
)

# validate the dandiset
validate_dandiset(destination_dir, ignore_external_files=True)
logger.info("Validating dandiset before organization")
validate_dandiset(
destination_dir,
ignore_external_files=True,
n_processes=n_validate_processes,
)

# given dandiset_id, download the dandiset to the export_dir
url = (
Expand All @@ -184,24 +213,28 @@ def compile_dandiset(
dandi.download.download(url, output_dir=paper_dir)

# organize the files in the dandiset directory
logger.info("Organizing dandiset")
dandi.organize.organize(
destination_dir,
dandiset_dir,
update_external_file_paths=True,
invalid=OrganizeInvalid.FAIL,
media_files_mode=CopyMode.SYMLINK,
files_mode=FileOperationMode.COPY,
jobs=n_organize_processes,
)

# get the dandi name translations
translations = lookup_dandi_translation(destination_dir, dandiset_dir)

# upload the dandiset to the dandi server
logger.info("Uploading dandiset")
if dandi_api_key:
os.environ["DANDI_API_KEY"] = dandi_api_key
dandi.upload.upload(
[dandiset_dir],
dandi_instance=dandi_instance,
jobs=n_upload_processes,
)
logger.info(f"Dandiset {dandiset_id} uploaded")
# insert the translations into the dandi table
Expand Down Expand Up @@ -239,6 +272,18 @@ def write_mysqldump(self, export_key: dict):
sql_dump.write_mysqldump([self & key], file_suffix="_dandi")


def _make_file_in_dandi_dir(file, destination_dir, skip_raw_files):
    """Stage one source file into the dandiset compilation directory.

    Skips files already present in ``destination_dir`` and, when
    ``skip_raw_files`` is set, any file located under the raw data
    directory. Files containing external links are copied so they can be
    safely edited; all other files are symlinked.

    Parameters
    ----------
    file : str
        Path of the source NWB file to stage.
    destination_dir : str
        Directory the dandiset is being compiled into.
    skip_raw_files : bool
        If True, do not stage files located in the raw data directory.
    """
    # Build the destination path once instead of recomputing it below.
    destination = os.path.join(destination_dir, os.path.basename(file))
    if os.path.exists(destination):
        return  # already staged by a previous (possibly interrupted) run
    # NOTE(review): `raw_dir` is the module-level path constant from
    # spyglass.settings — confirm; a future PR may rename it RAW_DIR.
    if skip_raw_files and raw_dir in file:
        return
    # copy the file if it has external links so can be safely edited
    if nwb_has_external_links(file):
        shutil.copy(file, destination)
    else:
        os.symlink(file, destination)


def _get_metadata(path):
# taken from definition within dandi.organize.organize
try:
Expand Down Expand Up @@ -314,7 +359,7 @@ def lookup_dandi_translation(source_dir: str, dandiset_dir: str):


def validate_dandiset(
folder, min_severity="ERROR", ignore_external_files=False
folder, min_severity="ERROR", ignore_external_files=False, n_processes=1
):
"""Validate the dandiset directory

Expand All @@ -329,7 +374,21 @@ def validate_dandiset(
whether to ignore external file errors. Used if validating
before the organize step
"""
validator_result = dandi.validate.validate(folder)
if n_processes == 1:
validator_result = dandi.validate.validate(folder)
else:
from multiprocessing import Pool

from dandi.files import find_dandi_files

files_to_validate = [x.filepath for x in find_dandi_files(folder)]

print(
f"Using multiprocessing to validate dandi export. {n_processes} processes"
)
with Pool(processes=n_processes) as pool:
per_file_results = pool.map(validate_1, files_to_validate)
validator_result = [item for sub in per_file_results for item in sub]
min_severity_value = Severity[min_severity].value

filtered_results = [
Expand All @@ -356,3 +415,7 @@ def validate_dandiset(
]
)
)


def validate_1(path):
    """Validate a single dandi file path and return the results as a list.

    Thin wrapper around ``dandi.validate.validate`` so validation of one
    file can be dispatched through ``multiprocessing.Pool.map``, which
    needs a module-level callable.
    """
    results = dandi.validate.validate(path)
    return list(results)
Loading
Loading