Skip to content

Commit 58d46e1

Browse files
committed
tools to check table disc usage
1 parent 727ef79 commit 58d46e1

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

src/spyglass/utils/dj_helper_fn.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,38 @@ def make_file_obj_id_unique(nwb_path: str):
559559
return new_id
560560

561561

562+
def _quick_get_analysis_path(file):
    """Locate an analysis file on disc, skipping integrity checks.

    Meant for scanning a large number of files. Candidate locations are
    tried in order and the first one that exists is returned:

    1. ``{analysis_dir}/{file}``
    2. ``{analysis_dir}/{folder}/{file}``, where ``folder`` is the file name
       with its final ``_``-separated segment removed
    3. the path reported by ``AnalysisNwbfile().get_abs_path(file)``

    Parameters
    ----------
    file : str
        Name of the analysis file to get the path for.

    Returns
    -------
    str or None
        Path to the analysis file, or None (after printing a message) when
        none of the candidate locations exists.
    """
    from spyglass.common import AnalysisNwbfile
    from spyglass.settings import sg_config

    analysis_dir = sg_config.analysis_dir

    # Candidate 1: file sits directly in the analysis directory.
    flat_path = f"{analysis_dir}/{file}"
    if os.path.exists(flat_path):
        return flat_path

    # Candidate 2: file sits in a subfolder named after everything before
    # the last underscore of the file name.
    folder = file.rpartition("_")[0]
    nested_path = f"{analysis_dir}/{folder}/{file}"
    if os.path.exists(nested_path):
        return nested_path

    # Candidate 3: ask the AnalysisNwbfile table for the path.
    table_path = AnalysisNwbfile().get_abs_path(file)
    if not os.path.exists(table_path):
        print(f"File {file} not found in {analysis_dir}")
        return None
    return table_path
592+
593+
562594
def populate_pass_function(value):
563595
"""Pass function for parallel populate.
564596
@@ -612,3 +644,4 @@ def str_to_bool(value) -> bool:
612644
if not value:
613645
return False
614646
return str(value).lower() in ("y", "yes", "t", "true", "on", "1")
647+
return str(value).lower() in ("y", "yes", "t", "true", "on", "1")

src/spyglass/utils/dj_mixin.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import os
12
from contextlib import nullcontext
23
from functools import cached_property
34
from time import time
45
from typing import List
56

67
import datajoint as dj
8+
import humanize
79
from datajoint.condition import make_condition
810
from datajoint.errors import DataJointError
911
from datajoint.expression import QueryExpression
@@ -16,6 +18,7 @@
1618
from spyglass.utils.database_settings import SHARED_MODULES
1719
from spyglass.utils.dj_helper_fn import (
1820
NonDaemonPool,
21+
_quick_get_analysis_path,
1922
ensure_names,
2023
fetch_nwb,
2124
get_nwb_table,
@@ -863,6 +866,31 @@ def check_threads(self, detailed=False, all_threads=False) -> DataFrame:
863866

864867
return df
865868

869+
# --------------------------- Check disc usage ------------------------------
870+
def get_table_storage_usage(self):
    """Total size on disc of the analysis files referenced by the table.

    Reads the ``analysis_file_name`` field, resolves each distinct name to
    a local path with ``_quick_get_analysis_path``, and sums the sizes of
    the files found. Names that cannot be resolved to an existing local
    file are skipped.

    Returns
    -------
    tuple
        (human-readable string, total size in bytes). If the table has no
        ``analysis_file_name`` field, a warning is logged and the size
        reported is zero.
    """
    if "analysis_file_name" not in self.heading.names:
        logger.warning(
            f"{self.full_table_name} does not have an analysis_file_name field."
        )
        # Format zero exactly like the normal return path does, rather
        # than hard-coding a differently-styled unit string.
        return humanize.naturalsize(0, binary=True), 0

    # Several rows may reference the same file; count each file once so
    # the reported disc usage is not inflated. dict.fromkeys keeps the
    # fetch order while dropping duplicates.
    unique_names = dict.fromkeys(self.fetch("analysis_file_name"))
    located_paths = (
        _quick_get_analysis_path(file_name) for file_name in unique_names
    )
    total_size = sum(
        os.stat(path).st_size for path in located_paths if path is not None
    )
    return humanize.naturalsize(total_size, binary=True), total_size
893+
866894

867895
class SpyglassMixinPart(SpyglassMixin, dj.Part):
868896
"""

0 commit comments

Comments
 (0)