Skip to content

Commit bdc6a0b

Browse files
authored
Add gzip JSON support and test (#992)
* Add gzip JSON support and test
* Fix lint issues
* Fix TypeError issues
* Fix Exception issues
* Fix Lint and TypeError issue
* fix lint issues
* Fix lint issues
* Fix Studyset issues
* enhance gzip test
* delete test_gz.py
* refactor: standardise JSON loading with load_json utility
* add import json
* fix lint issues and add import json
* Add import json
1 parent 2002031 commit bdc6a0b

8 files changed

Lines changed: 35 additions & 32 deletions

File tree

examples/01_datasets/06_plot_dataset_json.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -133,12 +133,11 @@
133133
import json
134134
import os
135135

136-
from nimare.utils import get_resource_path
136+
from nimare.utils import get_resource_path, load_json
137137

138138
dset_file = os.path.join(get_resource_path(), "nidm_pain_dset.json")
139139

140-
with open(dset_file, "r") as f_obj:
141-
data = json.load(f_obj)
140+
data = load_json(dset_file)
142141

143142
###############################################################################
144143
# Example of accessing coordinates for a study

nimare/dataset.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import copy
44
import inspect
5-
import json
65
import logging
76
import os.path as op
87
import warnings
@@ -23,6 +22,7 @@
2322
_validate_images_df,
2423
get_masker,
2524
get_template,
25+
load_json,
2626
mm2vox,
2727
)
2828

@@ -79,12 +79,11 @@ class Dataset(NiMAREBase):
7979

8080
def __init__(self, source, target="mni152_2mm", mask=None):
8181
if isinstance(source, str):
82-
with open(source, "r") as f_obj:
83-
data = json.load(f_obj)
82+
data = load_json(source)
8483
elif isinstance(source, dict):
8584
data = source
8685
else:
87-
raise Exception("`source` needs to be a file path or a dictionary")
86+
raise TypeError("`source` needs to be a file path or a dictionary")
8887

8988
# Datasets are organized by study, then experiment
9089
# To generate unique IDs, we combine study ID with experiment ID

nimare/extract/extract.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
"""Tools for downloading datasets."""
22

33
import itertools
4-
import json
54
import logging
65
import os
76
import os.path as op
@@ -22,7 +21,7 @@
2221
_get_dataset_dir,
2322
_longify,
2423
)
25-
from nimare.utils import get_resource_path
24+
from nimare.utils import get_resource_path, load_json
2625

2726
LGR = logging.getLogger(__name__)
2827

@@ -61,8 +60,7 @@ def _find_entities(filename, search_pairs, log=False):
6160
def _fetch_database(search_pairs, database_url, out_dir, overwrite=False):
6261
"""Fetch generic database."""
6362
res_dir = get_resource_path()
64-
with open(op.join(res_dir, "database_file_manifest.json"), "r") as fo:
65-
database_file_manifest = json.load(fo)
63+
database_file_manifest = load_json(op.join(res_dir, "database_file_manifest.json"))
6664

6765
out_dir = op.abspath(out_dir)
6866
os.makedirs(out_dir, exist_ok=True)

nimare/nimads.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import os
77
import weakref
88
from copy import deepcopy
9+
from pathlib import Path
910

1011
import numpy as np
1112
import pandas as pd
@@ -18,6 +19,7 @@
1819
_mask_img_to_bool,
1920
_validate_df,
2021
_validate_images_df,
22+
load_json,
2123
mm2vox,
2224
)
2325

@@ -28,6 +30,9 @@
2830

2931
def _validate_studyset_source(source):
3032
"""Validate the minimal schema required to construct a Studyset."""
33+
if isinstance(source, (str, Path)):
34+
return
35+
3136
if not isinstance(source, dict):
3237
raise InvalidStudysetError("Studyset source must be a dictionary or JSON path")
3338

@@ -163,8 +168,7 @@ def __init__(
163168

164169
# load source as json
165170
if isinstance(source, str):
166-
with open(source, "r+") as f:
167-
source = json.load(f)
171+
source = load_json(source)
168172

169173
_validate_studyset_source(source)
170174

@@ -350,8 +354,7 @@ def _coerce_annotation(self, annotation):
350354
if isinstance(annotation, dict):
351355
return Annotation(annotation, self)
352356
if isinstance(annotation, str):
353-
with open(annotation, "r+") as f:
354-
return Annotation(json.load(f), self)
357+
return Annotation(load_json(annotation), self)
355358
if isinstance(annotation, Annotation):
356359
return annotation
357360
raise TypeError(f"Unsupported annotation type: {type(annotation)}")
@@ -863,8 +866,7 @@ def from_table_cache(
863866
@classmethod
864867
def from_nimads(cls, filename):
865868
"""Create a Studyset from a NIMADS JSON file."""
866-
with open(filename, "r+") as fn:
867-
nimads = json.load(fn)
869+
nimads = load_json(filename)
868870

869871
return cls(nimads)
870872

nimare/tests/conftest.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
"""Generate fixtures for tests."""
22

33
import copy
4-
import json
54
import os
65
from shutil import copyfile
76

@@ -15,7 +14,7 @@
1514
import nimare
1615
from nimare.generate import create_coordinate_dataset
1716
from nimare.tests.utils import get_test_data_path
18-
from nimare.utils import get_resource_path
17+
from nimare.utils import get_resource_path, load_json
1918

2019
# Only enable the following once in a while for a check for SettingWithCopyWarnings
2120
# pd.options.mode.chained_assignment = "raise"
@@ -197,8 +196,7 @@ def sample_size_nimads_studyset():
197196
response = request("GET", url)
198197
with open(out_file, "wb") as f:
199198
f.write(response.content)
200-
with open(out_file, "r") as f:
201-
studyset = json.load(f)
199+
studyset = load_json(out_file)
202200
return studyset
203201

204202

@@ -211,8 +209,7 @@ def _example_nimads_studyset_data():
211209
response = request("GET", url)
212210
with open(out_file, "wb") as f:
213211
f.write(response.content)
214-
with open(out_file, "r") as f:
215-
studyset = json.load(f)
212+
studyset = load_json(out_file)
216213
return studyset
217214

218215

@@ -231,8 +228,7 @@ def example_nimads_annotation():
231228
response = request("GET", url)
232229
with open(out_file, "wb") as f:
233230
f.write(response.content)
234-
with open(out_file, "r") as f:
235-
annotation = json.load(f)
231+
annotation = load_json(out_file)
236232
return annotation
237233

238234

nimare/tests/test_dataset.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
"""Test nimare.dataset (Dataset IO/transformations)."""
22

33
import copy
4-
import json
54
import os.path as op
65
import pickle
76
import warnings
@@ -14,6 +13,7 @@
1413
from nimare import dataset
1514
from nimare.nimads import Studyset
1615
from nimare.tests.utils import get_test_data_path
16+
from nimare.utils import load_json
1717

1818
# ---------------------------------------------------------------------------
1919
# Helpers for the parameterized smoke test
@@ -188,8 +188,7 @@ def test_empty_dset():
188188
def test_posneg_warning():
189189
"""Smoke test for nimare.dataset.Dataset initialization with positive and negative z_stat."""
190190
db_file = op.join(get_test_data_path(), "neurosynth_dset.json")
191-
with open(db_file, "r") as f_obj:
192-
data = json.load(f_obj)
191+
data = load_json(db_file)
193192

194193
data_pos_zstats = copy.deepcopy(data)
195194
data_neg_zstats = copy.deepcopy(data)

nimare/tests/test_nimads.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from nimare import nimads
1313
from nimare.dataset import Dataset
1414
from nimare.tests.utils import get_test_data_path
15+
from nimare.utils import load_json
1516

1617

1718
def test_load_nimads(example_nimads_studyset, example_nimads_annotation):
@@ -336,8 +337,7 @@ def test_studyset_to_nimads(example_nimads_studyset):
336337
assert os.path.exists(tmp_path)
337338

338339
# Verify the saved file can be loaded
339-
with open(tmp_path, "r") as f:
340-
saved_data = json.load(f)
340+
saved_data = load_json(tmp_path)
341341

342342
assert saved_data["id"] == studyset.id
343343
assert saved_data["name"] == studyset.name

nimare/utils.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""Utility functions for NiMARE."""
22

33
import datetime
4+
import gzip
45
import inspect
56
import json
67
import logging
@@ -62,6 +63,16 @@ def get_resource_path():
6263
return op.abspath(op.join(op.dirname(__file__), "resources") + op.sep)
6364

6465

66+
def load_json(source):
    """Load a JSON file (plain or gzip-compressed) and return the parsed content.

    Compression is detected from the first two bytes of the file rather than
    from its name, so a gzipped file is read correctly whatever its extension
    (``.gz``, ``.GZ``, or no compression suffix at all), and a plain JSON file
    that happens to be named ``*.gz`` is still read as plain text.

    Parameters
    ----------
    source : :obj:`str` or path-like
        Path to the JSON file.

    Returns
    -------
    object
        The decoded JSON document, as returned by :func:`json.load`.

    Raises
    ------
    OSError
        If the file cannot be opened or read (e.g., it does not exist).
    UnicodeDecodeError
        If the (decompressed) content is not valid UTF-8.
    json.JSONDecodeError
        If the (decompressed) content is not valid JSON.
    """
    # Every gzip stream starts with the ID bytes 0x1f 0x8b. Neither a valid
    # JSON document nor valid UTF-8 text can begin with that pair, so the
    # check cannot misclassify a plain JSON file.
    with open(source, "rb") as raw_obj:
        is_gzipped = raw_obj.read(2) == b"\x1f\x8b"

    opener = gzip.open if is_gzipped else open
    with opener(source, "rt", encoding="utf-8") as f_obj:
        return json.load(f_obj)
74+
75+
6576
def get_template(space="mni152_2mm", mask=None):
6677
"""Load template file.
6778
@@ -1315,8 +1326,7 @@ def load_nimads(studyset, annotation=None):
13151326
if isinstance(studyset, dict):
13161327
studyset = Studyset(studyset)
13171328
elif isinstance(studyset, str):
1318-
with open(studyset, "r") as f:
1319-
studyset = Studyset(json.load(f))
1329+
studyset = Studyset(load_json(studyset))
13201330
elif isinstance(studyset, Studyset):
13211331
pass
13221332
else:

0 commit comments

Comments
 (0)