Commit 1fdacad

pre-commit run --all

1 parent: c35d322

File tree

186 files changed: +2785 -4440 lines


docs/source/contributing_examples/example.py (+33 -44)

@@ -1,4 +1,4 @@
-"""Example Dataset Loader
+"""Example Dataset Loader.
 
 .. admonition:: Dataset Info
     :class: dropdown
@@ -11,7 +11,6 @@
 4. Describe the type of music included in the dataset
 5. Indicate any relevant papers related to the dataset
 6. Include a description about how the data can be accessed and the license it uses (if applicable)
-
 """
 import csv
 import json
@@ -22,9 +21,11 @@
 # -- example imports you won't use
 import librosa
 import numpy as np
-from smart_open import open # if you use the open function, make sure you include this line!
+from smart_open import (
+    open,  # if you use the open function, make sure you include this line!
+)
 
-from mirdata import download_utils, jams_utils, core, annotations
+from mirdata import annotations, core, download_utils, jams_utils
 
 # -- Add any relevant citations here
 BIBTEX = """
@@ -54,19 +55,19 @@
     "default": "1.0",
     "test": "sample",
     "1.0": core.Index(filename="example_index_1.0.json"),
-    "sample": core.Index(filename="example_index_sample.json")
+    "sample": core.Index(filename="example_index_sample.json"),
 }
 
 # -- REMOTES is a dictionary containing all files that need to be downloaded.
 # -- The keys should be descriptive (e.g. 'annotations', 'audio').
 # -- When having data that can be partially downloaded, remember to set up
 # -- correctly destination_dir to download the files following the correct structure.
 REMOTES = {
-    'remote_data': download_utils.RemoteFileMetadata(
-        filename='a_zip_file.zip',
-        url='http://website/hosting/the/zipfile.zip',
-        checksum='00000000000000000000000000000000', # -- the md5 checksum
-        destination_dir='path/to/unzip' # -- relative path for where to unzip the data, or None
+    "remote_data": download_utils.RemoteFileMetadata(
+        filename="a_zip_file.zip",
+        url="http://website/hosting/the/zipfile.zip",
+        checksum="00000000000000000000000000000000",  # -- the md5 checksum
+        destination_dir="path/to/unzip",  # -- relative path for where to unzip the data, or None
     ),
 }
 
@@ -102,8 +103,8 @@ class Track(core.Track):
         annotation (EventData): a description of this annotation
 
     """
+
     def __init__(self, track_id, data_home, dataset_name, index, metadata):
-
         # -- this sets the following attributes:
         # -- * track_id
         # -- * _dataset_name
@@ -117,7 +118,7 @@ def __init__(self, track_id, data_home, dataset_name, index, metadata):
            index=index,
            metadata=metadata,
        )
-
+
        # -- add any dataset specific attributes here
        self.audio_path = self.get_path("audio")
        self.annotation_path = self.get_path("annotation")
@@ -146,12 +147,11 @@ def annotation(self) -> Optional[annotations.EventData]:
    # -- any memory heavy information (like audio) properties
    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """The track's audio
+        """The track's audio.
 
        Returns:
            * np.ndarray - audio signal
            * float - sample rate
-
        """
        return load_audio(self.audio_path)
 
@@ -172,7 +172,7 @@ def to_jams(self):
 # -- if the dataset contains multitracks, you can define a MultiTrack similar to a Track
 # -- you can delete the block of code below if the dataset has no multitracks
 class MultiTrack(core.MultiTrack):
-    """Example multitrack class
+    """Example multitrack class.
 
    Args:
        mtrack_id (str): multitrack id
@@ -188,11 +188,9 @@ class MultiTrack(core.MultiTrack):
 
    Cached Properties:
        annotation (EventData): a description of this annotation
-
    """
-    def __init__(
-        self, mtrack_id, data_home, dataset_name, index, track_class, metadata
-    ):
+
+    def __init__(self, mtrack_id, data_home, dataset_name, index, track_class, metadata):
        # -- this sets the following attributes:
        # -- * mtrack_id
        # -- * _dataset_name
@@ -232,12 +230,11 @@ def annotation(self) -> Optional[annotations.EventData]:
 
    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """The track's audio
+        """The track's audio.
 
        Returns:
            * np.ndarray - audio signal
            * float - sample rate
-
        """
        return load_audio(self.audio_path)
 
@@ -247,16 +244,15 @@ def to_jams(self):
        """Jams: the track's data in jams format"""
        return jams_utils.jams_converter(
            audio_path=self.mix_path,
-            annotation_data=[(self.annotation, None)],
-            ...
+            chord_data=[(self.annotation, None)],
        )
    # -- see the documentation for `jams_utils.jams_converter for all fields
 
 
 # -- this decorator allows this function to take a string or an open bytes file as input
 # -- and in either case converts it to an open file handle.
 # -- It also checks if the file exists
-# -- and, if None is passed, None will be returned
+# -- and, if None is passed, None will be returned
 @io.coerce_to_bytes_io
 def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load a Example audio file.
@@ -267,7 +263,6 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    Returns:
        * np.ndarray - the audio signal
        * float - The sample rate of the audio file
-
    """
    # -- for example, the code below. This should be dataset specific!
    # -- By default we load to mono
@@ -277,15 +272,15 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
 
 # -- Write any necessary loader functions for loading the dataset's data
 
+
 # -- this decorator allows this function to take a string or an open file as input
 # -- and in either case converts it to an open file handle.
 # -- It also checks if the file exists
-# -- and, if None is passed, None will be returned
+# -- and, if None is passed, None will be returned
 @io.coerce_to_string_io
 def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:
-
    # -- because of the decorator, the file is already open
-    reader = csv.reader(fhandle, delimiter=' ')
+    reader = csv.reader(fhandle, delimiter=" ")
    intervals = []
    annotation = []
    for line in reader:
@@ -295,16 +290,14 @@ def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:
    # there are several annotation types in annotations.py
    # They should be initialized with data, followed by their units
    # see annotations.py for a complete list of types and units.
-    annotation_data = annotations.EventData(
-        np.array(intervals), "s", np.array(annotation), "open"
-    )
+    annotation_data = annotations.EventData(np.array(intervals), "s", np.array(annotation), "open")
    return annotation_data
 
+
 # -- use this decorator so the docs are complete
 @core.docstring_inherit(core.Dataset)
 class Dataset(core.Dataset):
-    """The Example dataset
-    """
+    """The Example dataset."""
 
    def __init__(self, data_home=None, version="default"):
        super().__init__(
@@ -320,40 +313,36 @@ def __init__(self, data_home=None, version="default"):
        )
 
    # -- if your dataset has a top-level metadata file, write a loader for it here
-    # -- you do not have to include this function if there is no metadata
+    # -- you do not have to include this function if there is no metadata
    @core.cached_property
    def _metadata(self):
-        metadata_path = os.path.join(self.data_home, 'example_metadata.csv')
+        metadata_path = os.path.join(self.data_home, "example_metadata.csv")
 
        # load metadata however makes sense for your dataset
-        metadata_path = os.path.join(data_home, 'example_metadata.json')
-        with open(metadata_path, 'r') as fhandle:
+        metadata_path = os.path.join(data_home, "example_metadata.json")
+        with open(metadata_path, "r") as fhandle:
            metadata = json.load(fhandle)
 
        return metadata
 
    # -- if your dataset needs to overwrite the default download logic, do it here.
    # -- this function is usually not necessary unless you need very custom download logic
-    def download(
-        self, partial_download=None, force_overwrite=False, cleanup=False
-    ):
-        """Download the dataset
+    def download(self, partial_download=None, force_overwrite=False, cleanup=False):
+        """Download the dataset.
 
        Args:
            partial_download (list or None):
                A list of keys of remotes to partially download.
                If None, all data is downloaded
            force_overwrite (bool):
-                If True, existing files are overwritten by the downloaded files.
+                If True, existing files are overwritten by the downloaded files.
            cleanup (bool):
                Whether to delete any zip/tar files after extracting.
 
        Raises:
            ValueError: if invalid keys are passed to partial_download
            IOError: if a downloaded file's checksum is different from expected
-
        """
        # see download_utils.downloader for basic usage - if you only need to call downloader
        # once, you do not need this function at all.
        # only write a custom function if you need it!
-
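For orientation, here is a minimal usage sketch of a loader module like the one above once it is registered under mirdata.datasets. The module name "example", the data path, and the presence of sample data are assumptions for illustration; the entry points (Dataset, Track.audio, Track.annotation) are the ones defined in the diff:

    from mirdata.datasets import example

    # hypothetical module name and data path, for illustration only
    dataset = example.Dataset(data_home="~/mir_datasets/example", version="test")
    track = dataset.choice_track()  # pick an arbitrary track from the index
    y, sr = track.audio             # loaded on access via load_audio()
    events = track.annotation       # cached property backed by load_annotation()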

docs/source/contributing_examples/make_example_index.py (+3 -6)

@@ -2,6 +2,7 @@
 import glob
 import json
 import os
+
 from mirdata.validate import md5
 
 DATASET_INDEX_PATH = "../mirdata/datasets/indexes/dataset_index.json"
@@ -19,9 +20,7 @@ def make_dataset_index(dataset_data_path):
    # top-key level tracks
    index_tracks = {}
    for track_id in track_ids:
-        audio_checksum = md5(
-            os.path.join(dataset_data_path, "Wavfile/{}.wav".format(track_id))
-        )
+        audio_checksum = md5(os.path.join(dataset_data_path, "Wavfile/{}.wav".format(track_id)))
        annotation_checksum = md5(
            os.path.join(dataset_data_path, "annotation/{}.lab".format(track_id))
        )
@@ -48,8 +47,6 @@ def main(args):
 
 if __name__ == "__main__":
    PARSER = argparse.ArgumentParser(description="Make dataset index file.")
-    PARSER.add_argument(
-        "dataset_data_path", type=str, help="Path to dataset data folder."
-    )
+    PARSER.add_argument("dataset_data_path", type=str, help="Path to dataset data folder.")
 
    main(PARSER.parse_args())
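For context, make_dataset_index() assembles the checksums computed above into a per-track index before dumping it to JSON. A sketch of that structure, assuming mirdata's usual (relative path, md5 checksum) pairs; the track id and filenames are hypothetical:

    # sketch of the index structure this script writes out with json.dump;
    # "some_id" and the file layout are hypothetical examples
    dataset_index = {
        "version": "1.0",
        "tracks": {
            "some_id": {
                "audio": ("Wavfile/some_id.wav", audio_checksum),
                "annotation": ("annotation/some_id.lab", annotation_checksum),
            },
        },
    }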

docs/source/contributing_examples/test_example.py (+1 -4)

@@ -1,5 +1,4 @@
-"""Tests for example dataset
-"""
+"""Tests for example dataset."""
 import numpy as np
 import pytest
 
@@ -37,7 +36,6 @@ def test_track():
 
 
 def test_to_jams():
-
    default_trackid = "some_id"
    data_home = "tests/resources/mir_datasets/dataset"
    dataset = example.Dataset(data_home, version="test")
@@ -73,4 +71,3 @@ def test_metadata():
    dataset = example.Dataset(data_home, version="test")
    metadata = dataset._metadata
    assert metadata["some_id"] == "something"
-

mirdata/__init__.py (+3 -8)

@@ -4,17 +4,13 @@
 
 from .version import version as __version__
 
-
 DATASETS = [
-    d.name
-    for d in pkgutil.iter_modules(
-        [os.path.dirname(os.path.abspath(__file__)) + "/datasets"]
-    )
+    d.name for d in pkgutil.iter_modules([os.path.dirname(os.path.abspath(__file__)) + "/datasets"])
 ]
 
 
 def list_datasets():
-    """Get a list of all mirdata dataset names
+    """Get a list of all mirdata dataset names.
 
    Returns:
        list: list of dataset names as strings
@@ -23,7 +19,7 @@ def list_datasets():
 
 
 def initialize(dataset_name, data_home=None, version="default"):
-    """Load a mirdata dataset by name
+    """Load a mirdata dataset by name.
 
    Example:
        .. code-block:: python
@@ -45,7 +41,6 @@ def initialize(dataset_name, data_home=None, version="default"):
 
    Returns:
        Dataset: a mirdata.core.Dataset object
-
    """
    if dataset_name not in DATASETS:
        raise ValueError("Invalid dataset {}".format(dataset_name))
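Per the docstrings above, these two helpers are the package's public entry points. A short usage sketch; "orchset" is one example dataset name, and data_home is optional:

    import mirdata

    print(mirdata.list_datasets())  # list of dataset names as strings

    # initialize() validates the name against DATASETS and returns a core.Dataset
    dataset = mirdata.initialize("orchset", data_home="~/mir_datasets/orchset")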
