@@ -1,4 +1,4 @@
-"""Example Dataset Loader
+"""Example Dataset Loader.

 .. admonition:: Dataset Info
     :class: dropdown
@@ -11,7 +11,6 @@
     4. Describe the type of music included in the dataset
     5. Indicate any relevant papers related to the dataset
     6. Include a description about how the data can be accessed and the license it uses (if applicable)
-
 """
 import csv
 import json
@@ -22,9 +21,11 @@
 # -- example imports you won't use
 import librosa
 import numpy as np
-from smart_open import open  # if you use the open function, make sure you include this line!
+from smart_open import (
+    open,  # if you use the open function, make sure you include this line!
+)

-from mirdata import download_utils, jams_utils, core, annotations
+from mirdata import annotations, core, download_utils, jams_utils

 # -- Add any relevant citations here
 BIBTEX = """
@@ -54,19 +55,19 @@
     "default": "1.0",
     "test": "sample",
     "1.0": core.Index(filename="example_index_1.0.json"),
-    "sample": core.Index(filename="example_index_sample.json")
+    "sample": core.Index(filename="example_index_sample.json"),
 }

 # -- REMOTES is a dictionary containing all files that need to be downloaded.
 # -- The keys should be descriptive (e.g. 'annotations', 'audio').
 # -- When having data that can be partially downloaded, remember to set up
 # -- correctly destination_dir to download the files following the correct structure.
 REMOTES = {
-    'remote_data': download_utils.RemoteFileMetadata(
-        filename='a_zip_file.zip',
-        url='http://website/hosting/the/zipfile.zip',
-        checksum='00000000000000000000000000000000',  # -- the md5 checksum
-        destination_dir='path/to/unzip'  # -- relative path for where to unzip the data, or None
+    "remote_data": download_utils.RemoteFileMetadata(
+        filename="a_zip_file.zip",
+        url="http://website/hosting/the/zipfile.zip",
+        checksum="00000000000000000000000000000000",  # -- the md5 checksum
+        destination_dir="path/to/unzip",  # -- relative path for where to unzip the data, or None
     ),
 }

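A note on the `checksum` field above: it is the MD5 hash of the hosted file, which mirdata uses to verify downloads. One way to compute it for a new remote is a short standard-library script (a sketch; the filename is the placeholder from REMOTES):

    import hashlib

    def md5(path):
        # stream the file in chunks so large archives do not load into memory
        hash_md5 = hashlib.md5()
        with open(path, "rb") as fhandle:
            for chunk in iter(lambda: fhandle.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    print(md5("a_zip_file.zip"))  # paste the digest into the checksum field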
@@ -102,8 +103,8 @@ class Track(core.Track):
         annotation (EventData): a description of this annotation

     """
+
     def __init__(self, track_id, data_home, dataset_name, index, metadata):
-
         # -- this sets the following attributes:
         # -- * track_id
         # -- * _dataset_name
@@ -117,7 +118,7 @@ def __init__(self, track_id, data_home, dataset_name, index, metadata):
             index=index,
             metadata=metadata,
         )
-
+
         # -- add any dataset specific attributes here
         self.audio_path = self.get_path("audio")
         self.annotation_path = self.get_path("annotation")
@@ -146,12 +147,11 @@ def annotation(self) -> Optional[annotations.EventData]:
     # -- any memory heavy information (like audio) properties
     @property
     def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """The track's audio
+        """The track's audio.

         Returns:
             * np.ndarray - audio signal
             * float - sample rate
-
         """
         return load_audio(self.audio_path)

@@ -172,7 +172,7 @@ def to_jams(self):
 # -- if the dataset contains multitracks, you can define a MultiTrack similar to a Track
 # -- you can delete the block of code below if the dataset has no multitracks
 class MultiTrack(core.MultiTrack):
-    """Example multitrack class
+    """Example multitrack class.

     Args:
         mtrack_id (str): multitrack id
@@ -188,11 +188,9 @@ class MultiTrack(core.MultiTrack):

     Cached Properties:
         annotation (EventData): a description of this annotation
-
     """
-    def __init__(
-        self, mtrack_id, data_home, dataset_name, index, track_class, metadata
-    ):
+
+    def __init__(self, mtrack_id, data_home, dataset_name, index, track_class, metadata):
         # -- this sets the following attributes:
         # -- * mtrack_id
         # -- * _dataset_name
@@ -232,12 +230,11 @@ def annotation(self) -> Optional[annotations.EventData]:

     @property
     def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """The track's audio
+        """The track's audio.

         Returns:
             * np.ndarray - audio signal
             * float - sample rate
-
         """
         return load_audio(self.audio_path)

@@ -247,16 +244,15 @@ def to_jams(self):
         """Jams: the track's data in jams format"""
         return jams_utils.jams_converter(
             audio_path=self.mix_path,
-            annotation_data=[(self.annotation, None)],
-            ...
+            chord_data=[(self.annotation, None)],
         )
     # -- see the documentation for `jams_utils.jams_converter` for all fields


 # -- this decorator allows this function to take a string or an open bytes file as input
 # -- and in either case converts it to an open file handle.
 # -- It also checks if the file exists
-# -- and, if None is passed, None will be returned
+# -- and, if None is passed, None will be returned
 @io.coerce_to_bytes_io
 def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
     """Load a Example audio file.
@@ -267,7 +263,6 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
     Returns:
         * np.ndarray - the audio signal
        * float - The sample rate of the audio file
-
     """
     # -- for example, the code below. This should be dataset specific!
     # -- By default we load to mono
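The hunk above ends just before the function body, which the template leaves dataset specific. Under the stated defaults (mono, via the already-imported librosa), a plausible body is the one-liner below; `sr=None` keeps the file's native sample rate:

    # -- a sketch of a dataset-specific body for load_audio
    return librosa.load(fhandle, sr=None, mono=True)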
@@ -277,15 +272,15 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:

 # -- Write any necessary loader functions for loading the dataset's data

+
 # -- this decorator allows this function to take a string or an open file as input
 # -- and in either case converts it to an open file handle.
 # -- It also checks if the file exists
-# -- and, if None is passed, None will be returned
+# -- and, if None is passed, None will be returned
 @io.coerce_to_string_io
 def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:
-
     # -- because of the decorator, the file is already open
-    reader = csv.reader(fhandle, delimiter=' ')
+    reader = csv.reader(fhandle, delimiter=" ")
     intervals = []
     annotation = []
     for line in reader:
@@ -295,16 +290,14 @@ def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:
     # there are several annotation types in annotations.py
     # They should be initialized with data, followed by their units
     # see annotations.py for a complete list of types and units.
-    annotation_data = annotations.EventData(
-        np.array(intervals), "s", np.array(annotation), "open"
-    )
+    annotation_data = annotations.EventData(np.array(intervals), "s", np.array(annotation), "open")
     return annotation_data

+
 # -- use this decorator so the docs are complete
 @core.docstring_inherit(core.Dataset)
 class Dataset(core.Dataset):
-    """The Example dataset
-    """
+    """The Example dataset."""

     def __init__(self, data_home=None, version="default"):
         super().__init__(
@@ -320,40 +313,36 @@ def __init__(self, data_home=None, version="default"):
         )

     # -- if your dataset has a top-level metadata file, write a loader for it here
-    # -- you do not have to include this function if there is no metadata
+    # -- you do not have to include this function if there is no metadata
     @core.cached_property
     def _metadata(self):
-        metadata_path = os.path.join(self.data_home, 'example_metadata.csv')
+        metadata_path = os.path.join(self.data_home, "example_metadata.csv")

         # load metadata however makes sense for your dataset
-        metadata_path = os.path.join(data_home, 'example_metadata.json')
-        with open(metadata_path, 'r') as fhandle:
+        metadata_path = os.path.join(data_home, "example_metadata.json")
+        with open(metadata_path, "r") as fhandle:
             metadata = json.load(fhandle)

         return metadata

     # -- if your dataset needs to overwrite the default download logic, do it here.
     # -- this function is usually not necessary unless you need very custom download logic
-    def download(
-        self, partial_download=None, force_overwrite=False, cleanup=False
-    ):
-        """Download the dataset
+    def download(self, partial_download=None, force_overwrite=False, cleanup=False):
+        """Download the dataset.

         Args:
             partial_download (list or None):
                 A list of keys of remotes to partially download.
                 If None, all data is downloaded
             force_overwrite (bool):
-                If True, existing files are overwritten by the downloaded files.
+                If True, existing files are overwritten by the downloaded files.
             cleanup (bool):
                 Whether to delete any zip/tar files after extracting.

         Raises:
             ValueError: if invalid keys are passed to partial_download
             IOError: if a downloaded file's checksum is different from expected
-
         """
         # see download_utils.downloader for basic usage - if you only need to call downloader
         # once, you do not need this function at all.
         # only write a custom function if you need it!
-
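For context on how the pieces in this template fit together, a loader built from it is consumed through mirdata's top-level API. A minimal usage sketch, assuming the loader were registered under the hypothetical name "example" (the name and data_home path are placeholders):

    import mirdata

    dataset = mirdata.initialize("example", data_home="/path/to/mir_datasets/example")
    dataset.download()              # fetches the files in REMOTES and verifies their md5 checksums
    dataset.validate()              # checks local files against the dataset index
    track = dataset.choice_track()  # a random Track instance
    y, sr = track.audio             # lazily loaded through load_audio()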