1- """Example Dataset Loader
1+ """Example Dataset Loader.
22
33.. admonition:: Dataset Info
44 :class: dropdown
1111 4. Describe the type of music included in the dataset
1212 5. Indicate any relevant papers related to the dataset
1313 6. Include a description about how the data can be accessed and the license it uses (if applicable)
14-
1514"""
1615import csv
1716import json
2221# -- example imports you won't use
2322import librosa
2423import numpy as np
25- from smart_open import open # if you use the open function, make sure you include this line!
24+ from smart_open import (
25+ open , # if you use the open function, make sure you include this line!
26+ )
2627
27- from mirdata import download_utils , jams_utils , core , annotations
28+ from mirdata import annotations , core , download_utils , jams_utils
2829
2930# -- Add any relevant citations here
3031BIBTEX = """
5455 "default" : "1.0" ,
5556 "test" : "sample" ,
5657 "1.0" : core .Index (filename = "example_index_1.0.json" ),
57- "sample" : core .Index (filename = "example_index_sample.json" )
58+ "sample" : core .Index (filename = "example_index_sample.json" ),
5859}
5960
6061# -- REMOTES is a dictionary containing all files that need to be downloaded.
6162# -- The keys should be descriptive (e.g. 'annotations', 'audio').
6263# -- When having data that can be partially downloaded, remember to set up
6364# -- correctly destination_dir to download the files following the correct structure.
6465REMOTES = {
65- ' remote_data' : download_utils .RemoteFileMetadata (
66- filename = ' a_zip_file.zip' ,
67- url = ' http://website/hosting/the/zipfile.zip' ,
68- checksum = ' 00000000000000000000000000000000' , # -- the md5 checksum
69- destination_dir = ' path/to/unzip' # -- relative path for where to unzip the data, or None
66+ " remote_data" : download_utils .RemoteFileMetadata (
67+ filename = " a_zip_file.zip" ,
68+ url = " http://website/hosting/the/zipfile.zip" ,
69+ checksum = " 00000000000000000000000000000000" , # -- the md5 checksum
70+ destination_dir = " path/to/unzip" , # -- relative path for where to unzip the data, or None
7071 ),
7172}
7273
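As a usage sketch of why REMOTES keys matter (the loader name "example" and the key "remote_data" are this template's placeholders, not a published dataset), an entry like the one above is what the standard mirdata API uses for partial downloads:

import mirdata

example = mirdata.initialize("example", data_home="/tmp/example")  # placeholder name and path
example.download(partial_download=["remote_data"])  # fetch only this REMOTES key
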
@@ -102,8 +103,8 @@ class Track(core.Track):
         annotation (EventData): a description of this annotation
 
     """
+
     def __init__(self, track_id, data_home, dataset_name, index, metadata):
-
         # -- this sets the following attributes:
         # -- * track_id
         # -- * _dataset_name
@@ -117,7 +118,7 @@ def __init__(self, track_id, data_home, dataset_name, index, metadata):
             index=index,
             metadata=metadata,
         )
-
+
         # -- add any dataset specific attributes here
         self.audio_path = self.get_path("audio")
         self.annotation_path = self.get_path("annotation")
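For orientation, the two get_path attributes above surface on every Track instance as absolute paths resolved against the index and data_home; a minimal sketch continuing the snippet above, with a hypothetical track ID:

track = example.track("some_track_id")  # hypothetical ID taken from the index
print(track.audio_path)        # e.g. <data_home>/audio/some_track_id.wav
print(track.annotation_path)   # resolved the same way from the index
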
@@ -146,12 +147,11 @@ def annotation(self) -> Optional[annotations.EventData]:
     # -- any memory heavy information (like audio) properties
     @property
     def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """The track's audio
+        """The track's audio.
 
         Returns:
             * np.ndarray - audio signal
             * float - sample rate
-
         """
         return load_audio(self.audio_path)
 
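The design choice worth noting here: audio is a plain @property rather than something loaded in __init__, so constructing a Track never touches the (potentially large) file. The read happens only when the attribute is accessed, and a plain property re-reads on every access, unlike the cached annotation property. A sketch:

track = example.track("some_track_id")  # cheap: no audio is read here
y, sr = track.audio                     # the file is actually loaded on this line
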
@@ -172,7 +172,7 @@ def to_jams(self):
 # -- if the dataset contains multitracks, you can define a MultiTrack similar to a Track
 # -- you can delete the block of code below if the dataset has no multitracks
 class MultiTrack(core.MultiTrack):
-    """Example multitrack class
+    """Example multitrack class.
 
     Args:
         mtrack_id (str): multitrack id
@@ -188,11 +188,9 @@ class MultiTrack(core.MultiTrack):
 
     Cached Properties:
         annotation (EventData): a description of this annotation
-
     """
-    def __init__(
-        self, mtrack_id, data_home, dataset_name, index, track_class, metadata
-    ):
+
+    def __init__(self, mtrack_id, data_home, dataset_name, index, track_class, metadata):
         # -- this sets the following attributes:
         # -- * mtrack_id
         # -- * _dataset_name
@@ -232,12 +230,11 @@ def annotation(self) -> Optional[annotations.EventData]:
 
     @property
     def audio(self) -> Optional[Tuple[np.ndarray, float]]:
-        """The track's audio
+        """The track's audio.
 
         Returns:
             * np.ndarray - audio signal
             * float - sample rate
-
         """
         return load_audio(self.audio_path)
 
@@ -247,16 +244,15 @@ def to_jams(self):
         """Jams: the track's data in jams format"""
         return jams_utils.jams_converter(
             audio_path=self.mix_path,
-            annotation_data=[(self.annotation, None)],
-            ...
+            chord_data=[(self.annotation, None)],
         )
         # -- see the documentation for `jams_utils.jams_converter` for all fields
 
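For reference, a consumer-side sketch of the converter above (mtrack stands for any MultiTrack instance, however obtained; the output filename is hypothetical):

jam = mtrack.to_jams()           # build a jams.JAMS object from the track's data
jam.save("some_mtrack_id.jams")  # save() comes from the jams package
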
 
 # -- this decorator allows this function to take a string or an open bytes file as input
 # -- and in either case converts it to an open file handle.
 # -- It also checks if the file exists
-# -- and, if None is passed, None will be returned 
+# -- and, if None is passed, None will be returned
 @io.coerce_to_bytes_io
 def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
     """Load an Example audio file.
@@ -267,7 +263,6 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
     Returns:
         * np.ndarray - the audio signal
         * float - the sample rate of the audio file
-
     """
     # -- for example, the code below. This should be dataset specific!
     # -- By default we load to mono
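Because of io.coerce_to_bytes_io, callers may hand load_audio either a path string or an already-open binary handle, and None falls through as None, per the comment above. A sketch with a hypothetical path:

y, sr = load_audio("/tmp/example/audio/some_track_id.wav")  # str: opened internally

with open("/tmp/example/audio/some_track_id.wav", "rb") as f:
    y, sr = load_audio(f)  # an open binary handle works too

assert load_audio(None) is None  # None passes straight through
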
@@ -277,15 +272,15 @@ def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
 
 # -- Write any necessary loader functions for loading the dataset's data
 
+
 # -- this decorator allows this function to take a string or an open file as input
 # -- and in either case converts it to an open file handle.
 # -- It also checks if the file exists
-# -- and, if None is passed, None will be returned 
+# -- and, if None is passed, None will be returned
 @io.coerce_to_string_io
 def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:
-
     # -- because of the decorator, the file is already open
-    reader = csv.reader(fhandle, delimiter=' ')
+    reader = csv.reader(fhandle, delimiter=" ")
     intervals = []
     annotation = []
     for line in reader:
@@ -295,16 +290,14 @@ def load_annotation(fhandle: TextIO) -> Optional[annotations.EventData]:
     # there are several annotation types in annotations.py
     # They should be initialized with data, followed by their units
     # see annotations.py for a complete list of types and units.
-    annotation_data = annotations.EventData(
-        np.array(intervals), "s", np.array(annotation), "open"
-    )
+    annotation_data = annotations.EventData(np.array(intervals), "s", np.array(annotation), "open")
     return annotation_data
 
+
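To make the expected input concrete: load_annotation above parses a space-delimited file into an annotations.EventData whose interval unit is seconds ("s") and whose labels use an open vocabulary ("open"). The row layout below is a hypothetical one for this template, since the parsing loop is dataset specific:

# some_track_id.txt (hypothetical): start_time end_time label
# 0.00 1.50 intro
# 1.50 4.25 verse

events = load_annotation("/tmp/example/annotations/some_track_id.txt")
print(events.intervals)  # np.ndarray of [start, end] times in seconds
print(events.events)     # np.ndarray of string labels
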
303297# -- use this decorator so the docs are complete
304298@core .docstring_inherit (core .Dataset )
305299class Dataset (core .Dataset ):
306- """The Example dataset
307- """
300+ """The Example dataset."""
308301
309302 def __init__ (self , data_home = None , version = "default" ):
310303 super ().__init__ (
@@ -320,40 +313,36 @@ def __init__(self, data_home=None, version="default"):
         )
 
     # -- if your dataset has a top-level metadata file, write a loader for it here
-    # -- you do not have to include this function if there is no metadata 
+    # -- you do not have to include this function if there is no metadata
     @core.cached_property
     def _metadata(self):
-        metadata_path = os.path.join(self.data_home, 'example_metadata.csv')
+        metadata_path = os.path.join(self.data_home, "example_metadata.csv")
 
         # load metadata however makes sense for your dataset
-        metadata_path = os.path.join(data_home, 'example_metadata.json')
-        with open(metadata_path, 'r') as fhandle:
+        metadata_path = os.path.join(self.data_home, "example_metadata.json")
+        with open(metadata_path, "r") as fhandle:
             metadata = json.load(fhandle)
 
         return metadata
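One behavioral note on the decorator above: core.cached_property means the metadata file is read once per Dataset instance, on first access, and the parsed object is reused afterwards. A sketch, again using the template's placeholder loader name:

example = mirdata.initialize("example")
meta = example._metadata  # first access: reads the metadata file from disk
meta = example._metadata  # later accesses: returns the cached object
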
 
     # -- if your dataset needs to overwrite the default download logic, do it here.
     # -- this function is usually not necessary unless you need very custom download logic
-    def download(
-        self, partial_download=None, force_overwrite=False, cleanup=False
-    ):
-        """Download the dataset
+    def download(self, partial_download=None, force_overwrite=False, cleanup=False):
+        """Download the dataset.
 
         Args:
             partial_download (list or None):
                 A list of keys of remotes to partially download.
                 If None, all data is downloaded
             force_overwrite (bool):
-                If True, existing files are overwritten by the downloaded files. 
+                If True, existing files are overwritten by the downloaded files.
             cleanup (bool):
                 Whether to delete any zip/tar files after extracting.
 
         Raises:
             ValueError: if invalid keys are passed to partial_download
             IOError: if a downloaded file's checksum is different from expected
-
         """
         # see download_utils.downloader for basic usage - if you only need to call downloader
         # once, you do not need this function at all.
         # only write a custom function if you need it!
-
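If the override really is needed, its body would typically still delegate to download_utils.downloader; a minimal sketch, assuming the downloader keywords (remotes, partial_download, force_overwrite, cleanup) used elsewhere in this template:

def download(self, partial_download=None, force_overwrite=False, cleanup=False):
    # any custom pre/post-download steps would go around this call
    download_utils.downloader(
        self.data_home,
        remotes=self.remotes,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )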