|
| 1 | +import csv |
| 2 | +import os |
| 3 | +import numpy as np |
| 4 | +from math import floor |
| 5 | +from smart_open import open |
| 6 | + |
| 7 | +import librosa |
| 8 | + |
| 9 | +from mirdata import download_utils, core, io |
| 10 | +from mirdata import jams_utils |
| 11 | + |
| 12 | +from typing import BinaryIO, Optional, Tuple |
| 13 | + |
# Name under which this dataset is registered.
NAME = "fma_keys"

# Dataset description followed by the citation entry.
# NOTE: a stray "}" that followed the closing brace of the @inproceedings
# entry was removed so the braces in the citation are balanced.
BIBTEX = """
We present a new expert-labeled dataset for the evaluation of key detection containing
340 hours (5489 songs) of song-level key and mode annotations, spread across 17 genres.

For each song, we provide annotations for:
* FMA track id
* Spotify URI (when available)
* Key and mode
All the audio is collected in and distributed by the FMA dataset by Michael Defferrard,
Kirell Benzi, Pierre Vandergheynst, and Xavier Bresson.

* The FMA metadata is made freely available for public use under a Creative Commons license
* We do not hold the copyright on the audio and distribute it under the license chosen by the artist
* The dataset is meant for research purposes

Please cite our work if you use our dataset:
    @inproceedings{
        wong_fma_keys,
        title = {{FMAK}: A Dataset of Key and Mode Annotations for the Free Music Archive},
        author = {Wong, Stella and Hernandez, Gandalf},
        booktitle = {24th International Society for Music Information Retrieval Conference (ISMIR)},
        year = {2023}
    }
"""

# License string handed to the Dataset constructor.
LICENSE_INFO = "Creative Commons Attribution 4.0 International"
| 44 | + |
# Index file for release 1.0 of the dataset.
_INDEX_1_0 = core.Index(filename="fma_keys_index_1.0.json")

# Version registry: the "default" and "test" entries both name version "1.0".
INDEXES = {"default": "1.0", "test": "1.0", "1.0": _INDEX_1_0}
| 50 | + |
# what the user will download: one metadata CSV plus ten zip archives, all
# served from the same Zenodo record.
_ZENODO_FILES = "https://zenodo.org/records/10719860/files"

# (archive stem, checksum) pairs; the stem doubles as the "tracks-<stem>" key
# suffix and as the zip file's base name.
_AUDIO_ARCHIVES = (
    ("000-019", "b86f6414820c1422b2c6cdf87be1ef3a"),
    ("020-039", "a2da8377fdbc1d3a1f54dd60aa7b8f9b"),
    ("040-049", "d70babe5f66bdf3e821c42a8b8aafb9b"),
    ("050-059", "f53fcba704fce27e5c7f3ec2532dcb44"),
    ("060-069", "1520f067d7caaf0813780ff69bc4ba85"),
    ("070-079", "186643746fcb1f4722a28d3eb9c6b99c"),
    ("080-089", "8cf882609fc2f301621c2e9f9da03214"),
    ("090-099", "84f0f036e3778ffd97c10b591f803d06"),
    ("100-109", "4a307f019d3354064814f05d1dffa1e2"),
    ("110-124", "88d7dbcca82189ed75b7baa5aa132fc1"),
)


def _zenodo_remote(filename, checksum):
    """Describe one file of the Zenodo record as a RemoteFileMetadata."""
    return download_utils.RemoteFileMetadata(
        filename=filename,
        url=f"{_ZENODO_FILES}/{filename}?download=1",
        checksum=checksum,
    )


REMOTES = {
    "metadata": _zenodo_remote(
        "fma_keys_metadata.csv", "d80a03bc8659edc60e335bd7f6bdf12a"
    ),
    **{
        f"tracks-{stem}": _zenodo_remote(f"{stem}.zip", checksum)
        for stem, checksum in _AUDIO_ARCHIVES
    },
}
| 109 | + |
# Key names in ascending semitone order starting from C; a name's position in
# this tuple is its key number (C = 0 ... B = 11).
_KEY_NAMES = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "Bb", "B")

# Key name -> key number.
KEY_MAP = {name: number for number, name in enumerate(_KEY_NAMES)}

# Mode name -> mode number. The spelling (lower-case "minor", capitalised
# "Major") is part of the lookup key and must be kept as is.
MODE_MAP = dict(minor=0, Major=1)
| 126 | + |
| 127 | + |
class Track(core.Track):
    """FMA Keys Track class

    Args:
        track_id (str): track id of the track

    Attributes:
        audio_path (str): path to the track's audio file
        spotify_uri (str): Spotify URI if available
        key (str): name of the annotated key (e.g. "C", "Bb")
        mode (str): name of the annotated mode ("minor" or "Major")
        key_number (int): integer code of the annotated key
        mode_number (int): integer code of the annotated mode
        audio (Tuple[np.ndarray, float]): audio signal and sample rate
    """

    def __init__(self, track_id, data_home, dataset_name, index, metadata):
        super().__init__(track_id, data_home, dataset_name, index, metadata)

        self.audio_path = self.get_path("audio")

    @property
    def spotify_uri(self):
        """The "spotify_uri" entry of the track metadata, or None if absent."""
        return self._track_metadata.get("spotify_uri")

    @property
    def key(self):
        """The "key" entry of the track metadata, or None if absent."""
        return self._track_metadata.get("key")

    @property
    def mode(self):
        """The "mode" entry of the track metadata, or None if absent."""
        return self._track_metadata.get("mode")

    @property
    def key_number(self):
        """The "key_number" entry of the track metadata, or None if absent."""
        return self._track_metadata.get("key_number")

    @property
    def mode_number(self):
        """The "mode_number" entry of the track metadata, or None if absent."""
        return self._track_metadata.get("mode_number")

    @property
    def audio(self) -> Optional[Tuple[np.ndarray, float]]:
        """The track's audio, loaded from ``audio_path`` on every access

        Returns:
            * np.ndarray - audio signal
            * float - sample rate

        """
        return load_audio(self.audio_path)

    def to_jams(self):
        """Get the track's data in jams format

        Only the track metadata is passed to the converter.

        Returns:
            jams.JAMS: the track's data in jams format

        """
        return jams_utils.jams_converter(
            metadata=self._track_metadata,
        )
| 190 | + |
@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
    """
    The FMA Keys dataset
    """

    def __init__(self, data_home=None, version="default"):
        super().__init__(
            data_home,
            version,
            # Use the module-level constant rather than repeating the literal.
            name=NAME,
            track_class=Track,
            bibtex=BIBTEX,
            indexes=INDEXES,
            remotes=REMOTES,
            license_info=LICENSE_INFO,
        )

    def _track_to_dict(self, t):
        """Convert one row of the metadata CSV into a track metadata dict

        Args:
            t (dict): CSV row; must provide "spotify_uri" and "key_and_mode",
                the latter holding a key name and a mode name separated by
                whitespace (e.g. "C Major")

        Returns:
            dict: spotify_uri, key, mode, key_number and mode_number

        Raises:
            IndexError: if "key_and_mode" has fewer than two tokens
            KeyError: if the key or mode name is not in KEY_MAP / MODE_MAP

        """
        # split() without an argument ignores leading/trailing whitespace and
        # collapses repeated separators, which split(" ") would turn into
        # empty tokens that fail the KEY_MAP / MODE_MAP lookups.
        tokens = t["key_and_mode"].split()
        key_name = tokens[0]
        mode_name = tokens[1]

        return {
            "spotify_uri": t["spotify_uri"],
            "key": key_name,
            "mode": mode_name,
            "key_number": KEY_MAP[key_name],
            "mode_number": MODE_MAP[mode_name],
        }

    @core.cached_property
    def _metadata(self):
        """Per-track metadata read from fma_keys_metadata.csv in data_home

        Returns:
            dict: maps each row's "track_id" to its ``_track_to_dict`` result

        Raises:
            FileNotFoundError: if the metadata file is missing

        """
        metadata_path = os.path.join(self.data_home, "fma_keys_metadata.csv")

        try:
            # newline="" lets the csv module handle line endings itself, as
            # the csv documentation recommends for file objects.
            with open(metadata_path, newline="") as f:
                return {
                    row["track_id"]: self._track_to_dict(row)
                    for row in csv.DictReader(f)
                }
        except FileNotFoundError as err:
            raise FileNotFoundError(
                "Metadata not found. Did you run .download()?"
            ) from err
| 235 | + |
| 236 | + |
@io.coerce_to_bytes_io
def load_audio(fhandle: BinaryIO) -> Tuple[np.ndarray, float]:
    """Load an FMA Keys audio file

    The audio is decoded as mono at the file's own sample rate and is
    truncated to a whole number of seconds.

    Args:
        fhandle (str or file-like): File-like object or path to audio file

    Returns:
        * np.ndarray - audio signal
        * float - sample rate

    """
    # The same handle is probed for its duration and then decoded. Remember
    # where it is positioned so the decode starts from the same place even if
    # the probe moves the read position.
    start = fhandle.tell() if fhandle.seekable() else None

    # librosa has problems reading FMA mp3s without clamping down to the second.
    duration = librosa.get_duration(path=fhandle)

    if start is not None:
        fhandle.seek(start)

    return librosa.load(fhandle, sr=None, mono=True, duration=floor(duration))
0 commit comments