-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmusic-archive-creator.py
170 lines (145 loc) · 5.94 KB
/
music-archive-creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import spotipy
import spotipy.util as util
import os
import logging
import logging.handlers
from datetime import date
from googlesearch import search
import eyed3
import os
from difflib import SequenceMatcher
# get track name similarity
def similar(a, b):
    """Return the similarity ratio of ``a`` and ``b``.

    The score is whatever ``difflib.SequenceMatcher(None, a, b).ratio()``
    reports for the two sequences (1.0 for identical inputs).
    """
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()
# retrieve tracks
def show_tracks(results):
    """Print one page of playlist items and return them as a DataFrame.

    Args:
        results: Mapping with an ``'items'`` list. Each item holds a
            ``'track'`` mapping that provides ``'name'`` and an
            ``'artists'`` list whose first entry has a ``'name'``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``track_artist`` and
        ``track_name``, one row per printed track (first artist only).
        With no usable items the frame is empty but keeps both columns.
    """
    columns = ['track_artist', 'track_name']
    rows = []
    for j, item in enumerate(results['items']):
        track = item['track']
        if not track:
            # An item without track data used to raise TypeError on the
            # subscript below; skip it instead.
            # NOTE(review): presumably the API returns this for tracks that
            # are no longer available - confirm.
            continue
        artist = track['artists'][0]['name']
        print(" %d %32.32s %s" % (j, artist, track['name']))
        rows.append({'track_artist': artist, 'track_name': track['name']})
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    return pd.DataFrame(rows, columns=columns)
# get url from beatport
def getURL(query):
    """Search for ``query`` prefixed with "Beatport " and collect the hits.

    Returns:
        A list holding every URL yielded by ``search`` for that query.
    """
    beatport_query = "Beatport " + query
    hits = search(beatport_query, tld="com", lang="en", num=1, stop=3, pause=2)
    return list(hits)
# get playlist detail from spotify for specific user and playlist
def get_playlist(user, playlist_name=None):
    """Fetch every track of one of ``user``'s own Spotify playlists.

    Args:
        user: Spotify user id. It is used to request the token, to list
            the playlists and to keep only playlists owned by that user.
        playlist_name: Exact name of the playlist to read.

    Returns:
        A DataFrame made of all ``show_tracks`` pages, in the order the
        pages were fetched and with a fresh 0..n-1 index; an empty
        DataFrame when no owned playlist has that name; ``None`` when no
        token could be obtained.
    """
    token = util.prompt_for_user_token(user)
    if not token:
        # The original formatted the undefined name `username` here.
        logging.debug("Can't get token for {0}".format(user))
        return None

    sp = spotipy.Spotify(auth=token)
    playlists = sp.user_playlists(user)
    while playlists:
        for playlist in playlists['items']:
            if playlist['owner']['id'] != user:
                continue
            if playlist['name'] != playlist_name:
                continue
            logging.info('Playlist name: {0}'.format(playlist['name']))
            logging.info('Total tracks: {0}'.
                         format(playlist['tracks']['total']))
            results = sp.playlist(playlist['id'], fields="tracks,next")
            tracks = results['tracks']
            # Include the first page as well; the original only converted
            # pages fetched through `next`, so short playlists came back
            # empty.
            pages = [show_tracks(tracks)]
            while tracks['next']:
                tracks = sp.next(tracks)
                pages.append(show_tracks(tracks))
            # One concat keeps page order and avoids duplicate row labels.
            return pd.concat(pages, ignore_index=True)
        # The playlist listing is paged too; keep looking on later pages.
        playlists = sp.next(playlists) if playlists['next'] else None

    logging.warning('Playlist {0} not found for {1}'.format(playlist_name,
                                                            user))
    return pd.DataFrame()
# get track details from beatport
def get_song_details(url):
    """Scrape genre, key, title, remix name and BPM from a track page.

    Args:
        url: List of candidate URLs (the shape ``getURL`` returns); only
            the first one is fetched. A single URL string also works.

    Returns:
        ``(genre, key, name, remixed, bpm)`` with ``bpm`` as a float, or
        ``None`` when there is no URL, the request fails, the response is
        not OK, or the page lacks the expected elements.
    """
    if not url:
        # An empty search result used to raise IndexError on url[0].
        logging.debug("No URL to look up")
        return None
    target = url if isinstance(url, str) else url[0]

    try:
        # A timeout keeps one unresponsive host from hanging the whole run.
        html_doc = requests.get(target, timeout=10)
    except requests.RequestException:
        logging.debug("Request failed for %s", target)
        return None
    if not html_doc.ok:
        return None

    try:
        soup = BeautifulSoup(html_doc.text, 'html.parser')
        genre = soup.find_all(
            'li',
            {"class": "interior-track-content-item interior-track-genre"})
        genre = genre[0].a.string
        # get bpm: the text after the "BPM" label, minus one separator char
        bpm = soup.find(
            'li',
            {"class": "interior-track-content-item interior-track-bpm"})
        bpm = float(bpm.text.strip().split("BPM", 1)[1][1:])
        # get key: same layout as the BPM entry
        key = soup.find(
            'li',
            {"class": "interior-track-content-item interior-track-key"})
        key = key.text.strip().split("Key", 1)[1][1:]
        # get name: child 1 holds the title, child 3 the remix name
        name = soup.find('div', {"class": "interior-title"})
        remixed = name.contents[3].contents[0]
        name = name.contents[1].contents[0]
    except (AttributeError, IndexError, TypeError, ValueError):
        # A missing element or unexpected text means this is not a page
        # with the expected layout.
        logging.debug("Invalid link")
        return None
    return genre, key, name, remixed, bpm
if __name__ == '__main__':
    # Script flow: read the playlist, look every track up on the web, save
    # the details as CSV, then tag local MP3 files with the best match.
    logger = logging.getLogger(__name__)
    # set log level
    # NOTE(review): the messages below are emitted through the module-level
    # `logging` functions (root logger), not through `logger`.
    logger.setLevel(logging.WARNING)
    path = "C://Users//batuhan.organ//Desktop//Music/spotify-techno/"
    run_date = date.today()
    username = "koftezzz"

    track_list = get_playlist(username, playlist_name="Proper t")
    if track_list is None or track_list.empty:
        # Nothing to enrich or tag; the original crashed further down.
        raise SystemExit("No tracks found for playlist 'Proper t'")

    # A fresh 0..n-1 index keeps label-based .loc[i] and position i on the
    # same row even if the pages were concatenated with repeated labels.
    track_list = track_list.reset_index(drop=True)
    for column in ('genre', 'name', 'remixed', 'key', 'bpm'):
        track_list[column] = ""

    total = len(track_list)
    pairs = list(zip(track_list['track_artist'], track_list['track_name']))
    for i, (artist, title) in enumerate(pairs):
        track = artist + " - " + title
        logging.info('Trying song: {0}'.format(track))
        # The original passed one argument to a two-field format string.
        logging.info('{0} out of {1}'.format(i + 1, total))
        url_list = getURL(query=track)
        # Skip the lookup when the search returned nothing.
        result = get_song_details(url_list) if url_list else None
        if result:
            track_list.loc[i, 'genre'] = result[0]
            track_list.loc[i, 'key'] = result[1]
            track_list.loc[i, 'name'] = result[2]
            track_list.loc[i, 'remixed'] = result[3]
            track_list.loc[i, 'bpm'] = result[4]
        else:
            logging.info('{0}'.format(result))
        time.sleep(2)

    # `run_date` is a date, so it must be formatted before concatenation;
    # the target directory is created if it does not exist yet.
    os.makedirs('./tracks', exist_ok=True)
    track_list.to_csv('./tracks/song-details_' + run_date.isoformat() +
                      '.csv')

    # update downloaded songs details
    mp3_list = [entry for entry in os.listdir(path) if entry.endswith(".mp3")]
    # The frame's columns are track_artist / track_name (see show_tracks);
    # the original referenced non-existent 'artist' / 'songTitle' columns.
    track_list['full_name'] = (track_list['track_artist'] + " - " +
                               track_list['track_name'])
    for audio_path in mp3_list:
        track_list['name_similarity'] = track_list['full_name'].apply(
            lambda x: similar(x, audio_path))
        # idxmax returns a label, so select with .loc rather than .iloc.
        best = track_list.loc[track_list['name_similarity'].idxmax()]
        similarity = best['name_similarity']
        scraped_name = best['name']
        # 'name' starts as "" and is only filled after a successful scrape.
        has_details = not pd.isna(scraped_name) and bool(scraped_name)

        if has_details and similarity > 0.4:
            audiofile = eyed3.load(os.path.join(path, audio_path))
            if audiofile is None:
                # eyed3 could not read the file; leave it untouched.
                print(audio_path)
                continue
            if audiofile.tag is None:
                audiofile.initTag()
            tag = audiofile.tag
            # Tag fields are plain attributes; assigning to setArtist /
            # setTitle only created unused attributes.
            tag.artist = str(best['track_artist'])
            tag.title = str(best['track_name'])
            tag.genre = str(best['genre'])
            bpm = best['bpm']
            if bpm != "" and not pd.isna(bpm):
                tag.bpm = int(bpm)
            tag.save()
        else:
            print(audio_path)
        logging.info("Max similarity for the track {0} tracks is {"
                     "1}".format(audio_path, similarity))