-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathspotify_fetch_artist_top_tracks.py
More file actions
122 lines (87 loc) · 3.63 KB
/
spotify_fetch_artist_top_tracks.py
File metadata and controls
122 lines (87 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from typing import Any, Dict, List
import pandas as pd
import spotipy
import streamlit as st
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
from scrapers.st_utils import StProgress
# Page title shown at the top of the Streamlit app.
st.title("Spotify artist top tracks downloader")
# Path of the input CSV read by load_data(); relative to the working directory.
DATA_URL = "./artists.csv"
def load_data(nrows: int) -> pd.DataFrame:
    """Read the artists CSV located at ``DATA_URL``.

    Args:
        nrows: Maximum number of rows to read from the file.

    Returns:
        The parsed rows as a DataFrame.
    """
    return pd.read_csv(DATA_URL, nrows=nrows)
# Status element; its text is replaced below once loading has finished.
data_load_state = st.text("Loading data...")
# Read at most 10000 rows of the input CSV.
data = load_data(10000)
# Keep only the rows that have a value in the spotify_id column.
data = data[data["spotify_id"].notnull()]
data_load_state.text(f"Loaded {len(data)} not null artists")
# Show the remaining row count and the first few ids as a sanity check.
st.subheader("Raw data, len: " + str(len(data)))
st.write(data["spotify_id"].head())
@st.cache(allow_output_mutation=True)
def spotify_client() -> spotipy.client.Spotify:
    """Build a Spotify API client that authenticates with client credentials.

    ``load_dotenv()`` is called first, and ``SpotifyClientCredentials`` is
    created without explicit arguments.
    NOTE(review): presumably the client id/secret are expected in the
    environment or a ``.env`` file - confirm against the spotipy docs.

    Returns:
        A ``spotipy.Spotify`` instance using that auth manager.
    """
    load_dotenv()
    credentials = SpotifyClientCredentials()
    return spotipy.Spotify(auth_manager=credentials)
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def artist_top_tracks(sp_artist_id: str, country: str = "RU") -> Dict[str, Any]:
    """Fetch an artist's top tracks from Spotify for one market.

    Args:
        sp_artist_id: Spotify id of the artist.
        country: Market code passed to Spotify as ``country``. Defaults to
            ``"RU"``, the value that was previously hard-coded, so existing
            callers are unaffected.

    Returns:
        The response of ``Spotify.artist_top_tracks`` as returned by spotipy;
        callers in this file read its ``"tracks"`` entry.
    """
    sp = spotify_client()
    return sp.artist_top_tracks(sp_artist_id, country=country)
# Manual lookup: show the raw top-tracks response for a single artist id
# typed into the text box. Nothing is fetched while the box is empty.
st.subheader("Топ треков артиста по sp_id")
test_artist_sp_id = st.text_input("", "")
if test_artist_sp_id != "":
    test_artist_top_tracks = artist_top_tracks(test_artist_sp_id)
    st.write(test_artist_top_tracks)
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def get_album(sp_album_id: str) -> Dict[str, Any]:
    """Fetch a single album from Spotify.

    Args:
        sp_album_id: Spotify id of the album.

    Returns:
        The response of ``Spotify.album`` as returned by spotipy; callers in
        this file read its ``"popularity"`` entry.
    """
    return spotify_client().album(sp_album_id)
# @st.cache(suppress_st_warning=True, allow_output_mutation=True)
def get_artists_top_tracks(artists_sp_ids: List[str]) -> pd.DataFrame:
    """Collect the top tracks of the given artists into one DataFrame.

    For every artist id the top tracks are fetched with
    ``artist_top_tracks``; for every track its album is fetched with
    ``get_album`` to obtain the album popularity. Progress is displayed
    through ``StProgress`` and a status line naming the current artist.

    Args:
        artists_sp_ids: Spotify artist ids to process.

    Returns:
        A DataFrame with one row per track and the columns
        ``artist_spotify_id``, ``name``, ``spotify_id``, ``duration_ms``,
        ``explicit``, ``popularity``, ``album_type``, ``album_name``,
        ``album_spotify_id``, ``release_date`` and ``album_popularity``.
        If any artist fails, the error is written to the page, processing
        stops, and the rows collected so far are returned.
    """
    # Column-oriented accumulator: one list per output column.
    columns: Dict[str, List[Any]] = {
        "artist_spotify_id": [],
        "name": [],
        "spotify_id": [],
        "duration_ms": [],
        "explicit": [],
        "popularity": [],
        "album_type": [],
        "album_name": [],
        "album_spotify_id": [],
        "release_date": [],
        "album_popularity": [],
    }

    curr_artist = st.text("")
    for artist_sp_id in StProgress(
        artists_sp_ids, title=f"Обкачиваем {len(artists_sp_ids)} артистов"
    ):
        # Call the element's method to replace its text. Assigning to
        # `.text` only rebinds the attribute and never updates the page.
        curr_artist.text("curr artist " + artist_sp_id)
        try:
            top_tracks = artist_top_tracks(artist_sp_id)
            for track in top_tracks["tracks"]:
                album = track["album"]
                sp_album = get_album(album["id"])
                # Build the whole row before appending so a missing key
                # cannot leave the columns with different lengths.
                row = {
                    "artist_spotify_id": artist_sp_id,
                    "name": track["name"],
                    "spotify_id": track["id"],
                    "duration_ms": track["duration_ms"],
                    "explicit": track["explicit"],
                    "popularity": track["popularity"],
                    "album_type": album["album_type"],
                    "album_name": album["name"],
                    "album_spotify_id": album["id"],
                    "release_date": album["release_date"],
                    "album_popularity": sp_album["popularity"],
                }
                for column_name, value in row.items():
                    columns[column_name].append(value)
        except Exception as e:
            # Report the failure on the page and stop; the partial result
            # gathered so far is still returned to the caller.
            st.write(e)
            st.text("Some errors on processing artists on " + artist_sp_id)
            break

    return pd.DataFrame(columns)
# Download the top tracks for every artist row that has a spotify_id.
sp_artists_top_tracks_data = get_artists_top_tracks(data["spotify_id"])
# Section header for the result ("Итого" = "Total").
st.subheader("Итого")
# Preview only the first 50 rows on the page.
st.write(sp_artists_top_tracks_data.head(50))
# Persist the full result without the index column; the path is relative to
# the working directory.
# NOTE(review): presumably ../data/intermediate must already exist - confirm.
sp_artists_top_tracks_data.to_csv("../data/intermediate/artist_top_tracks.csv", index=False)
# Final header showing the total number of collected tracks.
st.subheader(f"Кол-во треков: {len(sp_artists_top_tracks_data)}")