-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHome.py
395 lines (306 loc) · 16.8 KB
/
Home.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
import datetime
import streamlit as st
import io
import plotly.express as px
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from streamlit_extras.metric_cards import style_metric_cards
from streamlit_extras.chart_container import chart_container
from streamlit_extras.switch_page_button import switch_page
from streamlit_extras.app_logo import add_logo
from prophet import Prophet
from channelDataExtraction import getChannelData
from channelVideoDataExtraction import *
########################################################################################################################
# FUNCTIONS
########################################################################################################################
@st.cache_data
def _fetch_channel_data(api_key, channel_id):
    """Fetch channel details and per-video data; the result is cached by Streamlit.

    This helper deliberately performs no ``st.session_state`` writes: when
    ``st.cache_data`` serves a stored result the function body is not executed,
    so any session updates made here would be silently skipped.

    Returns:
        ``(channel_details, videos, all_video_data, videos_df)``, or four
        ``None`` values when ``getChannelData`` returns ``None``.
    """
    channel_details = getChannelData(api_key, channel_id)

    # check if bad channel id
    if channel_details is None:
        return None, None, None, None

    videos = getVideoList(api_key, channel_details["uploads"])
    videos_df = pd.DataFrame(videos)

    # Entries without an id are dropped before requesting per-video data.
    video_ids = [video['id'] for video in videos if video['id'] is not None]
    all_video_data = buildVideoListDataframe(api_key, video_ids)

    return channel_details, videos, all_video_data, videos_df


def download_data(api_key, channel_id):
    """Load the channel's data and publish it to the current session.

    The expensive fetch is delegated to the cached ``_fetch_channel_data``;
    the session bookkeeping runs on every call so that it also happens when
    the fetch is answered from cache (for example in a fresh browser session).

    Session state written when the channel is valid:
        * ``all_video_df`` and ``api_key`` - refreshed on every call.
        * ``start_index``, ``end_index``, ``video_id`` - reset to ``0``, ``10``
          and ``None`` only when the ``(api_key, channel_id)`` pair differs
          from the one last loaded in this session, so pagination survives
          ordinary reruns.

    Args:
        api_key: YouTube API key passed through to the extraction helpers.
        channel_id: Id of the channel to load.

    Returns:
        ``(channel_details, videos, all_video_data, videos_df)``, or four
        ``None`` values when the channel could not be resolved.
    """
    channel_details, videos, all_video_data, videos_df = _fetch_channel_data(api_key, channel_id)

    if channel_details is None:
        return None, None, None, None

    loaded_for = (api_key, channel_id)
    if st.session_state.get('_loaded_channel') != loaded_for:
        st.session_state.start_index = 0
        st.session_state.end_index = 10
        st.session_state['video_id'] = None
        st.session_state['_loaded_channel'] = loaded_for

    st.session_state.all_video_df = all_video_data
    st.session_state.api_key = api_key

    return channel_details, videos, all_video_data, videos_df
def display_video_list(video_data, start_index, end_index, search_query=None):
    """Render a searchable, paginated table of videos with a per-row statistics button.

    Each row shows the video's thumbnail, id and title plus a button that
    stores the id in ``st.session_state['video_id']`` and switches to the
    ``video_data`` page.

    Args:
        video_data: Iterable of video dicts; the keys ``'id'``, ``'title'``
            and ``'thumbnail'`` are read from each one.
        start_index: Index of the first video on the initial page.
        end_index: Index one past the last video on the initial page.
        search_query: Initial text for the title search box; ``None`` means empty.
    """
    if search_query is None:
        search_query = ""

    # Input widget for searching videos by title
    new_search_query = st.text_input("Search Videos by Title", search_query)

    # The page length is whatever window the caller asked for.
    page_size = end_index - start_index

    # Initialize start_index and end_index in session_state
    if 'start_index' not in st.session_state:
        st.session_state.start_index = start_index
    if 'end_index' not in st.session_state:
        st.session_state.end_index = end_index

    # Compare with the query seen on the previous run rather than with the
    # constant default; otherwise any non-empty search would reset the window
    # on every rerun and paging through search results could never advance.
    previous_query = st.session_state.get('video_search_query', search_query)
    if new_search_query != previous_query:
        st.session_state.start_index = start_index
        st.session_state.end_index = end_index
    st.session_state['video_search_query'] = new_search_query

    # Case-insensitive title filter across the entire video_data list
    needle = new_search_query.lower()
    filtered_videos = [video for video in video_data if needle in video['title'].lower()]

    # Paginate the filtered results
    paginated_videos = filtered_videos[st.session_state.start_index:st.session_state.end_index]

    for video in paginated_videos:
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.image(video['thumbnail'])
        with col2:
            st.write(video['id'])
        with col3:
            st.write(video['title'])
        with col4:
            if st.button("Check Video Statistics", key=video['id']):
                st.session_state['video_id'] = video['id']
                switch_page("video_data")

    # Offer the next page only while more filtered results remain
    if st.session_state.end_index < len(filtered_videos):
        if st.button(f'Load next {page_size} videos', key='load_next'):
            st.session_state.start_index = st.session_state.end_index
            st.session_state.end_index += page_size
            # The rows above were drawn from the old window; rerun now so the
            # new page appears immediately instead of on the next interaction.
            st.rerun()
########################################################################################################################
# MAIN PAGE CONFIGURATION
########################################################################################################################
# Browser-tab title and icon, with the page laid out at full width.
st.set_page_config(
    page_title="Youtube Channel Analytics Dashboard",
    page_icon="📊",
    layout="wide",
)
########################################################################################################################
# SIDE BAR CONFIGURATION
########################################################################################################################
st.title("YouTube Analytics Dashboard")

# Sidebar
st.sidebar.title("Settings")

# Sidebar: Enter Channel ID and YouTube API Key.
# Seed both values once per session so the inputs below can re-display
# whatever was entered on an earlier run.
if 'API_KEY' not in st.session_state:
    st.session_state.API_KEY = ""
if 'CHANNEL_ID' not in st.session_state:
    st.session_state.CHANNEL_ID = ""

st.session_state.API_KEY = st.sidebar.text_input("Enter your YouTube API Key",
                                                 st.session_state.API_KEY,
                                                 type="password")
st.session_state.CHANNEL_ID = st.sidebar.text_input("Enter the YouTube Channel ID",
                                                    st.session_state.CHANNEL_ID)

# Nothing can be loaded until both credentials are present.
if not st.session_state.API_KEY or not st.session_state.CHANNEL_ID:
    st.warning("Please enter your API Key and Channel ID.")

    # Display the GitHub link for the user manual
    user_manual_link = "https://github.com/zainmz/Youtube-Channel-Analytics-Dashboard"
    st.markdown(f"If you need help, please refer to the GitHub Repository for the [User Manual]({user_manual_link}).")
    st.stop()

# Data Refresh Button
refresh_button = st.sidebar.button("Refresh Data")

if refresh_button:
    # The download is cached on (api key, channel id); calling it again with
    # the same arguments would just return the stored result, so the cache
    # has to be dropped first for a refresh to actually refetch.
    st.cache_data.clear()
    with st.spinner("Refreshing data..."):
        channel_details, videos, all_video_data, videos_df = download_data(st.session_state.API_KEY,
                                                                           st.session_state.CHANNEL_ID)
else:
    # Regular load (served from cache after the first run)
    channel_details, videos, all_video_data, videos_df = download_data(st.session_state.API_KEY,
                                                                       st.session_state.CHANNEL_ID)

# download_data signals an unknown channel by returning None values
if channel_details is None:
    st.warning("Invalid YouTube Channel ID. Please check and enter a valid Channel ID.")
    st.stop()
# Data Filters for fine-tuned data selection
st.sidebar.title("Data Filters")

num_videos = st.sidebar.slider("Select Number of Top Videos to Display:", 1, 50, 10)

# Without any rows there is no date range to offer and nothing to chart.
if all_video_data.empty:
    st.warning("No videos were found for this channel.")
    st.stop()

# Convert the 'published_date' column to datetime format
all_video_data['published_date'] = pd.to_datetime(all_video_data['published_date'])

# Extract min and max publish dates as plain date objects for the pickers
min_date = all_video_data['published_date'].min().date()
max_date = all_video_data['published_date'].max().date()

# Sidebar date input
start_date = st.sidebar.date_input("Select Start Date", min_date)
end_date = st.sidebar.date_input("Select End Date", max_date)

if start_date > end_date:
    st.sidebar.warning("Start date should be earlier than end date.")
    st.stop()

tag_search = st.sidebar.text_input("Search Videos by Tag")

date_range_start = pd.Timestamp(start_date)
date_range_end = pd.Timestamp(end_date)

# pd.Timestamp(end_date) is midnight at the START of that day, so comparing
# with "<=" would drop every video published later on the selected end date
# (including the newest one under the default range). Use an exclusive bound
# at the start of the following day instead.
day_after_end = date_range_end + pd.Timedelta(days=1)

filtered_data = all_video_data[(all_video_data['published_date'] >= date_range_start) &
                               (all_video_data['published_date'] < day_after_end)]

if tag_search:
    # NOTE(review): presumably each 'tags' cell is a list of strings, making
    # this an exact, case-sensitive match - confirm against the extractor.
    filtered_data = filtered_data[filtered_data['tags'].apply(lambda x: tag_search in x)]
########################################################################################################################
# CHANNEL DETAILS AREA CONFIGURATION
########################################################################################################################
# Channel summary: logo, title and description on the left, a link to the
# channel on the right, followed by a row of headline metrics.
st.header("Channel Details", divider="green")

info_col, _, link_col = st.columns(3)

with info_col:
    add_logo(channel_details['thumbnail'], height=300)

    # Totals rendered with thousands separators
    view_count_formatted = f"{int(channel_details['viewCount']):,}"
    subscriber_count_formatted = f"{int(channel_details['subscriberCount']):,}"

    st.markdown(f"**Channel Title:** {channel_details['title']}")
    st.markdown(f"**Channel Description:** {channel_details['description']}")

with link_col:
    # Go to Channel Button
    st.link_button("Go to Channel", f"https://www.youtube.com/channel/{st.session_state.CHANNEL_ID}")

views_col, subscribers_col, videos_col = st.columns(3)
views_col.metric("Total Views", view_count_formatted, "")
subscribers_col.metric("Subscribers", subscriber_count_formatted, "")
videos_col.metric("Total Videos", len(videos), "")

style_metric_cards(
    background_color="#000000",
    border_left_color="#049204",
    border_color="#0E0E0E",
)
########################################################################################################################
# TOP VIDEO GRAPHS AREA
########################################################################################################################
# Three side-by-side bar charts, one per engagement metric.
# Each entry: (column to rank by, subheader text, y-axis title, bar colour).
top_video_charts = (
    ('view_count', f"Top {num_videos} Videos Based on Views", "View Count", 'green'),
    ('like_count', f"Top {num_videos} Videos Based on Likes", "Like Count", 'orange'),
    ('comment_count', f"Top {num_videos} Based on Comments", "Comment Count", 'green'),
)

for chart_column, (metric, heading, y_axis_title, bar_colour) in zip(st.columns(3), top_video_charts):
    with chart_column:
        st.subheader(heading)

        # Highest values first, then keep the number chosen in the sidebar
        ranked_df = filtered_data.sort_values(by=metric, ascending=False).head(num_videos)

        with chart_container(ranked_df):
            fig = px.bar(ranked_df, x='title', y=metric)
            fig.update_layout(xaxis_title="Video Title", yaxis_title=y_axis_title)
            fig.update_traces(marker_color=bar_colour)
            st.plotly_chart(fig, use_container_width=True)
########################################################################################################################
# CHANNEL GROWTH STATS
########################################################################################################################
st.subheader("Viewership Growth Over Time", divider="green")

views = filtered_data['view_count']
dates = filtered_data['published_date']

# Time series of per-video view counts against publish date
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=dates, y=views, mode='lines+markers', name='Views Over Time', line=dict(color='orange'))
)
fig.update_layout(title='Views Over Time',
                  xaxis_title='Published Date',
                  yaxis_title='Number of Views',
                  template="plotly_dark")
st.plotly_chart(fig, use_container_width=True)

st.subheader("Predicted Viewership Growth Over Time", divider="green")

# Length in days of both the history window shown and the forecast horizon.
FORECAST_DAYS = 30

with st.spinner(f"Predicting Views for the next {FORECAST_DAYS} Days"):
    # Prophet expects the columns to be named 'ds' (date) and 'y' (value).
    # The forecast is fitted on all videos, not only the filtered selection.
    forecast_df = all_video_data[['published_date', 'view_count']].rename(
        columns={'published_date': 'ds', 'view_count': 'y'})

    # Prophet refuses to fit on fewer than two non-null observations.
    if forecast_df['y'].notna().sum() < 2:
        st.info("Not enough video data to forecast views.")
    else:
        model = Prophet(
            yearly_seasonality=False,
            weekly_seasonality=True,
            daily_seasonality=True,
            seasonality_mode='additive')
        model.fit(forecast_df)

        # Extend the timeline past the last observation and predict over it
        future_dates = model.make_future_dataframe(periods=FORECAST_DAYS)
        forecast = model.predict(future_dates)

        last_date = forecast_df['ds'].max()

        # Keep only the rows that lie beyond the observed data
        forecasted_period = forecast[forecast['ds'] > last_date]

        # History window ending at the last observation. A dedicated name is
        # used so the sidebar's start_date selection is not overwritten.
        window_start = last_date - datetime.timedelta(days=FORECAST_DAYS)
        recent_history = forecast_df[(forecast_df['ds'] > window_start) & (forecast_df['ds'] <= last_date)]

        trace1 = go.Scatter(x=recent_history['ds'], y=recent_history['y'], mode='lines',
                            name=f'Actual Views (Last {FORECAST_DAYS} Days)')
        trace2 = go.Scatter(x=forecasted_period['ds'], y=forecasted_period['yhat'], mode='lines',
                            name=f'Predicted Views (Next {FORECAST_DAYS} Days)')
        layout = go.Layout(
            title=f"YouTube Views: Last {FORECAST_DAYS} Days and Forecast for Next {FORECAST_DAYS} Days",
            xaxis_title="Date",
            yaxis_title="Views")
        fig = go.Figure(data=[trace1, trace2], layout=layout)

        # Display the combined historical and forecast data
        st.plotly_chart(fig, use_container_width=True)
########################################################################################################################
# WORD CLOUD & LIKE TO VIEW RATIO
########################################################################################################################
col1, col2 = st.columns(2)

with col1:
    st.divider()
    with st.spinner("Generating Word Cloud..."):
        st.subheader("Most Common Tags")

        # Flatten every video's tags into one space-separated string
        all_tags = " ".join(" ".join(tags) for tags in filtered_data['tags'])

        # WordCloud.generate raises ValueError when the text contains no
        # usable words (e.g. the filters left no videos, or none have tags).
        try:
            wordcloud = WordCloud(width=800, height=400, background_color='black').generate(all_tags)
        except ValueError:
            wordcloud = None

        if wordcloud is None:
            st.info("No tags available for the selected videos.")
        else:
            # Draw on an explicit figure so it can be closed afterwards;
            # the script reruns on every interaction and unclosed pyplot
            # figures would otherwise accumulate in memory.
            cloud_fig, cloud_ax = plt.subplots(figsize=(10, 5))
            cloud_ax.imshow(wordcloud, interpolation='bilinear')
            cloud_ax.axis('off')
            cloud_fig.tight_layout(pad=0)

            # Render to an in-memory PNG for st.image
            buf = io.BytesIO()
            cloud_fig.savefig(buf, format="png", bbox_inches='tight', pad_inches=0)
            plt.close(cloud_fig)
            buf.seek(0)

            st.image(buf, use_column_width=True)

with col2:
    # Likes per view for each video. Kept as a standalone Series rather than
    # written back as a column: filtered_data is a selection taken from
    # all_video_data, and assigning into it triggers SettingWithCopyWarning.
    like_to_view_ratio = filtered_data['like_count'] / filtered_data['view_count']

    st.divider()
    st.subheader("Like-to-View Ratio Over Time")

    # Time series of the ratio against publish date
    fig_ratio = go.Figure()
    fig_ratio.add_trace(go.Scatter(x=filtered_data['published_date'], y=like_to_view_ratio,
                                   mode='lines+markers', name='Like-to-View Ratio',
                                   line=dict(color='green')))
    fig_ratio.update_layout(xaxis_title='Published Date',
                            yaxis_title='Like-to-View Ratio',
                            template="plotly_dark")

    # Display the plot in Streamlit
    st.plotly_chart(fig_ratio, use_container_width=True)
########################################################################################################################
# DETAILED VIDEO STATS SELECTION SECTION
########################################################################################################################
st.divider()
st.subheader("Detailed Video Statistics Video Selection")
st.write("Click on view statistics to get detailed information related to the selected video")

# Open the table on its first page: entries 0 through 9 of the video list.
display_video_list(videos, start_index=0, end_index=10)