# Home.py

import datetime

import streamlit as st
import io
import plotly.express as px
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from streamlit_extras.metric_cards import style_metric_cards
from streamlit_extras.chart_container import chart_container
from streamlit_extras.switch_page_button import switch_page
from streamlit_extras.app_logo import add_logo

from prophet import Prophet

from channelDataExtraction import getChannelData
from channelVideoDataExtraction import *


########################################################################################################################
#                                               FUNCTIONS
########################################################################################################################
@st.cache_data
def download_data(api_key, channel_id):
    """Fetch channel details and per-video data for one YouTube channel.

    The function is wrapped in ``st.cache_data``.

    Args:
        api_key: YouTube API key passed on to the extraction helpers.
        channel_id: ID of the channel to look up.

    Returns:
        A 4-tuple ``(channel_details, videos, all_video_data, videos_df)``.
        All four entries are ``None`` when ``getChannelData`` returns ``None``.
    """
    details = getChannelData(api_key, channel_id)

    # A missing result is treated as a bad channel ID by the caller.
    if details is None:
        return (None,) * 4

    uploaded = getVideoList(api_key, details["uploads"])
    uploaded_df = pd.DataFrame(uploaded)
    known_ids = [entry['id'] for entry in uploaded if entry['id'] is not None]
    stats_df = buildVideoListDataframe(api_key, known_ids)

    # Reset the pagination window and the selected video, and publish the
    # freshly built data for other parts of the app via session state.
    # NOTE(review): these writes sit inside a cached function, so they
    # presumably only happen when the body actually executes - confirm.
    st.session_state['start_index'] = 0
    st.session_state['end_index'] = 10
    st.session_state['video_id'] = None
    st.session_state['all_video_df'] = stats_df

    st.session_state['api_key'] = st.session_state.API_KEY

    return details, uploaded, stats_df, uploaded_df


def _advance_video_page():
    """Button callback: slide the pagination window forward by ten videos."""
    st.session_state.start_index = st.session_state.end_index
    st.session_state.end_index += 10


def display_video_list(video_data, start_index, end_index, search_query=None):
    """Render a searchable, paginated list of videos with a per-video button.

    Each row shows the thumbnail, ID and title of a video plus a button that
    stores the video's ID in ``st.session_state['video_id']`` and switches to
    the ``video_data`` page.

    Args:
        video_data: Iterable of dicts with ``'id'``, ``'title'`` and
            ``'thumbnail'`` keys.
        start_index: Start of the pagination window used when the window is
            first created or reset.
        end_index: End (exclusive) of that initial window.
        search_query: Initial content of the title search box; ``None`` is
            treated as an empty string.
    """
    if search_query is None:
        search_query = ""
    new_search_query = st.text_input("Search Videos by Title", search_query)

    # Initialize start_index and end_index in session_state
    if 'start_index' not in st.session_state:
        st.session_state.start_index = start_index
    if 'end_index' not in st.session_state:
        st.session_state.end_index = end_index

    # Reset the window only when the query actually changed since the previous
    # run. Comparing against the constant default would reset it on every
    # rerun while a search is active and make pagination unusable.
    previous_query = st.session_state.get('last_video_search_query', search_query)
    if new_search_query != previous_query:
        st.session_state.start_index = start_index
        st.session_state.end_index = end_index
    st.session_state['last_video_search_query'] = new_search_query

    # Filter videos based on the search query across the entire video_data list
    needle = new_search_query.lower()
    filtered_videos = [video for video in video_data if needle in video['title'].lower()]

    # Paginate the filtered results
    window_start = st.session_state.start_index
    paginated_videos = filtered_videos[window_start:st.session_state.end_index]

    for offset, video in enumerate(paginated_videos):
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.image(video['thumbnail'])
        with col2:
            st.write(video['id'])
        with col3:
            st.write(video['title'])
        with col4:
            # Entries without an ID would otherwise share key=None and
            # collide, so give them a position-based fallback key.
            button_key = video['id']
            if button_key is None:
                button_key = f"video_stats_{window_start + offset}"
            if st.button("Check Video Statistics", key=button_key):
                st.session_state['video_id'] = video['id']
                switch_page("video_data")

    # Offer the next page while there are more results. The indices are
    # advanced in a callback so the new window is already in place when the
    # script reruns, instead of only showing up one interaction later.
    if st.session_state.end_index < len(filtered_videos):
        st.button('Load next 10 videos', key='load_next', on_click=_advance_video_page)


########################################################################################################################
#                                       MAIN PAGE CONFIGURATION
########################################################################################################################
# Browser tab title, tab icon and wide layout for the dashboard.
st.set_page_config(
    layout="wide",
    page_icon="📊",
    page_title="Youtube Channel Analytics Dashboard",
)

########################################################################################################################
#                                       SIDE BAR CONFIGURATION
########################################################################################################################
st.title("YouTube Analytics Dashboard")

# Sidebar
st.sidebar.title("Settings")

# Sidebar: Enter Channel ID and YouTube API Key.
# Seed both slots so the text inputs below always have a value to show.
if 'API_KEY' not in st.session_state:
    st.session_state.API_KEY = ""
if 'CHANNEL_ID' not in st.session_state:
    st.session_state.CHANNEL_ID = ""

# Pasted credentials often carry stray whitespace; strip it so a
# whitespace-only entry counts as missing.
st.session_state.API_KEY = st.sidebar.text_input("Enter your YouTube API Key", st.session_state.API_KEY,
                                                 type="password").strip()
st.session_state.CHANNEL_ID = st.sidebar.text_input("Enter the YouTube Channel ID",
                                                    st.session_state.CHANNEL_ID).strip()

# Nothing can be loaded without both values: show help and halt this run.
if not st.session_state.API_KEY or not st.session_state.CHANNEL_ID:
    st.warning("Please enter your API Key and Channel ID.")
    # Display the GitHub link for the user manual
    user_manual_link = "https://github.com/zainmz/Youtube-Channel-Analytics-Dashboard"
    st.markdown(f"If you need help, please refer to the GitHub Repository for the [User Manual]({user_manual_link}).")
    st.stop()

# Data Refresh Button
refresh_button = st.sidebar.button("Refresh Data")

if refresh_button:
    # download_data is wrapped in st.cache_data, so calling it again with the
    # same arguments would return the cached result. Clear the cache first so
    # the refresh really re-downloads.
    download_data.clear()
    with st.spinner("Refreshing data..."):
        channel_details, videos, all_video_data, videos_df = download_data(st.session_state.API_KEY,
                                                                           st.session_state.CHANNEL_ID)
else:
    # Regular load
    channel_details, videos, all_video_data, videos_df = download_data(st.session_state.API_KEY,
                                                                       st.session_state.CHANNEL_ID)

# download_data signals a failed channel lookup with None values.
if channel_details is None:
    st.warning("Invalid YouTube Channel ID. Please check and enter a valid Channel ID.")
    st.stop()

# Data Filters for fine-tuned data selection
st.sidebar.title("Data Filters")

num_videos = st.sidebar.slider("Select Number of Top Videos to Display:", 1, 50, 10)

# With no videos there is no date range to build the filters from, so stop
# with a message instead of failing on the min/max lookups below.
if all_video_data is None or all_video_data.empty:
    st.warning("No video data is available for this channel.")
    st.stop()

# Convert the 'published_date' column to datetime format
all_video_data['published_date'] = pd.to_datetime(all_video_data['published_date'])

# Extract min and max publish dates as plain dates for the date pickers
min_date = all_video_data['published_date'].min().date()
max_date = all_video_data['published_date'].max().date()

# Sidebar date input
start_date = st.sidebar.date_input("Select Start Date", min_date)
end_date = st.sidebar.date_input("Select End Date", max_date)

if start_date > end_date:
    st.sidebar.warning("Start date should be earlier than end date.")
    st.stop()

tag_search = st.sidebar.text_input("Search Videos by Tag")

date_range_start = pd.Timestamp(start_date)
date_range_end = pd.Timestamp(end_date)
# pd.Timestamp(end_date) is midnight at the START of the end date. Compare
# against the start of the following day so videos published at any time on
# the selected end date are included.
date_range_end_exclusive = date_range_end + pd.Timedelta(days=1)

# Copy so later column additions work on an independent frame, not a slice.
filtered_data = all_video_data[(all_video_data['published_date'] >= date_range_start) &
                               (all_video_data['published_date'] < date_range_end_exclusive)].copy()

if tag_search:
    # Keep only videos whose 'tags' entry contains the search text.
    filtered_data = filtered_data[filtered_data['tags'].apply(lambda x: tag_search in x)].copy()

########################################################################################################################
#                                       CHANNEL DETAILS AREA CONFIGURATION
########################################################################################################################

# Channel overview: logo, title and description, a link to the channel, then
# one row of metric cards.
st.header("Channel Details", divider="green")

info_col, _spacer_col, link_col = st.columns(3)

with info_col:
    add_logo(channel_details['thumbnail'], height=300)

    # Counts are converted to int so they can be shown with thousands separators
    total_views = int(channel_details['viewCount'])
    total_subscribers = int(channel_details['subscriberCount'])
    views_label = f"{total_views:,}"
    subscribers_label = f"{total_subscribers:,}"

    st.markdown(f"**Channel Title:** {channel_details['title']}")
    st.markdown(f"**Channel Description:** {channel_details['description']}")

with link_col:
    # Go to Channel Button
    channel_url = f"https://www.youtube.com/channel/{st.session_state.CHANNEL_ID}"
    st.link_button("Go to Channel", channel_url)

views_card, subscribers_card, videos_card = st.columns(3)
views_card.metric("Total Views", views_label, "")
subscribers_card.metric("Subscribers", subscribers_label, "")
videos_card.metric("Total Videos", len(videos), "")
style_metric_cards(
    background_color="#000000",
    border_left_color="#049204",
    border_color="#0E0E0E",
)

########################################################################################################################
#                                            TOP  VIDEO GRAPHS AREA
########################################################################################################################

def _render_top_videos_chart(column, data, metric, count, heading, y_label, bar_color):
    """Draw one "top videos" bar chart inside *column* and return its rows.

    Args:
        column: Streamlit column (context manager) to render into.
        data: DataFrame holding a ``'title'`` column and the *metric* column.
        metric: Name of the column to rank and plot by.
        count: Number of top rows to keep.
        heading: Subheader text shown above the chart.
        y_label: Y-axis title.
        bar_color: Colour applied to the bars.

    Returns:
        The ``count`` highest rows of *data* by *metric*, in descending order.
    """
    with column:
        st.subheader(heading)
        top_df = data.sort_values(by=metric, ascending=False).head(count)
        with chart_container(top_df):
            fig = px.bar(top_df, x='title', y=metric)
            fig.update_layout(xaxis_title="Video Title",
                              yaxis_title=y_label)
            fig.update_traces(marker_color=bar_color)
            st.plotly_chart(fig, use_container_width=True)
    return top_df


# Three side-by-side rankings of the filtered videos: views, likes, comments.
col1, col2, col3 = st.columns(3)

top_views_df = _render_top_videos_chart(
    col1, filtered_data, 'view_count', num_videos,
    f"Top {num_videos} Videos Based on Views", "View Count", 'green')

top_likes_df = _render_top_videos_chart(
    col2, filtered_data, 'like_count', num_videos,
    f"Top {num_videos} Videos Based on Likes", "Like Count", 'orange')

top_comments_df = _render_top_videos_chart(
    col3, filtered_data, 'comment_count', num_videos,
    f"Top {num_videos} Videos Based on Comments", "Comment Count", 'green')

########################################################################################################################
#                                            CHANNEL GROWTH STATS
########################################################################################################################

st.subheader("Viewership Growth Over Time", divider="green")

# Per-video view counts against publish dates, taken from the filtered data.
views = filtered_data['view_count']
dates = filtered_data['published_date']

views_trace = go.Scatter(
    x=dates,
    y=views,
    mode='lines+markers',
    name='Views Over Time',
    line=dict(color='orange'),
)

# Build the time series figure with Plotly and apply the dark template
fig = go.Figure()
fig.add_trace(views_trace)
fig.update_layout(
    title='Views Over Time',
    xaxis_title='Published Date',
    yaxis_title='Number of Views',
    template="plotly_dark",
)

st.plotly_chart(fig, use_container_width=True)

st.subheader("Predicted Viewership Growth Over Time", divider="green")

# The forecast horizon and the "recent history" window are both 30 days.
with st.spinner("Predicting Views for the next 30 Days"):
    # Prophet expects the columns to be named 'ds' (date) and 'y' (value).
    # rename() returns a new frame, so all_video_data itself is untouched.
    forecast_df = all_video_data[['published_date', 'view_count']].rename(
        columns={'published_date': 'ds', 'view_count': 'y'})

    if forecast_df['y'].notna().sum() < 2:
        # Prophet refuses to fit on fewer than two usable rows; show a
        # message instead of crashing the page.
        st.info("Not enough video data to generate a viewership forecast.")
    else:
        # Initialize the Prophet model
        model = Prophet(
            yearly_seasonality=False,
            weekly_seasonality=True,
            daily_seasonality=True,
            seasonality_mode='additive')

        # Fit the model with the data
        model.fit(forecast_df)

        # Extend the timeline 30 days past the data and predict over it
        future_dates = model.make_future_dataframe(periods=30)
        forecast = model.predict(future_dates)

        # Keep only the rows that lie after the last observed date
        last_date = forecast_df['ds'].max()
        forecasted_period = forecast[forecast['ds'] > last_date]

        # Observed data from the 30 days up to the last observed date, sorted
        # by date so the line is drawn left to right. A separate name is used
        # so the sidebar's start_date is not overwritten.
        history_start = last_date - datetime.timedelta(days=30)
        last_30_days = forecast_df[(forecast_df['ds'] > history_start) &
                                   (forecast_df['ds'] <= last_date)].sort_values('ds')

        # Plotting using Plotly
        trace1 = go.Scatter(x=last_30_days['ds'], y=last_30_days['y'], mode='lines',
                            name='Actual Views (Last 30 Days)')
        trace2 = go.Scatter(x=forecasted_period['ds'], y=forecasted_period['yhat'], mode='lines',
                            name='Predicted Views (Next 30 Days)')
        layout = go.Layout(title="YouTube Views: Last 30 Days and Forecast for Next 30 Days", xaxis_title="Date",
                           yaxis_title="Views")
        fig = go.Figure(data=[trace1, trace2], layout=layout)

        # Display the combined historical and forecast data in Streamlit using Plotly
        st.plotly_chart(fig, use_container_width=True)
########################################################################################################################
#                                         WORD CLOUD & LIKE TO VIEW RATIO
########################################################################################################################

col1, col2 = st.columns(2)

with col1:
    st.divider()
    with st.spinner("Generating Word Cloud..."):
        st.subheader("Most Common Tags")
        # Flatten every video's tag list into one space-separated string
        all_tags = " ".join(" ".join(tags) for tags in filtered_data['tags'])

        try:
            # generate() raises ValueError when the text yields no words,
            # e.g. when no video in the current filter has tags.
            wordcloud = WordCloud(width=800, height=400, background_color='black').generate(all_tags)
        except ValueError:
            st.info("No tags available for the selected videos.")
        else:
            # Draw on an explicit figure and close it afterwards; otherwise
            # every rerun would leave another open matplotlib figure behind.
            cloud_fig, cloud_ax = plt.subplots(figsize=(10, 5))
            cloud_ax.imshow(wordcloud, interpolation='bilinear')
            cloud_ax.axis('off')
            cloud_fig.tight_layout(pad=0)

            # Saving the figure to a bytes buffer
            buf = io.BytesIO()
            cloud_fig.savefig(buf, format="png", bbox_inches='tight', pad_inches=0)
            plt.close(cloud_fig)
            buf.seek(0)

            st.image(buf, use_column_width=True)

with col2:
    # Like-to-view ratio per video. Zero view counts are masked to NaN so the
    # division gives a gap in the plot rather than inf.
    view_counts = filtered_data['view_count']
    like_to_view_ratio = filtered_data['like_count'] / view_counts.where(view_counts != 0)

    # assign() returns a new frame, so the ratio column is added without
    # writing into a filtered slice.
    filtered_data = filtered_data.assign(like_to_view_ratio=like_to_view_ratio)

    st.divider()
    st.subheader("Like-to-View Ratio Over Time")

    # Creating a time series plot for Like-to-View Ratio using Plotly
    fig_ratio = go.Figure()

    fig_ratio.add_trace(go.Scatter(x=filtered_data['published_date'], y=like_to_view_ratio,
                                   mode='lines+markers', name='Like-to-View Ratio',
                                   line=dict(color='green')))

    fig_ratio.update_layout(xaxis_title='Published Date',
                            yaxis_title='Like-to-View Ratio',
                            template="plotly_dark")

    # Display the plot in Streamlit
    st.plotly_chart(fig_ratio, use_container_width=True)

########################################################################################################################
#                                         DETAILED VIDEO STATS SELECTION SECTION
########################################################################################################################

st.divider()
st.subheader("Detailed Video Statistics Video Selection")
st.write("Click on view statistics to get detailed information related to the selected video")
# Show the video list, starting with a window covering the first ten entries
display_video_list(videos, start_index=0, end_index=10)