Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
bfaee40
Week 1- data collection
Vaibhav5012 Feb 26, 2026
7c56a8a
Week 2-data cleaning
Vaibhav5012 Feb 26, 2026
655c824
Cleaned and merged dataset
Vaibhav5012 Feb 26, 2026
a15d49d
Week 2-Data Cleaning & Merging multiple Datasets
Vaibhav5012 Feb 26, 2026
95e2bf0
Week 3-Feature Engineering
Vaibhav5012 Feb 26, 2026
6796cc2
Cleaned Injury Dataset
Vaibhav5012 Feb 27, 2026
48e19e3
Cleaned StatsBomb Dataset
Vaibhav5012 Feb 27, 2026
685245e
Cleaned Transfermarkt Dataset
Vaibhav5012 Feb 27, 2026
28b2e86
Player Sentiment Analysis
Vaibhav5012 Feb 27, 2026
0661156
Week 1-Data Collection
Vaibhav5012 Feb 27, 2026
296dd99
Week 3-4 Dataset Report
Vaibhav5012 Mar 5, 2026
adaf242
Dataset exploration app
Vaibhav5012 Mar 5, 2026
b9a48d1
Week 3-4 Final Dataset for training
Vaibhav5012 Mar 5, 2026
0ecab95
Week 3-4 Tasks
Vaibhav5012 Mar 6, 2026
80027a0
Week 4- LSTM Model Training
Vaibhav5012 Mar 12, 2026
2b1732c
Week 5-LSTM Models Training
Vaibhav5012 Mar 12, 2026
24c48a3
Delete LSTM Model Training/files directory
Vaibhav5012 Mar 12, 2026
7652ad2
Week 5- LSTM Model Training
Vaibhav5012 Mar 12, 2026
74a2b4f
Week 5- LSTM Model Training
Vaibhav5012 Mar 12, 2026
b91a56e
Delete LSTM Model Training/CSVs directory
Vaibhav5012 Mar 12, 2026
50d0519
Delete LSTM Model Training/Visualizations directory
Vaibhav5012 Mar 12, 2026
add97c1
Week 5- LSTM Model Training
Vaibhav5012 Mar 12, 2026
8b36543
Week 5- LSTM Model Training
Vaibhav5012 Mar 12, 2026
9345add
Week 6 Tasks
Vaibhav5012 Mar 20, 2026
49ecd95
Week 7 Tasks
Vaibhav5012 Mar 30, 2026
ef6e1c8
Week 7 Tasks
Vaibhav5012 Mar 30, 2026
7ac9a26
Enhance README with detailed project information
Vaibhav5012 Mar 30, 2026
e2d9997
dependencies
Vaibhav5012 Mar 30, 2026
bb0326b
Revise README for better clarity and organization
Vaibhav5012 Mar 30, 2026
e13024f
Update requirements.txt to modify dependencies
Vaibhav5012 Mar 30, 2026
7b77cda
Revise requirements.txt with specific package versions
Vaibhav5012 Mar 30, 2026
5a7879f
Week 7 Tasks
Vaibhav5012 Mar 30, 2026
0930285
Week 7 Tasks
Vaibhav5012 Mar 30, 2026
72d9c19
Week 7 Tasks
Vaibhav5012 Mar 30, 2026
515af09
dependencies
Vaibhav5012 Mar 30, 2026
af58f84
Week 7 Tasks
Vaibhav5012 Mar 30, 2026
cd561e7
Add live website link to README
Vaibhav5012 Mar 30, 2026
497bcd6
Week 5 Task
Vaibhav5012 Mar 31, 2026
ea1e422
Week 6 Task
Vaibhav5012 Mar 31, 2026
cf5f07b
Week 6 Task
Vaibhav5012 Mar 31, 2026
22ba566
Week 8
Vaibhav5012 Apr 2, 2026
2585fc5
Delete Documentation/Transfer-IQ ppt.pptx
Vaibhav5012 Apr 6, 2026
9721525
Week 8 Tasks
Vaibhav5012 Apr 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
275 changes: 275 additions & 0 deletions Advanced Feature Engineering and Sentiment Analysis/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# ----------------------------
# Load Dataset
# ----------------------------

@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the player transfer-value/sentiment CSV into a DataFrame.

    The result is wrapped in ``st.cache_data``. The path is relative to the
    current working directory of the process running the app.

    Returns:
        The full dataset as returned by ``pandas.read_csv``.
    """
    # Forward slash instead of a backslash: "\p" is an invalid escape
    # sequence in a normal string literal, and a backslash is only a path
    # separator on Windows, so the original path could not be found on
    # Linux/macOS hosts. "/" is accepted on every platform.
    return pd.read_csv(
        "Football player valuation analyzer/player_transfer_value_with_sentiment.csv"
    )

# Fetch the dataset through the cached loader.
df = load_data()

st.title("⚽ TransferIQ Player Intelligence Dashboard")

# ----------------------------
# Player Selection
# ----------------------------

# Offer every distinct player name in the dropdown.
players = pd.unique(df["player_name"])
player_name = st.selectbox("Select Player", players)

# Keep only the rows of the chosen player.
is_selected_player = df["player_name"] == player_name
player_df = df.loc[is_selected_player]

# The last row after sorting by "season" is used as the player's snapshot
# for every chart below.
latest = player_df.sort_values("season").iloc[-1]

# ----------------------------
# Player Overview
# ----------------------------

st.header("Player Profile")

# Three side-by-side columns; each column receives two stacked metrics.
col1, col2, col3 = st.columns(3)

col1.metric("Age", latest["current_age"])
col2.metric("Position", latest["position"])
col3.metric("Team", latest["team"])

# int(NaN) raises ValueError, which would abort the whole page for a player
# whose market value is missing; show a placeholder instead.
market_value = latest["market_value_eur"]
col1.metric(
    "Market Value (€)",
    "N/A" if pd.isna(market_value) else int(market_value),
)
col2.metric("Career Stage", latest["career_stage"])
col3.metric("Availability Rate", round(latest["availability_rate"], 2))

# ----------------------------
# Market Value Trend
# ----------------------------

st.header("Market Value Trend")

# A line trace connects points in the order the rows are supplied, so the
# rows are sorted by season first; otherwise an unordered CSV produces a
# zig-zagging line. This matches how `latest` is derived (sorted by season).
fig = px.line(
    player_df.sort_values("season"),
    x="season",
    y="market_value_eur",
    markers=True,
    title="Market Value Over Time",
)

st.plotly_chart(fig, use_container_width=True)

# ----------------------------
# Performance Stats
# ----------------------------

st.header("Performance Statistics")

# Columns of the snapshot row shown as bars, in display order.
perf_cols = [
    "goals",
    "assists",
    "shots",
    "passes_total",
    "passes_complete",
    "tackles_total",
    "interceptions",
    "dribbles",
]

# One bar per metric, taken from the player's snapshot row.
perf_values = [latest[metric] for metric in perf_cols]

fig = px.bar(
    x=perf_cols,
    y=perf_values,
    title="Performance Metrics",
    labels={"x": "Metric", "y": "Value"},
)

st.plotly_chart(fig)

# ----------------------------
# Per 90 Metrics
# ----------------------------

st.header("Per 90 Performance")

# Per-90-minute columns of the snapshot row, in display order.
per90_cols = [
    "goals_per90",
    "assists_per90",
    "shots_per90",
    "goal_contributions_per90",
    "defensive_actions_per90",
    "dribbles_per90",
]

# With plain lists as input the axes are titled literally "x" and "y";
# label them the same way the "Performance Metrics" chart does.
fig = px.bar(
    x=per90_cols,
    y=[latest[c] for c in per90_cols],
    labels={"x": "Metric", "y": "Value"},
    title="Per 90 Contributions",
)

st.plotly_chart(fig)

# ----------------------------
# Passing Analysis
# ----------------------------

st.header("Passing Analysis")

# Split total passes into completed and not-completed for the pie slices.
completed_passes = latest["passes_complete"]
failed_passes = latest["passes_total"] - latest["passes_complete"]

fig = px.pie(
    names=["Completed", "Failed"],
    values=[completed_passes, failed_passes],
    title="Pass Completion",
)

st.plotly_chart(fig)

# ----------------------------
# Defensive Metrics
# ----------------------------

st.header("Defensive Metrics")

# Defensive columns of the snapshot row, in display order.
defensive_cols = [
    "tackles_total",
    "tackles_won",
    "interceptions",
    "fouls_committed",
]

# With plain lists as input the axes are titled literally "x" and "y";
# label them the same way the "Performance Metrics" chart does.
fig = px.bar(
    x=defensive_cols,
    y=[latest[c] for c in defensive_cols],
    labels={"x": "Metric", "y": "Value"},
    title="Defensive Contribution",
)

st.plotly_chart(fig)

# ----------------------------
# Injury Analysis
# ----------------------------

st.header("Injury History")

# Injury columns of the snapshot row, in display order.
injury_cols = [
    "total_injuries",
    "total_days_injured",
    "total_matches_missed",
]

# With plain lists as input the axes are titled literally "x" and "y";
# label them the same way the "Performance Metrics" chart does.
fig = px.bar(
    x=injury_cols,
    y=[latest[c] for c in injury_cols],
    labels={"x": "Metric", "y": "Value"},
    title="Injury Impact",
)

st.plotly_chart(fig)

st.write("Most Common Injury:", latest["most_common_injury"])

# ----------------------------
# Social Media Sentiment
# ----------------------------

st.header("Social Sentiment Analysis")

# Count columns, listed in the same order as the slice names below.
sentiment_cols = [
    "positive_count",
    "negative_count",
    "neutral_count",
]

# Slice sizes read from the player's snapshot row.
sentiment_counts = [latest[column] for column in sentiment_cols]

fig = px.pie(
    names=["Positive", "Negative", "Neutral"],
    values=sentiment_counts,
    title="Fan Sentiment",
)

st.plotly_chart(fig)

# ----------------------------
# Sentiment Scores
# ----------------------------

st.subheader("Sentiment Scores")

# Display label -> snapshot-row column, in bar order.
score_columns = {
    "VADER Positive": "vader_positive_score",
    "VADER Negative": "vader_negative_score",
    "VADER Compound": "vader_compound_score",
    "TextBlob Polarity": "tb_polarity",
    "TextBlob Subjectivity": "tb_subjectivity",
}

# With plain lists as input the axes are titled literally "x" and "y";
# label them the same way the "Performance Metrics" chart does.
fig = px.bar(
    x=list(score_columns),
    y=[latest[column] for column in score_columns.values()],
    labels={"x": "Metric", "y": "Value"},
)

st.plotly_chart(fig)

# ----------------------------
# Transfer Value Intelligence
# ----------------------------

st.header("Transfer Market Intelligence")

# Market-related columns of the snapshot row, in display order.
market_cols = [
    "social_buzz_score",
    "transfer_attractiveness_score",
    "tweet_engagement_rate",
]

# With plain lists as input the axes are titled literally "x" and "y";
# label them the same way the "Performance Metrics" chart does.
fig = px.bar(
    x=market_cols,
    y=[latest[c] for c in market_cols],
    labels={"x": "Metric", "y": "Value"},
    title="Market Influence Indicators",
)

st.plotly_chart(fig)

# ----------------------------
# Radar Chart (Overall Player Profile)
# ----------------------------

st.header("Player Radar")

# Axes of the radar chart, in angular order.
radar_features = [
    "goals_per90",
    "assists_per90",
    "shots_per90",
    "defensive_actions_per90",
    "pass_accuracy_pct",
    "dribbles_per90",
]

# Raw snapshot values are plotted as-is on one shared radial axis.
# NOTE(review): "pass_accuracy_pct" presumably lives on a 0-100 scale while
# the per-90 columns are much smaller; if so the polygon is dominated by that
# one axis. Confirm against the data and consider normalising.
values = [latest[feature] for feature in radar_features]

radar_trace = go.Scatterpolar(
    r=values,
    theta=radar_features,
    fill="toself",
    name=player_name,
)

fig = go.Figure()
fig.add_trace(radar_trace)

# Show the radial axis; the legend is hidden.
fig.update_layout(
    polar={"radialaxis": {"visible": True}},
    showlegend=False,
)

st.plotly_chart(fig)

# ----------------------------
# Raw Data Table
# ----------------------------

st.header("Raw Player Data")

# Show every row of the selected player, unsorted and unfiltered.
st.dataframe(data=player_df)
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import pandas as pd
import numpy as np

# Load your longitudinal dataset
df = pd.read_csv('merged_football_dataset.csv')

# --- FEATURE ENGINEERING FOR MODELING ---

# 1. Target Variables (What the model might predict)
# Year-Over-Year Change in Market Value
# diff() subtracts the previous row of the same player, so the rows must be
# in season order for the result to be a year-over-year change. The diff is
# computed on a season-sorted view; the assignment aligns on the index (unique
# by default after read_csv), so df keeps its original row order.
# A player's first row has no predecessor and is set to 0.
df['market_value_yoy_change'] = (
    df.sort_values('season_year', kind='stable')
    .groupby('player_name')['market_value_eur']
    .diff()
    .fillna(0)
)

# Percentage Change in Market Value (with epsilon to prevent division by zero)
# The denominator (current value minus change) is the previous value.
epsilon = 1e-5
df['market_value_yoy_pct_change'] = (
    df['market_value_yoy_change'] /
    (df['market_value_eur'] - df['market_value_yoy_change'] + epsilon)
).fillna(0).clip(lower=-1.0, upper=5.0)  # Clip extreme percentages for stability

# 2. Performance Metrics (Standardized to per 90 minutes)
# Number of 90-minute units played in the season; shared by both metrics.
nineties_played = df['minutes_played_season'] / 90

# Attacking Metric: Goal Involvement per 90 mins
# fillna(0) covers the 0/0 case (no minutes and no involvements).
df['goal_involvement_per_90'] = (
    (df['goals_total'] + df['assists_total']) / nineties_played
).fillna(0)

# Defensive Metric: Defensive Actions per 90 mins
df['defensive_actions_per_90'] = (
    (df['sb_tackles'] + df['sb_interceptions']) / nineties_played
).fillna(0)

# Replace 'inf' values that happen if a player played 0 minutes.
# This has to run after BOTH per-90 columns exist: previously it ran before
# defensive_actions_per_90 was computed, leaving inf in that column.
df.replace([np.inf, -np.inf], 0, inplace=True)

# 3. Injury & Availability Metrics
# Availability Index: share of a 365-day year not spent injured,
# limited to the range [0, 1] (1 means no injured days).
injured_share = df['season_days_injured'] / 365
df['availability_index'] = (1 - injured_share).clip(lower=0, upper=1)

# 4. Sentiment Metrics
# Composite Sentiment Score (Average of Fan and Media)
df['overall_sentiment'] = (df['fan_sentiment'] + df['media_sentiment']) / 2

# Sentiment Trend (Is the player's reputation currently improving or worsening?)
# diff() subtracts the previous row of the same player, so it is computed on
# a season-sorted view; the assignment aligns on the index, so df keeps its
# row order. NOTE(review): relies on df having a unique index, which is the
# default after read_csv.
df['sentiment_yoy_change'] = (
    df.sort_values('season_year', kind='stable')
    .groupby('player_name')['overall_sentiment']
    .diff()
    .fillna(0)
)

# --- FINAL SELECTION ---
# Columns kept for the modeling dataset, grouped by theme. The concatenation
# order below is the column order of the output frame.
identity_cols = ['player_name', 'season_year', 'age', 'position']
market_value_cols = [
    'market_value_eur', 'market_value_yoy_change', 'market_value_yoy_pct_change',
]
performance_cols = [
    'minutes_played_season', 'goal_involvement_per_90',
    'defensive_actions_per_90', 'sb_pass_accuracy',
]
injury_cols = ['season_days_injured', 'injury_risk_score', 'availability_index']
sentiment_cols = [
    'fan_sentiment', 'media_sentiment', 'overall_sentiment', 'sentiment_yoy_change',
]

final_cols = (
    identity_cols
    + market_value_cols
    + performance_cols
    + injury_cols
    + sentiment_cols
)

# Raises KeyError if any listed column is absent from df.
modeling_df = df.loc[:, final_cols]

# Saving is currently disabled; uncomment to write the engineered features.
# modeling_df.to_csv('final_modeling_features.csv', index=False)
Loading