# leet.py
from bs4 import BeautifulSoup
import re
import time
import traceback
import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from openpyxl import Workbook
import pandas as pd
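
# The imports above correspond to these pip packages (assumed current
# package names on PyPI):
#   pip install beautifulsoup4 selenium webdriver-manager openpyxl pandas
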
# Global lists to store problem data
questionNameList = []
questionUrlList = []
questionDifficultyList = []
questionStatusList = []

def openBrowser(url):
    print("-----------> Opening Browser")
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get(url)
    driver.maximize_window()
    return driver
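
# Note: to run without a visible browser window, one could add
# options.add_argument('--headless=new') in openBrowser above
# ('--headless=new' applies to recent Chrome builds; older releases
# use plain '--headless').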

def closeBrowser(driver):
    print("-----------> Closing Browser")
    driver.quit()

def extract_data_from_container(container):
    # Pull the problem name, URL, difficulty, and solved status out of
    # one row container from the study-plan page.
    name_div = container.select_one("div.truncate")
    problemName = name_div.get_text(strip=True) if name_div else ""
    if not problemName:
        link_tag = container.find("a", href=re.compile(r"^/problems/"))
        problemName = link_tag.get_text(strip=True) if link_tag else "Unknown Problem"
    link_tag = container.select_one("a[href^='/problems/']")
    if link_tag and link_tag.has_attr("href"):
        href = link_tag['href']
        if 'editorial' in href:
            href = href.replace("editorial", "description")
        problemUrl = "https://leetcode.com" + href
    else:
        problemUrl = "N/A"
    p_tag = container.find("p", string=re.compile("(Easy|Medium|Hard)", re.IGNORECASE))
    if p_tag:
        difficulty = p_tag.get_text(strip=True)
    else:
        parent_text = container.get_text(" ", strip=True)
        match = re.search(r"(Easy|Medium|Hard)", parent_text, re.IGNORECASE)
        difficulty = match.group(1) if match else "Unknown"
    svg_status = container.find("svg", class_=lambda c: c and ("text-lc-icon-tertiary" in c or "text-lc-green-60" in c))
    status = ""
    if svg_status:
        classes = svg_status.get("class", [])
        if "text-lc-green-60" in classes or "dark:text-dark-lc-green-60" in classes:
            status = "Done"
        elif "text-lc-icon-tertiary" in classes or "dark:text-dark-lc-icon-tertiary" in classes:
            status = "Not Done"
    return problemName, problemUrl, difficulty, status
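
# Note: the selectors above (div.truncate, text-lc-green-60,
# text-lc-icon-tertiary, etc.) are tied to LeetCode's current frontend
# class names and will need updating whenever the site's markup changes.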

def fetchPageData(pageUrl):
    browser = openBrowser(pageUrl)
    time.sleep(3)
    try:
        WebDriverWait(browser, 10).until(EC.title_contains("LeetCode"))
    except Exception as e:
        print("Title wait exception:", str(e))
    pageSource = browser.page_source
    soup = BeautifulSoup(pageSource, 'html.parser')
    containers = soup.select("div.relative.flex.h-full.w-full.items-center")
    if containers:
        print(f"Found {len(containers)} problem containers")
        for container in containers:
            name, url, diff, status = extract_data_from_container(container)
            if url != "N/A":
                questionNameList.append(name)
                questionUrlList.append(url)
                questionDifficultyList.append(diff)
                questionStatusList.append(status)
    else:
        print("Using fallback scraping method")
        anchors = soup.find_all('a', href=re.compile(r"^/problems/"))
        for link in anchors:
            problemName = link.get_text(strip=True) or link.get("title", "Unknown Problem").strip()
            href = link['href']
            if 'editorial' in href:
                href = href.replace("editorial", "description")
            problemUrl = "https://leetcode.com" + href
            parent_text = link.parent.get_text(" ", strip=True)
            match = re.search(r"(Easy|Medium|Hard)", parent_text, re.IGNORECASE)
            difficulty = match.group(1) if match else "Unknown"
            svg_status = link.parent.find("svg", class_=lambda c: c and ("text-lc-icon-tertiary" in c or "text-lc-green-60" in c))
            status = "Done" if svg_status and ("text-lc-green-60" in svg_status.get("class", []) or "dark:text-dark-lc-green-60" in svg_status.get("class", [])) else ""
            questionNameList.append(problemName)
            questionUrlList.append(problemUrl)
            questionDifficultyList.append(difficulty)
            questionStatusList.append(status)
    closeBrowser(browser)

def xcelSheet():
    excelFileName = 'LeetCode.xlsx'
    sheetName = 'Problem List'
    df = pd.DataFrame({
        'Question Name': questionNameList,
        'Question URL': questionUrlList,
        'Difficulty': questionDifficultyList,
        'Status': questionStatusList
    })
    wb = Workbook()
    sheet = wb.active
    sheet.title = sheetName
    headers = ['Question Name', 'Question URL', 'Difficulty', 'Status']
    sheet.append(headers)
    for idx, row in df.iterrows():
        sheet.cell(row=idx + 2, column=1, value=row['Question Name'])
        sheet.cell(row=idx + 2, column=2, value=row['Question URL']).hyperlink = row['Question URL']
        sheet.cell(row=idx + 2, column=3, value=row['Difficulty'])
        sheet.cell(row=idx + 2, column=4, value=row['Status'])
    for column in sheet.columns:
        sheet.column_dimensions[column[0].column_letter].width = 30
    wb.save(excelFileName)
    print(f"Excel sheet '{excelFileName}' created successfully!")

def fetchProfileData(profile_url):
    print("-----------> Fetching Profile Data")
    browser = openBrowser(profile_url)
    time.sleep(3)
    try:
        WebDriverWait(browser, 10).until(EC.title_contains("LeetCode"))
    except Exception as e:
        print("Title wait exception:", str(e))
    pageSource = browser.page_source
    soup = BeautifulSoup(pageSource, 'html.parser')
    profile_data = {}
    # Extract basic profile info
    rank_label = soup.find("span", string="Rank")
    if rank_label:
        rank_value = rank_label.find_next_sibling("span")
        if rank_value:
            profile_data["Rank"] = rank_value.get_text(strip=True)
    for difficulty in ["Easy", "Medium", "Hard"]:
        label = soup.find("div", string=difficulty)
        if label:
            count = label.find_next_sibling("div")
            if count:
                profile_data[f"{difficulty} Solved"] = count.get_text(strip=True)
    # Extract streak information
    streak_container = soup.find("div", class_="lc-md:flex hidden h-auto w-full flex-1 items-center justify-center")
    if streak_container:
        svg = streak_container.find("svg")
        if svg:
            # Process streak graph: one <g class="week"> per column of days
            week_groups = svg.find_all("g", class_=re.compile(r"\bweek\b"))
            week_groups = sorted(week_groups, key=lambda g: float(g.get("x", 0)))
            # Build month labels from the first dated cell of each week
            prev_month = None
            month_positions = {}
            for week_idx, week in enumerate(week_groups):
                rect = week.find("rect", attrs={"data-date": True})
                if rect:
                    date_str = rect["data-date"]
                    try:
                        dt = datetime.datetime.strptime(date_str, "%Y-%m-%d")
                        current_month = dt.month
                        if current_month != prev_month:
                            month_positions[week_idx] = dt.strftime("%b")
                            prev_month = current_month
                    except ValueError:
                        pass
            # Create month label row
            month_label_row = [" "] * len(week_groups)
            for pos, label in month_positions.items():
                if pos < len(month_label_row):
                    month_label_row[pos] = label
            # Print streak graph with labels
            if week_groups:
                print("\nStreak Calendar:")
                max_days = max(len(week.find_all("rect")) for week in week_groups)
                for day_idx in range(max_days):
                    row = []
                    for week in week_groups:
                        rects = week.find_all("rect")
                        if day_idx < len(rects):
                            fill = rects[day_idx].get("fill", "")
                            row.append("+" if "green" in fill else ".")
                        else:
                            row.append(" ")
                    print(" ".join(row))
                print("\nMonth Labels:")
                print(" ".join(month_label_row))
    # Print profile summary
    print("\nProfile Summary:")
    for key, value in profile_data.items():
        print(f"{key:>20}: {value}")
    closeBrowser(browser)
    return profile_data

def getData():
    try:
        while True:
            print("\n" + "=" * 40)
            print("LeetCode Data Scraper")
            print("1. Fetch Problems List")
            print("2. Fetch Profile Data")
            print("3. Both (Problems + Profile)")
            print("4. Exit")
            choice = input("Enter your choice (1-4): ").strip()
            if choice == '1':
                url = input("Enter problems URL (press Enter for default): ").strip()
                if not url:
                    url = "https://leetcode.com/studyplan/top-interview-150/"
                pages = 1
                try:
                    pages = int(input("Enter number of pages to scrape (default 1): ") or "1")
                except ValueError:
                    pass
                questionNameList.clear()
                questionUrlList.clear()
                questionDifficultyList.clear()
                questionStatusList.clear()
                for page in range(1, pages + 1):
                    print(f"\nScraping page {page}...")
                    fetchPageData(f"{url.rstrip('/')}/?page={page}")
                xcelSheet()
                print(f"\nSuccessfully saved {len(questionNameList)} problems to Excel!")
            elif choice == '2':
                profile_url = input("Enter your LeetCode profile URL: ").strip()
                if profile_url:
                    fetchProfileData(profile_url)
                else:
                    print("Invalid profile URL")
            elif choice == '3':
                # Fetch problems
                url = input("Enter problems URL (press Enter for default): ").strip() or "https://leetcode.com/studyplan/top-interview-150/"
                pages = 1
                try:
                    pages = int(input("Enter number of pages (default 1): ") or "1")
                except ValueError:
                    pass
                # Clear any rows left over from a previous run (mirrors choice '1')
                questionNameList.clear()
                questionUrlList.clear()
                questionDifficultyList.clear()
                questionStatusList.clear()
                for page in range(1, pages + 1):
                    print(f"\nScraping page {page}...")
                    fetchPageData(f"{url.rstrip('/')}/?page={page}")
                xcelSheet()
                # Fetch profile
                profile_url = input("\nEnter your LeetCode profile URL: ").strip()
                if profile_url:
                    fetchProfileData(profile_url)
            elif choice == '4':
                print("Exiting program...")
                break
            else:
                print("Invalid choice. Please enter 1-4.")
    except Exception as e:
        print("Error occurred:", str(e))
        traceback.print_exc()
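
# A minimal non-interactive sketch, bypassing the menu in getData()
# (assumes the default study-plan URL; xcelSheet() then writes LeetCode.xlsx
# from the module-level lists):
#
#   fetchPageData("https://leetcode.com/studyplan/top-interview-150/?page=1")
#   xcelSheet()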

if __name__ == "__main__":
    getData()