Tezos-India · iamamanporwal · Aug 21, 2023 · Aug 21, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/GreenDay/README.md b/GreenDay/README.md
@@ -0,0 +1,34 @@
+- Team Name: GreenDay
+- Project name: Resume Analyzer AI
+- Project's Description : The Resume Ranking and Extraction System is a web-based application designed to streamline the process of evaluating and ranking job applicants' resumes based on their similarity to a provided job description. The system also extracts essential information, such as names and emails, from the resumes to facilitate efficient candidate identification.
+- What problem you are trying to solve: HR teams no longer have to manually go through every resume file to find the candidates that match a job.
+- Tech Stack used while building the project: Python, NLP, Scikit-learn, Flask, html/css
+- Project Demo Photos, Videos: https://www.youtube.com/watch?v=0eo_5oyW11o&t=1s
+- Your Deployed Smart Contract's Link: NA
+- Folder link to project codebase on Tezos-India/TezAsia-2k23 Repository * 
+- Github repository link: https://github.com/iamamanporwal/TezAsia-2k23
+- Your PPT file: https://docs.google.com/presentation/d/1m6nUfaIcD1gQaR8sHhVywfDK09xFHWFI/edit?usp=sharing&ouid=113142930030400017248&rtpof=true&sd=true
+- Your Team members Info:  
+Members: 1. AMAN PORWAL
+Email: [email protected]
+College: Bharati Vidyapeeth College of Engineering.
+Linkedin: https://www.linkedin.com/in/aman-porwal-5933b120b/
+
+
+# Resume Analyzer Web App :memo::computer:
+
+![GitHub](https://img.shields.io/github/license/iamamanporwal/resume-ranker)
+![GitHub last commit](https://img.shields.io/github/last-commit/iamamanporwal/resume-ranker)
+![Python Version](https://img.shields.io/badge/python-3.8%2B-blue)
+
+An interactive web application that analyzes resumes based on a job description using natural language processing techniques. Demo video: https://www.youtube.com/watch?v=0eo_5oyW11o&t=1s
+
+## :rocket: Features
+
+- Enter a job description as text and upload resumes in PDF format.
+- Process resumes to extract names, emails, and text content.
+- Calculate the similarity between the job description and each resume.
+- Rank resumes based on similarity percentage.
+- Download the ranked resumes in a CSV file.
+
+The Resume Ranking and Extraction System is a web-based application designed to streamline the process of evaluating and ranking job applicants' resumes based on their similarity to a provided job description. The system also extracts essential information, such as names and emails, from the resumes to facilitate efficient candidate identification.
diff --git a/GreenDay/app.py b/GreenDay/app.py
@@ -0,0 +1,97 @@
+from flask import Flask, render_template, request
+import spacy
+import PyPDF2
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import re
+import csv
+import os
+
+# Flask application object; the route decorators below register on it.
+app = Flask(__name__)
+
+# Load spaCy NER model
+# NOTE(review): `nlp` is not referenced by any other code visible in this
+# file; extract_entities() relies on regular expressions instead.
+nlp = spacy.load("en_core_web_sm")
+
+# Extract text from PDFs
+def extract_text_from_pdf(pdf_path):
+    """Return the text of every page of the PDF at ``pdf_path``.
+
+    The per-page text is concatenated in page order with no separator.
+    """
+    with open(pdf_path, "rb") as handle:
+        reader = PyPDF2.PdfReader(handle)
+        # join() consumes the pages while the file is still open.
+        return "".join(page.extract_text() for page in reader.pages)
+
+# Extract emails and a candidate name using regular expressions
+def extract_entities(text):
+    """Pull e-mail addresses and a leading "First Last" name out of resume text.
+
+    Args:
+        text: Plain text extracted from a resume.
+
+    Returns:
+        A tuple ``(emails, names)``. ``emails`` lists every address found in
+        order of appearance. ``names`` is ``["First Last"]`` when the text
+        (ignoring leading whitespace) opens with two capitalised words, and
+        ``[]`` otherwise.
+    """
+    # Match only the address itself, so surrounding punctuation such as
+    # "<a@b.com>," is not captured the way a bare r'\S+@\S+' would.
+    emails = re.findall(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}', text)
+    # Extracted text may begin with blank lines or spaces; skip them so a
+    # name at the top of the resume is still recognised.
+    name_match = re.match(r'\s*([A-Z][a-z]+)\s+([A-Z][a-z]+)', text)
+    names = [" ".join(name_match.groups())] if name_match else []
+    return emails, names
+
+# Ranking produced by the most recent POST to index(); download_csv() reads it.
+# NOTE: this is module-level state shared by every client of the process, so
+# concurrent users overwrite each other's results.
+latest_results = []
+
+
+@app.route('/', methods=['GET', 'POST'])
+def index():
+    """Render the upload form and, on POST, rank the uploaded resumes.
+
+    Reads ``job_description`` from the form and the ``resume_files`` uploads,
+    saves each PDF under ``uploads/``, scores it against the job description
+    with TF-IDF cosine similarity (expressed as a percentage) and renders
+    ``index.html`` with ``(names, emails, similarity)`` tuples sorted from
+    most to least similar. On GET the template receives an empty list.
+    """
+    global latest_results
+    results = []
+    if request.method == 'POST':
+        job_description = request.form['job_description']
+        resume_files = request.files.getlist('resume_files')
+
+        # Create a directory for uploads if it doesn't exist
+        os.makedirs("uploads", exist_ok=True)
+
+        # Process uploaded resumes
+        processed_resumes = []
+        for resume_file in resume_files:
+            # The filename is client-controlled: keep only its last path
+            # component so it cannot point outside the uploads directory.
+            client_name = (resume_file.filename or "").replace("\\", "/")
+            safe_name = os.path.basename(client_name)
+            if safe_name in ("", ".", ".."):
+                # Empty file input or a name that is not a usable file name.
+                continue
+            resume_path = os.path.join("uploads", safe_name)
+            resume_file.save(resume_path)
+
+            # Process the saved file
+            resume_text = extract_text_from_pdf(resume_path)
+            emails, names = extract_entities(resume_text)
+            processed_resumes.append((names, emails, resume_text))
+
+        # TF-IDF vectorizer, fitted on the job description only, so resumes
+        # are scored on the job description's vocabulary.
+        tfidf_vectorizer = TfidfVectorizer()
+        job_desc_vector = tfidf_vectorizer.fit_transform([job_description])
+
+        # Rank resumes based on similarity
+        ranked_resumes = []
+        for (names, emails, resume_text) in processed_resumes:
+            resume_vector = tfidf_vectorizer.transform([resume_text])
+            similarity = cosine_similarity(job_desc_vector, resume_vector)[0][0] * 100
+            ranked_resumes.append((names, emails, similarity))
+
+        # Sort resumes by similarity score
+        ranked_resumes.sort(key=lambda x: x[2], reverse=True)
+
+        results = ranked_resumes
+        # Remember the ranking so /download_csv can export it.
+        latest_results = ranked_resumes
+
+    return render_template('index.html', results=results)
+
+from flask import send_file
+
+@app.route('/download_csv')
+def download_csv():
+    """Send the most recent ranking as a ``ranked_resumes.csv`` attachment.
+
+    The CSV has the columns Rank, Name, Email and Similarity. Missing names
+    or emails are written as "N/A". If no ranking has been produced yet the
+    file contains only the header row.
+    """
+    csv_filename = "ranked_resumes.csv"
+    # Write and send the same absolute path; a path relative to the working
+    # directory may differ from the directory this module lives in.
+    csv_full_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), csv_filename)
+
+    # csv.writer quotes fields that contain commas or quotes, which a
+    # hand-built f-string row would not.
+    with open(csv_full_path, "w", newline="", encoding="utf-8") as csv_file:
+        writer = csv.writer(csv_file, lineterminator="\n")
+        writer.writerow(["Rank", "Name", "Email", "Similarity"])
+        for rank, (names, emails, similarity) in enumerate(latest_results, start=1):
+            name = names[0] if names else "N/A"
+            email = emails[0] if emails else "N/A"
+            writer.writerow([rank, name, email, similarity])
+
+    # Send the file for download
+    return send_file(csv_full_path, as_attachment=True, download_name="ranked_resumes.csv")
+
+
+
+# Start Flask's built-in server when this file is executed directly.
+# NOTE(review): debug=True turns on the interactive debugger and reloader,
+# which allow code execution from the browser; keep this to local development.
+if __name__ == '__main__':
+    app.run(debug=True)
diff --git a/GreenDay/ranked_resumes.csv b/GreenDay/ranked_resumes.csv
@@ -0,0 +1,4 @@
+Rank,Name,Email,Similarity
+1,Emily Williams,[email protected],0.791666667
+2,Alex Johnson,[email protected],0.455091027
+3,Jane Smith,[email protected],0.396296962
diff --git a/GreenDay/resume1.pdf b/GreenDay/resume1.pdf
diff --git a/GreenDay/resume2.pdf b/GreenDay/resume2.pdf
diff --git a/GreenDay/resume3.pdf b/GreenDay/resume3.pdf
diff --git a/GreenDay/resume_ranker.py b/GreenDay/resume_ranker.py
@@ -0,0 +1,67 @@
+import spacy
+import PyPDF2
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import re
+import csv
+
+# Output file for the ranking written at the end of this script.
+csv_filename = "ranked_resumes.csv"
+
+# Load spaCy NER model
+# NOTE(review): `nlp` is not referenced by any other code visible in this
+# script; extract_entities() relies on regular expressions instead.
+nlp = spacy.load("en_core_web_sm")
+
+# Sample job description
+job_description = "NLP Specialist: Develop and implement NLP algorithms. Proficiency in Python, NLP libraries, and ML frameworks required."
+
+# List of resume PDF file paths
+# These are relative paths, so they resolve against the working directory.
+resume_paths = ["resume1.pdf", "resume2.pdf", "resume3.pdf"]  # Add more file paths here
+
+# Extract text from PDFs
+def extract_text_from_pdf(pdf_path):
+    """Return the text of every page of the PDF at ``pdf_path``.
+
+    The per-page text is concatenated in page order with no separator.
+    """
+    with open(pdf_path, "rb") as handle:
+        reader = PyPDF2.PdfReader(handle)
+        # join() consumes the pages while the file is still open.
+        return "".join(page.extract_text() for page in reader.pages)
+
+# Extract emails and a candidate name using regular expressions
+def extract_entities(text):
+    """Pull e-mail addresses and a leading "First Last" name out of resume text.
+
+    Args:
+        text: Plain text extracted from a resume.
+
+    Returns:
+        A tuple ``(emails, names)``. ``emails`` lists every address found in
+        order of appearance. ``names`` is ``["First Last"]`` when the text
+        (ignoring leading whitespace) opens with two capitalised words, and
+        ``[]`` otherwise.
+    """
+    # Match only the address itself, so surrounding punctuation such as
+    # "<a@b.com>," is not captured the way a bare r'\S+@\S+' would.
+    emails = re.findall(r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}', text)
+    # Extract the name using a simple pattern (assuming "First Last" format).
+    # Extracted text may begin with blank lines or spaces; skip them so a
+    # name at the top of the resume is still recognised.
+    name_match = re.match(r'\s*([A-Z][a-z]+)\s+([A-Z][a-z]+)', text)
+    names = [" ".join(name_match.groups())] if name_match else []
+
+    return emails, names
+
+
+# Extract job description features using TF-IDF
+# The vectorizer is fitted on the job description only, so each resume is
+# later scored on the job description's vocabulary.
+tfidf_vectorizer = TfidfVectorizer()
+job_desc_vector = tfidf_vectorizer.fit_transform([job_description])
+
+# Rank resumes based on similarity
+# Each entry is (names, emails, similarity); similarity is the raw cosine
+# value, not a percentage.
+ranked_resumes = []
+for resume_path in resume_paths:
+    resume_text = extract_text_from_pdf(resume_path)
+    emails, names = extract_entities(resume_text)
+    resume_vector = tfidf_vectorizer.transform([resume_text])
+    similarity = cosine_similarity(job_desc_vector, resume_vector)[0][0]
+    ranked_resumes.append((names, emails, similarity))
+
+# Sort resumes by similarity score
+# (highest similarity first)
+ranked_resumes.sort(key=lambda x: x[2], reverse=True)
+
+# Display ranked resumes with emails and names
+for rank, (names, emails, similarity) in enumerate(ranked_resumes, start=1):
+    print(f"Rank {rank}: Names: {names}, Emails: {emails}, Similarity: {similarity:.2f}")
+
+# Write the same ranking to csv_filename, one row per resume; only the first
+# name and first email are exported, with "N/A" when none was found.
+with open(csv_filename, "w", newline="") as csvfile:
+    csv_writer = csv.writer(csvfile)
+    csv_writer.writerow(["Rank", "Name", "Email", "Similarity"])
+
+    for rank, (names, emails, similarity) in enumerate(ranked_resumes, start=1):
+        name = names[0] if names else "N/A"
+        email = emails[0] if emails else "N/A"
+        csv_writer.writerow([rank, name, email, similarity])
diff --git a/GreenDay/static/styles.css b/GreenDay/static/styles.css
@@ -0,0 +1,77 @@
+/* Reset some default styles */
+body, h1, h2, p, table, th, td {
+    margin: 0;
+    padding: 0;
+}
+
+
+/* Basic styling */
+body {
+    font-family: Arial, sans-serif;
+    background-color: #f2f2f2;
+    color: #333;
+    padding: 20px;
+}
+
+h1 {
+    margin-bottom: 20px;
+}
+
+/* Form styling */
+form {
+    background-color: #fff;
+    padding: 20px;
+    border-radius: 8px;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+}
+
+label, input[type="file"], textarea, input[type="submit"] {
+    display: block;
+    margin-bottom: 10px;
+}
+
+input[type="file"] {
+    margin-top: 5px;
+}
+
+textarea {
+    width: 100%;
+    /* Count padding and border inside the 100% width so the textarea does
+       not overflow the form's padded box. */
+    box-sizing: border-box;
+    padding: 10px;
+    border: 1px solid #ccc;
+    border-radius: 4px;
+    resize: vertical;
+}
+
+input[type="submit"] {
+    background-color: #007bff;
+    color: #fff;
+    border: none;
+    padding: 10px 15px;
+    border-radius: 4px;
+    cursor: pointer;
+}
+
+/* Table styling */
+table {
+    border-collapse: collapse;
+    width: 100%;
+    margin-top: 20px;
+}
+
+th, td {
+    padding: 8px;
+    text-align: left;
+    border-bottom: 1px solid #ddd;
+}
+
+th {
+    background-color: #f2f2f2;
+}
+
+/* Responsive layout */
+@media (max-width: 768px) {
+    form {
+        width: 100%;
+        padding: 15px;
+    }
+}
diff --git a/GreenDay/templates/index.html b/GreenDay/templates/index.html
@@ -0,0 +1,53 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <title>Resume Analyzer AI</title>
+    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
+
+</head>
+<body>
+    <center>
+    <img src="https://d8it4huxumps7.cloudfront.net/uploads/images/150x150/uploadedManual-64a519d421041_ti_logo__1_.png?d=110x110" width="50" height="50" alt="Logo">
+    <h1>Resume Analyzer AI</h1>
+    A ranking system to find the best candidate.
+    </center>
+
+    <form action="/" method="post" enctype="multipart/form-data">
+        {# Each label's "for" must match the id of its control. #}
+        <label for="job_description">Job Description:</label>
+        <textarea id="job_description" name="job_description" rows="4" cols="50" required></textarea>
+        <br>
+        <label for="resume_files">Upload Resumes (PDF):</label>
+        <input type="file" id="resume_files" name="resume_files" accept=".pdf" multiple required>
+        <br>
+        <input type="submit" value="Analyze Resumes">
+    </form>
+    <br>
+    {% if results %}
+    <h2>Ranked Resumes:</h2>
+    <table>
+        <tr>
+            <th>Rank</th>
+            <th>Name</th>
+            <th>Email</th>
+            <th>Similarity in %</th>
+        </tr>
+        {# Each result is (names, emails, similarity); either list may be empty. #}
+        {% for result in results %}
+        <tr>
+            <td>{{ loop.index }}</td>
+            <td>{{ result[0][0] if result[0] else "N/A" }}</td>
+            <td>{{ result[1][0] if result[1] else "N/A" }}</td>
+            <td>{{ "%.2f"|format(result[2]) }}</td>
+        </tr>
+        {% endfor %}
+    </table>
+    <br>
+    <a href="{{ url_for('download_csv') }}" download="ranked_resumes.csv" class="download-link">
+        Download CSV
+    </a>
+    {% endif %}
+</body>
+</html>
diff --git a/GreenDay/uploads/resume1.pdf b/GreenDay/uploads/resume1.pdf
diff --git a/GreenDay/uploads/resume2.pdf b/GreenDay/uploads/resume2.pdf
diff --git a/GreenDay/uploads/resume3.pdf b/GreenDay/uploads/resume3.pdf