Skip to content
Open
1 change: 1 addition & 0 deletions academy_data.json

Large diffs are not rendered by default.

31 changes: 23 additions & 8 deletions docker_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pprint import pprint as pp
import json
import pandas as pd
import os

class dockerSetUp:
"""
Expand All @@ -20,6 +21,7 @@ def __init__(self):
self.tables = json.load(open('SQL/tables.json')) #Open json file
self.my_list = []
self.table_names = list(self.tables.keys())
self.df_files = os.listdir('output_tables/')

#Output the column names
def get_column_names(self, table_name):
Expand Down Expand Up @@ -54,16 +56,29 @@ def all_tables_upload(self):


def pandas_to_SQL(self):
    """
    Insert every row of each exported JSON table into its SQL table.

    For each file name in ``self.df_files`` that ends in ``_table.json``,
    the file is read from ``output_tables/`` into a DataFrame and every row
    is inserted through ``self.cursor`` into the table named after the file
    (suffix removed, upper-cased).

    A ``pyodbc.Error`` raised while inserting is reported with ``print``
    and the rest of that table is abandoned; the remaining files are still
    processed. Nothing is committed here.
    """
    suffix = '_table.json'
    for table in self.df_files:
        # The target table name is derived from the file name, so a file
        # that does not follow the export naming scheme cannot be mapped.
        if not table.endswith(suffix):
            continue

        df = pd.read_json(f'output_tables/{table}', dtype={"phone_number": str},
                          convert_dates=["date", "start_date", "invited_date", "dob"])
        if df.empty:
            # No rows or no columns: there is nothing to insert.
            continue

        # Identifiers cannot be bound as parameters, so the table name is
        # still interpolated; the values are bound through placeholders so
        # quotes inside the data cannot break or alter the statement.
        table_name = table[:-len(suffix)].upper()
        placeholders = ', '.join('?' * len(df.columns))
        command = f'INSERT INTO {table_name} VALUES ({placeholders})'

        row_number = None
        try:
            # itertuples keeps each column's own dtype; iterrows would coerce
            # a whole row to one dtype (e.g. turning integer ids into floats).
            for row_number, row in enumerate(df.itertuples(index=False, name=None)):
                # Values are sent as text, as before, leaving the conversion
                # to the column type to the database.
                # NOTE(review): missing values are still sent as their text
                # form ('nan' / 'NaT') - confirm whether NULL is wanted.
                self.cursor.execute(command, [str(value) for value in row])
        except pyodbc.Error as e:
            print(f"Error {e} table={table},row={row_number}")

def close_cursor(self):
self.cursor.commit()
Expand Down
48 changes: 29 additions & 19 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def create_candidates_table(self):
"phone_number", "uni", "degree", "invited_date",
"geo_flex", "course_interest"]].copy()

self.candidates_table.to_json("output_tables/candidates_table.json")
self.candidates_table.to_json("output_tables/candidate_table.json")

def create_interview_table(self):
self.interview_table = self.big_table[["candidate_id", "invited_date", "self_development",
Expand Down Expand Up @@ -367,8 +367,8 @@ def create_tech_skill_tables(self):
jt_tech_skills_df.columns = ["candidate_id", "skill_id", "score"]
self.tech_junction_table = jt_tech_skills_df

self.tech_junction_table.to_json("output_tables/tech_junction_table.json")
self.tech_skills_table.to_json("output_tables/tech_skills_table.json")
self.tech_junction_table.to_json("output_tables/tech_skill_score_j_table.json")
self.tech_skills_table.to_json("output_tables/tech_skill_table.json")

def create_quality_junction(self):
big_table_nonan = self.big_table.dropna(subset=["qualities"])
Expand All @@ -395,7 +395,7 @@ def create_quality_junction(self):

self.quality_junction_table = jt_qualities_df

self.quality_junction_table.to_json("output_tables/quality_junction_table.json")
self.quality_junction_table.to_json("output_tables/interview_quality_j_table.json")

def create_quality_table(self):
strengths = self.attributes["strengths"]
Expand Down Expand Up @@ -488,7 +488,7 @@ def create_candidates_course_j_table(self):
self.candidates_course_j_table.dropna(subset=["course_id"], inplace=True)
self.candidates_course_j_table = self.candidates_course_j_table.astype({'course_id': 'int32'})

self.candidates_course_j_table.to_json("output_tables/candidates_course_j_table.json")
self.candidates_course_j_table.to_json("output_tables/candidate_course_j_table.json")

def create_tables(self):
self.list_attributes()
Expand All @@ -508,20 +508,30 @@ def create_tables(self):
self.create_candidates_course_j_table()

def print_tables(self):
    """
    Print a labelled preview of every generated table.

    For each table a heading line (preceded by a blank line) is printed,
    followed by the result of calling ``head()`` on that table, so each
    table appears exactly once in the output.
    """
    # (heading, attribute name) pairs, in the order they are printed.
    previews = (
        ("interview", "interview_table"),
        ("candidate", "candidates_table"),
        ("tech skills", "tech_skills_table"),
        ("tech junction", "tech_junction_table"),
        ("Quality", "quality_table"),
        ("quality junction", "quality_junction_table"),
        ("benchmark", "benchmarks_table"),
        ("sparta day", "sparta_day_table"),
        ("sparta day results", "sparta_day_results_table"),
        ("trainer table", "trainer_table"),
        ("course table", "course_table"),
        ("candidates course", "candidates_course_j_table"),
    )
    for heading, attribute in previews:
        print(f"\n{heading}")
        print(getattr(self, attribute).head())

def upload_tables_to_s3(self):
for file in os.listdir("output_tables"):
Expand Down
1 change: 1 addition & 0 deletions sparta_day_data.json

Large diffs are not rendered by default.