-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
103 lines (86 loc) · 3.85 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import pandas as pd
import json
import os
def load_skills(skills_csv_path='input/skills.csv'):
"""
Load skills from CSV file and format them.
Args:
skills_csv_path (str): Path to the skills CSV file
Returns:
str: Formatted skills list as a string with each skill on a new line with a bullet point
bool: True if skills were loaded successfully, False otherwise
"""
try:
skills_df = pd.read_csv(skills_csv_path)
skills_list = skills_df['description'].tolist()
skills_formatted = '\n'.join(f"- {skill}" for skill in skills_list)
return skills_formatted, True
except FileNotFoundError:
print(f"Warning: skills.csv file not found at {skills_csv_path}")
return None, False
except KeyError:
print("Warning: 'description' column not found in skills.csv")
return None, False
def llm_skills_json_to_dataframe(json_response, story_id=None):
"""
Convert the JSON response from the Groq API to a pandas DataFrame
Args:
json_response (str): JSON string from the API response
story_id (str, optional): The story ID to add to the DataFrame
Returns:
pandas.DataFrame: DataFrame containing story_id, skills, explanations, ratings, and excerpts
"""
try:
# Parse the JSON string if it's not already a dictionary
if isinstance(json_response, str):
data = json.loads(json_response)
else:
data = json_response
# Get the story_id from the response or use the provided one
if 'story_id' in data:
story_id = data['story_id']
elif story_id is None:
raise ValueError("story_id must be provided if not present in the response")
# Convert the skills list to a DataFrame
df = pd.DataFrame(data['skills'])
# Add story_id column
df['story_id'] = story_id
# Reorder columns to put story_id first
columns_order = ['story_id', 'skill', 'explanation', 'story_excerpt', 'rating']
df = df[columns_order]
return df
except Exception as e:
print(f"Error converting to DataFrame: {str(e)}")
return None
def save_to_excel(df, filename, sheet_name):
"""
Save DataFrame to Excel:
- If file/sheet doesn't exist: create new file and sheet
- If file/sheet exists: append data to existing sheet
Args:
df: pandas DataFrame to save
filename: name of the Excel file
sheet_name: name of the sheet
"""
try:
if not os.path.exists(filename):
# Create new file and sheet if doesn't exist
df.to_excel(filename, sheet_name=sheet_name, index=False)
print(f"Created new file '{filename}' with sheet '{sheet_name}'")
else:
# File exists, try to load existing sheet
try:
existing_df = pd.read_excel(filename, sheet_name=sheet_name)
# Append new data to existing data
combined_df = pd.concat([existing_df, df], ignore_index=True)
# Save back to file
with pd.ExcelWriter(filename, mode='a', if_sheet_exists='replace') as writer:
combined_df.to_excel(writer, sheet_name=sheet_name, index=False)
print(f"Appended data to existing sheet '{sheet_name}' in '{filename}'")
except ValueError: # Sheet doesn't exist
# Append new sheet to existing file
with pd.ExcelWriter(filename, mode='a') as writer:
df.to_excel(writer, sheet_name=sheet_name, index=False)
print(f"Created new sheet '{sheet_name}' in existing file '{filename}'")
except Exception as e:
print(f"Error saving to Excel: {str(e)}")