-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert_cie_csv.py
More file actions
70 lines (59 loc) · 1.96 KB
/
Copy pathconvert_cie_csv.py
File metadata and controls
70 lines (59 loc) · 1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import csv
import json
import math
import re
def clean_category(cat):
    """Return *cat* without square brackets at either end.

    Every ``[`` or ``]`` character at the start or end of the string is
    removed; brackets in the interior and all other characters (including
    whitespace) are left untouched.
    """
    bracket_chars = '[]'
    return cat.strip(bracket_chars)
# Verbose document-type labels mapped to the short codes written to the JSON
# output. Built once at import time rather than on every map_type() call.
_TYPE_CODES = {
    'Question Paper': 'qp',
    'Mark Scheme': 'ms',
    'Examiner Report': 'er',
    'Grade Thresholds': 'gt',
    'Insert': 'insert',
    'Confidential Instructions': 'ci',
    'Other': 'other',
}


def map_type(extracted_type):
    """Translate a verbose document-type label into its short code.

    Args:
        extracted_type: Label such as ``'Mark Scheme'``. Leading and trailing
            whitespace is ignored so that padded CSV cells still match.

    Returns:
        The short code for the label (e.g. ``'ms'``), or ``'other'`` when the
        label is empty, unrecognised, or not a string (for example ``None``).
    """
    if not isinstance(extracted_type, str):
        return 'other'
    return _TYPE_CODES.get(extracted_type.strip(), 'other')
# Convert developmentfiles/cie.csv into public/cie_data.json: one JSON object
# per CSV row, with normalised category/subject/year/type/unit fields.
data = []
# newline='' is what the csv module expects for files it reads, so that
# quoted fields containing embedded line breaks are parsed correctly.
with open('developmentfiles/cie.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Category and Subject both have surrounding square brackets removed.
        category = clean_category(row['Category'])
        subject = clean_category(row['Subject'])

        # Prefer Extracted_Year when it is non-empty, otherwise use Year.
        year = row['Extracted_Year'] or row['Year']
        try:
            year = int(year)
        except (TypeError, ValueError):
            # Missing (None) or non-integer text: keep the row, flag the year.
            year = 'Unknown'

        session = row['Extracted_Session']

        # Extracted_Type holds verbose labels such as 'Examiner Report';
        # map_type turns them into short codes, defaulting to 'other'.
        type_ = map_type(row['Extracted_Type'])

        # An empty component is stored as null rather than an empty string.
        component = row['Extracted_Component'] or None

        # Unit appears to be the subject code number (per the original
        # author's note); stored as a float, or null when not numeric.
        unit = row['Extracted_UnitCode']
        try:
            unit = float(unit)
        except (TypeError, ValueError):
            unit = None
        else:
            # float() accepts 'nan' and 'inf', which json.dump would emit as
            # NaN/Infinity tokens that are not valid JSON.
            if not math.isfinite(unit):
                unit = None

        item = {
            "Category": category,
            "Subject": subject,
            "Year": year,
            "Session": session,
            "Type": type_,
            "Component": component,
            "URL": row['Full_URL'],
            "Unit": unit,
        }
        data.append(item)

with open('public/cie_data.json', 'w') as f:
    json.dump(data, f, indent=2)

# Summary for the operator: row count and the distinct categories seen.
print(f"Converted {len(data)} items.")
categories = {d['Category'] for d in data}
print(f"Categories found: {categories}")