-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_model.py
More file actions
78 lines (59 loc) · 2.21 KB
/
Copy pathload_model.py
File metadata and controls
78 lines (59 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
import catboost
# Load the data to score. Note that this reads the *test* split, not the
# training data.
test_csv_path = "DataSet/test.csv"
df = pd.read_csv(test_csv_path)
# Empty frame keyed by the "index" column; the predictions are attached to it
# later and it is what gets written out as the submission.
output = pd.DataFrame(index=df["index"])
# Promote the first CSV column to the row index so it is not used as a feature.
first_column = df.columns[0]
df = df.set_index(first_column)
# Strip the "$" sign from "amount" and parse the remainder as a float.
# regex=False forces a literal match: as a regular expression "$" is the
# end-of-string anchor, and the default value of `regex` differs between
# pandas versions, so relying on the default is fragile.
df["amount"] = df["amount"].str.replace("$", "", regex=False).astype(float)

# Derive three features from "zip", then drop the raw column.
df["zip_1"] = df["zip"] // 10000
# NOTE(review): this subtracts zip_1 itself rather than zip_1 * 10000, so
# zip_2 is approximately zip // 100 instead of the middle digits. Left as-is
# on purpose: the saved model presumably expects features built exactly this
# way -- confirm against the training script before changing it.
df["zip_2"] = (df["zip"] - df["zip_1"]) // 100
df["zip_4"] = df["zip"] % 100
df = df.drop("zip", axis=1)

# Replace every missing value with the sentinel -1. This runs before the
# integer/category conversions below, so those never see NaN.
df = df.fillna(-1)

# Store "amount" as an integer number of hundredths (a filled -1 becomes -100).
df["amount"] = (df["amount"] * 100).round().astype("int64")

# The remaining zip-derived numeric features become plain integers.
for int_column in ("zip_2", "zip_4"):
    df[int_column] = df[int_column].astype("int64")

# Columns converted to the pandas "category" dtype.
categorical_columns = (
    "merchant_id",
    "mcc",
    "merchant_city",
    "merchant_state",
    "errors?",
    "use_chip",
    "user_id",
    "card_id",
)
for cat_column in categorical_columns:
    df[cat_column] = df[cat_column].astype("category")

# zip_1 goes through int64 first so its categories are integers, not floats.
df["zip_1"] = df["zip_1"].astype("int64").astype("category")
# Aggregate features: the mean "amount" per user and per merchant, computed
# over the rows of this same frame.
user_avg_amount = (
    df.groupby("user_id")["amount"]
    .mean()
    .rename("user_avg_amount")
    .reset_index()
)
merchant_avg_amount = (
    df.groupby("merchant_id")["amount"]
    .mean()
    .rename("merchant_avg_amount")
    .reset_index()
)
# Attach both aggregates as new columns. Left joins keep every original row in
# its original order; the merge replaces the row index with a fresh RangeIndex.
df = df.merge(user_avg_amount, how="left", on="user_id")
df = df.merge(merchant_avg_amount, how="left", on="merchant_id")
# Sanity output: the first ten rows and the dtype of every column.
print(df.head(n=10))
print(df.dtypes)

# Restore the trained classifier from its saved file.
model_path = "catboostdb/catboost_model_11_0.620253164556962_20230902-155044.cbm"
model = catboost.CatBoostClassifier()
model.load_model(model_path)

# Score every row of the prepared frame.
y_pred = model.predict(df)

# Store the predictions in the output frame and write it without a header row
# (the index is still written as the first field).
submission_path = "catboostdb/submit11_1.csv"
output["ans"] = y_pred
output.to_csv(submission_path, header=False)