combine_dfs.py
## COMBINES DATAFRAMES OF PARSED REPLAY FILES ##
import pandas as pd
from tqdm import tqdm
import os
GRANULARITY = 0 # edit this. 1 -> 8x_10y_7z, .5 -> 16x_20y_14z, 0 -> exact
FRAMES_AHEAD = 15 # how many frames into the future the target position columns look
## NN shouldn't care about game time in OT
def fix_ot_secs(df):
    """Flag every frame from the start of overtime onward with seconds_remaining = -1."""
    if (df['seconds_remaining'] == -1.0).any():
        print('already fixed')
        return df
    # the clock decreases through regulation; the first increase marks the start of overtime
    i = 0
    while i + 1 < len(df) and df.at[i, 'seconds_remaining'] >= df.at[i + 1, 'seconds_remaining']:
        i += 1
    if i + 1 >= len(df):
        return df  # no overtime in this replay
    for j in range(i + 1, len(df)):
        df.at[j, 'seconds_remaining'] = -1
    return df
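
# Minimal sanity check (not part of the original script, just an illustration of the intended
# behaviour): a clock that counts 3 -> 2 -> 1 -> 0 and then rises again is in overtime from the
# first increase onward, and those frames get flagged with -1.
_demo = pd.DataFrame({'seconds_remaining': [3.0, 2.0, 1.0, 0.0, 1.0, 2.0]})
assert list(fix_ot_secs(_demo)['seconds_remaining']) == [3.0, 2.0, 1.0, 0.0, -1.0, -1.0]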
## combining replays ##
if GRANULARITY == 1:
    rootdir = '/home/zach/Files/Nas/ReplayModels/ReplayDataProcessing/RANKED_STANDARD/Replays/1400-1600/CSVs_8x_10y_7z/'
elif GRANULARITY == .5:
    rootdir = '/home/zach/Files/Nas/ReplayModels/ReplayDataProcessing/RANKED_STANDARD/Replays/1400-1600/CSVs_16x_20y_14z/'
elif GRANULARITY == 0:
    rootdir = '/home/zach/Files/Nas/ReplayModels/ReplayDataProcessing/RANKED_STANDARD/Replays/1400-1600/CSVs/'
else:
    raise ValueError('GRANULARITY must be 1, .5, or 0')
# read every replay CSV in rootdir (the directory is flat, so there is no need to walk subfolders)
pieces = []
for filename in tqdm(sorted(os.listdir(rootdir))):
    if not filename.endswith('.csv'):
        print("\n", filename, "not a csv\n")
        continue
    piece = pd.read_csv(os.path.join(rootdir, filename), low_memory=False)
    piece.drop(columns=['Unnamed: 0'], inplace=True)  # stray index column written by the parser
    piece = fix_ot_secs(piece)
    # target columns: position FRAMES_AHEAD frames into the future
    piece['0_pos_x_nf'] = piece['0_pos_x'].shift(-FRAMES_AHEAD)
    piece['0_pos_y_nf'] = piece['0_pos_y'].shift(-FRAMES_AHEAD)
    piece['0_pos_z_nf'] = piece['0_pos_z'].shift(-FRAMES_AHEAD)
    # drop the trailing FRAMES_AHEAD rows, whose future position is unknown
    pieces.append(piece[:-FRAMES_AHEAD])
# DataFrame.append was removed in pandas 2.0, so concatenate all replays in one go
all_dfs = pd.concat(pieces, ignore_index=True)
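
# Tiny illustration of the target construction above (not part of the original script):
# shift(-n) pairs each row with the value n frames later and leaves NaN at the tail,
# which is why the last FRAMES_AHEAD rows of every replay are dropped.
_s = pd.Series([10, 11, 12, 13])
assert _s.shift(-2).tolist()[:2] == [12.0, 13.0]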
print(len(all_dfs))
print("WRITING...")
if GRANULARITY == 0:
    all_dfs.to_csv(f"exact_train_{FRAMES_AHEAD}_frames.csv")
elif GRANULARITY == .5:
    all_dfs.to_csv(f"train_16x_20y_14z_{FRAMES_AHEAD}_frames.csv")
elif GRANULARITY == 1:
    all_dfs.to_csv(f"train_8x_10y_7z_{FRAMES_AHEAD}_frames.csv")