forked from connorbybee/hoim
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
114 lines (91 loc) · 3.07 KB
/
utils.py
File metadata and controls
114 lines (91 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
from time import time
import itertools
import pandas as pd
import numpy as np
import seaborn as sb
from matplotlib import pyplot as plt
import jax.numpy as jnp
from tqdm import tqdm_notebook as tqdm
def best_group(df, groupby, metric, verbose=False):
    """Return the group of ``df`` whose mean ``metric`` is lowest.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to search.
    groupby : list/tuple of column names
        Columns to group on. Must be a sequence (the group index returned
        by ``idxmin`` is zipped against it), not a single bare string.
    metric : str
        Column whose group-wise mean is minimized.
    verbose : bool
        If True, print the reconstructed query string.

    Returns
    -------
    (best_group, query)
        The winning sub-frame and a ``DataFrame.query`` string that
        selects it from ``df``.
    """
    grouped = df.groupby(groupby)
    best_idx = grouped[metric].mean().idxmin()
    # BUG FIX: the original interpolated values bare (f'{v}'), which yields
    # an unparseable query for string-valued keys ("col == foo"). Quote
    # strings via !r; keep numbers as plain str so numpy scalars render
    # as digits (repr of np.int64 differs across numpy versions).
    query = ' & '.join(
        f'{col} == {v!r}' if isinstance(v, str) else f'{col} == {v}'
        for col, v in zip(groupby, best_idx)
    )
    if verbose:
        print(query)
    return grouped.get_group(best_idx), query
def binary_set(k):
    """Return all 2**k binary row-vectors of length k, in counting order.

    Row i is the k-digit (zero-padded) binary representation of i, so the
    result is a (2**k, k) integer array enumerating every bit pattern.
    """
    rows = []
    for value in range(2 ** k):
        bits = format(value, f'0{k}b')
        rows.append([int(bit) for bit in bits])
    return np.array(rows)
def check_dir(directory):
    """Create ``directory`` (including parents) if it does not exist.

    BUG FIX: the original exists-check followed by ``makedirs`` was a
    check-then-act race — two processes passing the check simultaneously
    made the second ``makedirs`` raise FileExistsError. ``exist_ok=True``
    makes creation idempotent and race-free.
    """
    os.makedirs(directory, exist_ok=True)
def write_results(directory, description, data):
    """Write ``data`` to a uniquely named pickle in ``directory``.

    The file is named ``{pid}_{timestamp}.pkl`` so concurrent writer
    processes never clobber each other. Each scalar in ``description``
    (plus the write timestamp under ``run_time``) is broadcast into a
    full column of the DataFrame, unless ``data`` already supplied a
    column of that name.

    Parameters
    ----------
    directory : str
        Output directory; created if missing.
    description : dict
        Metadata columns to attach. BUG FIX: the original mutated this
        dict in place (adding ``run_time``); the caller's dict is now
        left untouched.
    data : dict / DataFrame-constructible
        Payload passed straight to ``pd.DataFrame``.
    """
    # Race-free create-if-missing (avoids the exists/makedirs TOCTOU gap).
    os.makedirs(directory, exist_ok=True)
    run_time = time()
    pid = os.getpid()
    df_filename = os.path.join(directory, f'{pid}_{run_time}.pkl')
    # Copy before augmenting so the caller's dict is not modified.
    meta = {**description, 'run_time': run_time}
    df = pd.DataFrame(data)
    n_rows = len(df)
    for name, value in meta.items():
        if name not in df:  # never overwrite a column supplied in `data`
            df[name] = [value] * n_rows
    df.to_pickle(df_filename)
def quantize_binary(x):
    """Threshold ``x`` to {0, 1}: 1 where the real part is positive, else 0.

    Works elementwise on real or complex arrays; output dtype is int with
    the same shape as ``x``.
    """
    positive = np.real(x) > 0
    ones = np.ones_like(x, dtype=int)
    zeros = np.zeros_like(x, dtype=int)
    return np.where(positive, ones, zeros)
def quantize_spin(x):
    """Threshold ``x`` to {+1, -1}: +1 where the real part is positive, else -1.

    Works elementwise on real or complex arrays; output dtype is int with
    the same shape as ``x``.
    """
    positive = np.real(x) > 0
    ones = np.ones_like(x, dtype=int)
    return np.where(positive, ones, -ones)
def load_df(data_dir, ctime=0, max_entries=None, query=None, dropna=False, cols=None):
    """Load and concatenate every ``.pkl`` DataFrame found in ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory scanned (non-recursively) for ``.pkl`` files.
    ctime : float
        Skip files whose creation time is older than this timestamp.
    max_entries : int or None
        If set, keep only the first ``max_entries`` rows of each file.
    query : str or None
        Optional ``DataFrame.query`` expression applied per file.
    dropna : bool
        Drop rows with NaNs in each file when True.
    cols : list of str or None
        If set, restrict each file to these columns (missing ones are
        ignored per file).

    Returns
    -------
    pandas.DataFrame
        All surviving rows, re-indexed. Empty DataFrame if nothing loads.
    """
    dfs = []
    models = []
    for file in tqdm(os.listdir(data_dir)):
        root, ext = os.path.splitext(file)
        if ext != '.pkl':
            continue
        # BUG FIX: the original appended the bound method `root.strip().split`
        # (never called). `models` is not returned, so just record the stem;
        # presumably it was meant to be split on '_' — confirm with callers.
        models.append(root.strip())
        file_path = os.path.join(data_dir, file)
        if os.path.getctime(file_path) < ctime:
            continue
        try:
            df = pd.read_pickle(file_path)
        except Exception as e:
            print(e, file_path)
            # BUG FIX: without this `continue` the original fell through with
            # `df` undefined (NameError) or stale from the previous iteration,
            # silently appending the previous file's data twice.
            continue
        if cols:
            # BUG FIX: filter with a per-file local instead of reassigning
            # `cols`, which permanently shrank the column list to whatever
            # the first file happened to contain.
            keep = [c for c in cols if c in df]
            df = df[keep]
        if dropna:
            df = df.dropna()
        if query:
            df = df.query(query)
        if max_entries:
            df = df.iloc[:max_entries]
        dfs.append(df)
    if not dfs:
        # BUG FIX: pd.concat([]) raises ValueError; an empty result is the
        # sensible outcome when no file survives the filters.
        return pd.DataFrame()
    return pd.concat(dfs).reset_index(drop=True)
def remove_duplicated(df, groupby, verbose=False):
    """Drop rows that duplicate an earlier row on the ``groupby`` columns.

    Keeps the first occurrence of each key combination. When ``verbose``,
    prints how many rows were dropped and (as a sanity check) how many
    duplicates remain afterwards — the second number should be 0.
    """
    mask = df.duplicated(groupby)
    deduped = df[~mask]
    if verbose:
        print(mask.sum(), deduped.duplicated(groupby).sum())
    return deduped
def get_cases(params: dict):
    """Expand a dict of parameter lists into one flat dict per combination.

    Takes the Cartesian product of ``params``' value lists and pairs each
    combination back with its keys; tuple/list keys are flattened by
    ``ravel_dict`` so grouped parameters become individual entries.
    """
    names = list(params.keys())
    combos = itertools.product(*params.values())
    return [ravel_dict(dict(zip(names, combo))) for combo in combos]
def ravel_dict(dict):  # NOTE: param name shadows the builtin, kept for caller compatibility
    """Flatten tuple/list keys of a dict into individual entries.

    A key like ``('a', 'b')`` paired with ``(1, 2)`` becomes the two
    entries ``'a': 1`` and ``'b': 2``; scalar keys pass through as-is.
    """
    flattened = {}
    for key, value in dict.items():
        if isinstance(key, (list, tuple)):
            flattened.update(zip(key, value))
        else:
            flattened[key] = value
    return flattened