# python3
# Standard template for testing and training
from __future__ import division
from __future__ import print_function
import sys
import os
import shutil
sys.path.append('%s/gcn' % os.path.dirname(os.path.realpath(__file__)))
# add the library path for graph reduction and local search
# sys.path.append( '%s/kernel' % os.path.dirname(os.path.realpath(__file__)) )
import time
import random
import scipy.io as sio
import numpy as np
import scipy.sparse as sp
from multiprocessing import Queue
from copy import deepcopy
import networkx as nx
import tensorflow as tf
from collections import deque
from gcn.models import MLP, MLP2
# import the library for graph reduction and local search
# from reduce_lib import reducelib
import warnings
warnings.filterwarnings('ignore')
from gcn.utils import *
from runtime_config import flags, FLAGS  # settings (FLAGS)
from test_utils import *
from heuristics import *
from mwis_base_call import MWISSolver
# flags.DEFINE_float('epsilon', 1.0, 'test dataset')
# flags.DEFINE_float('epsilon_min', 0.001, 'test dataset')
# test data path

# Some preprocessing
model_func = MLP2
# noise-to-signal amplitude ratio implied by the configured SNR (in dB)
nsr = np.power(10.0, -FLAGS.snr_db / 20.0)
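# For example, snr_db = 20 gives nsr = 10^(-20/20) = 0.1, i.e. noise at one
# tenth of the signal amplitude (an illustrative value, not a project default).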
class DQNAgent(MWISSolver):
    def __init__(self, input_flags, memory_size=5000):
        super(DQNAgent, self).__init__(input_flags, memory_size)
        self.flags = input_flags
        self.num_supports = 1 + self.flags.max_degree
        # `self.placeholders` (and `self.feature_size`) are presumably set by
        # the MWISSolver base class; the original local definition is kept
        # below for reference.
        # self.placeholders = {
        #     'support': [tf.compat.v1.sparse_placeholder(tf.float32) for _ in range(self.num_supports)],
        #     'features': tf.compat.v1.sparse_placeholder(tf.float32, shape=(None, self.feature_size)),  # featureless: #points
        #     'adj': tf.compat.v1.sparse_placeholder(tf.float32),
        #     'labels': tf.compat.v1.placeholder(tf.float32, shape=(None, self.flags.diver_num)),  # 0: not linked, 1: linked
        #     'labels_mask': tf.compat.v1.placeholder(tf.int32),
        #     'actions': tf.compat.v1.placeholder(tf.float32, shape=(None, 1)),  # real actions, including exploration
        #     'dropout': tf.compat.v1.placeholder_with_default(0., shape=()),
        #     'num_features_nonzero': tf.compat.v1.placeholder(tf.int32)  # helper variable for sparse dropout
        # }
        # `config` is the module-level session config defined at the bottom of this file
        self.sess = tf.compat.v1.Session(config=config)
        self.model = self._build_model()
        with self.sess.as_default():
            self.sess.run(tf.compat.v1.global_variables_initializer())
        self.saver = tf.compat.v1.train.Saver(max_to_keep=1000)
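    # Note: this class uses TF1-style graph-mode APIs via tf.compat.v1; when
    # running under TensorFlow 2.x, tf.compat.v1.disable_eager_execution()
    # would typically need to be called before the graph is built.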
    def _build_model(self):
        # Neural net for the deep-Q-learning model (MLP2 from gcn.models)
        model = model_func(self.placeholders, hidden_dim=self.flags.hidden1, bias=True, logging=True)
        return model
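    # The model object is assumed to expose `outputs` (per-vertex scores),
    # `pred` (argmax prediction), and `opt_op`/`loss` (training ops); those are
    # the only attributes this class relies on (see predict() and replay()).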
    def makestate(self, adj, wts_nn):
        reduced_nn = wts_nn.shape[0]
        norm_wts = np.amax(wts_nn)
        # features = np.multiply(np.ones([reduced_nn, self.feature_size]), wts_nn/norm_wts)
        degrees = np.asarray(adj.sum(axis=1).astype(float)).flatten()
        # node features: first column holds vertex degrees, remaining columns are ones
        features = np.ones([reduced_nn, self.feature_size])
        features[:, 0] = degrees
        features = sp.lil_matrix(features)
        features = sparse_to_tuple(features)
        support = simple_polynomials(adj, self.flags.max_degree)
        state = {"features": features, "support": support}
        return state
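    # Sketch of the state for a toy 3-vertex path graph (hypothetical numbers):
    # degrees = [1, 2, 1], so features (before sparsifying) would be
    # [[1, 1, ...], [2, 1, ...], [1, 1, ...]] with self.feature_size columns,
    # while `support` holds the polynomial terms of the adjacency up to order
    # max_degree, as produced by simple_polynomials().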
    def predict(self, state):
        feed_dict_val = construct_feed_dict4pred(state["features"], state["support"], self.placeholders)
        with self.sess.as_default():
            act_values, action = self.sess.run([self.model.outputs, self.model.pred], feed_dict=feed_dict_val)
        return act_values, action
    def act(self, state, train):
        act_values, action = self.predict(state)
        if train:
            # epsilon-greedy exploration: with probability epsilon, replace the
            # predicted values with uniform noise before taking the argmax
            if np.random.rand() <= self.epsilon:
                act_values = np.random.uniform(0, 1, size=act_values.shape)
                # act_values[act_values<0] = 0
            action = np.argmax(act_values)
        return act_values, action  # returns action
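    # Note: epsilon itself decays in replay() (epsilon *= epsilon_decay), so
    # exploration fades as training proceeds; at test time (train=False) the
    # model's own prediction is always used.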
    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return None
        minibatch = random.sample(self.memory, batch_size)
        states, targets_f = [], []
        losses = []
        batch_tgt = np.array([])
        for state, act_vals, solu, wts_nn, reward in minibatch:
            # target = np.mean(wts_nn.flatten()) * reward
            # target_f = np.ones((act_vals.size, 1)) * (0 - reward)
            target = reward
            # if not done:
            #     act_values, _ = self.predict(next_state)
            #     target = (reward + self.gamma * np.amax(act_values))
            # target_f, _ = self.predict(state)
            target_f = act_vals
            # target_f = np.zeros((act_vals.size, 1))
            # target_f = -np.abs(target_f)
            if FLAGS.predict == 'mwis':
                target_f[solu, :] = target  # * wts_nn #+ wts_nn
            else:
                target_f[solu, :] = target * wts_nn[solu, 0:1]  # + wts_nn
            # m2 = np.mean(target_f)
            # target_f = target_f / np.mean(target_f)
            # Collect states and targets for training
            states.append(state.copy())
            targets_f.append(target_f)
            batch_tgt = np.append(batch_tgt, target_f)
        batch_tgt = batch_tgt.flatten()
        batch_avg = np.mean(batch_tgt)
        batch_std = np.std(batch_tgt)
        with self.sess.as_default():
            for i in range(len(states)):
                state = states[i]
                # target_f = (targets_f[i] - batch_avg) / batch_std + 1
                target_f = targets_f[i]  # / batch_avg
                feed_dict = construct_feed_dict(state['features'], state['support'], target_f, self.placeholders)
                _, loss = self.sess.run([self.model.opt_op, self.model.loss], feed_dict=feed_dict)
                losses.append(loss)
        # history = self.model.fit(np.array(states), np.array(targets_f), epochs=1, verbose=0)
        # Keeping track of loss
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
        return np.nanmean(losses)
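    # Worked example (hypothetical numbers): with FLAGS.predict == 'mwis',
    # reward = 0.9, and a memorized solution solu = [0, 2] on a 4-vertex graph,
    # target_f keeps the stored act_vals for vertices 1 and 3 and overwrites
    # rows 0 and 2 with 0.9, so the regression pulls the selected vertices'
    # scores toward the solution quality.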
    def solve_mwis_train(self, adj_0, wts_0, train=False, grd=1.0):
        """GCN scoring followed by local greedy search (LGS)."""
        adj = adj_0.copy()
        wts_nn = np.reshape(wts_0, (wts_0.shape[0], FLAGS.feature_size))
        # GCN: score every vertex in one forward pass
        state = self.makestate(adj, wts_nn)
        act_vals, act = self.act(state, train)
        # np.savetxt("viz_act_vals.csv", act_vals, delimiter=",")
        if FLAGS.predict == 'mwis':
            # gcn_wts = np.divide(wts_nn.flatten(), act_vals.flatten()+1e-8)
            gcn_wts = np.multiply(act_vals.flatten(), wts_nn.flatten())
            # gcn_wts = act_vals.flatten()+100
        else:
            gcn_wts = act_vals.flatten()
            # gcn_wts = np.multiply(act_vals.flatten(), wts_nn.flatten())
            # gcn_wts = np.multiply(act_vals.flatten(), wts_nn.flatten()) + wts_nn.flatten()
        mwis, _ = local_greedy_search(adj, gcn_wts)
        # mwis, _ = greedy_search(adj, gcn_wts)
        solu = list(mwis)
        mwis_rt = mwis
        total_wt = np.sum(wts_nn[solu, 0])
        if train:
            # wts_norm = wts_nn[list(sol_gd), :]/greedy_util.flatten()
            # self.memorize(state.copy(), act_vals.copy(), list(sol_gd), wts_norm, 1.0)
            # reward = (total_wt + self.smallconst) / (greedy_util.flatten()[0] + self.smallconst)
            # reward: solution weight relative to the greedy baseline `grd`
            reward = total_wt / (grd + 1e-6)
            # reward = reward if reward > 0 else 0
            wts_norm = wts_nn / np.amax(wts_nn)
            if not np.isnan(reward):
                self.memorize(state.copy(), act_vals.copy(), list(mwis).copy(), wts_norm.copy(), reward)
        return mwis_rt, total_wt
# use GPU 0
os.environ['CUDA_VISIBLE_DEVICES'] = str(0)
# Initialize session
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
bsf_q = []
# Create model
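# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original pipeline).
# Assumes FLAGS provides the fields used above (max_degree, hidden1,
# feature_size, predict, snr_db) and that `grd` is a greedy baseline weight
# computed elsewhere:
#
#   agent = DQNAgent(FLAGS, memory_size=5000)
#   # adj: (N, N) scipy.sparse adjacency matrix; wts: (N, 1) vertex weights
#   mwis, total_wt = agent.solve_mwis_train(adj, wts, train=True, grd=grd)
#   loss = agent.replay(batch_size=32)
# ---------------------------------------------------------------------------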