Commit ef56349

Commit for all files used for SC submission
1 parent 6642eac commit ef56349

File tree

8 files changed: +949 -169 lines
Lines changed: 48 additions & 0 deletions

@@ -0,0 +1,48 @@

import torch.nn as nn
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
from torch_geometric.utils import to_dense_batch
from torch_geometric.nn import NNConv


class LSC_Trainer(nn.Module):
    def __init__(self, num_nodes):
        super(LSC_Trainer, self).__init__()
        self.bond_encoder = BondEncoder(16)   # embeds the 3 OGB bond features into 16 dims
        self.atom_encoder = AtomEncoder(64)   # embeds the 9 OGB atom features into 64 dims

        # Edge network for NNConv: maps each 16-dim bond embedding to a
        # 64 x 32 weight matrix (in_channels * out_channels).
        self._graph_nn = nn.Sequential(nn.Linear(16, 64, bias=False),
                                       nn.ReLU(),
                                       nn.Linear(64, 32, bias=False),
                                       nn.ReLU(),
                                       nn.Linear(32, 64 * 32, bias=False))

        self.graph_conv = NNConv(64, 32, self._graph_nn)
        self.num_nodes = num_nodes

        # Dense regression head over the flattened, zero-padded node features.
        self._nn = nn.Sequential(nn.Linear(num_nodes * 32, 256),
                                 nn.ReLU(),
                                 nn.Linear(256, 128),
                                 nn.ReLU(),
                                 nn.Linear(128, 32),
                                 nn.ReLU(),
                                 nn.Linear(32, 8),
                                 nn.ReLU(),
                                 nn.Linear(8, 1))

    def flatten(self, x):
        return x.view(x.size(0), -1)

    def forward(self, data):
        node_features = data.x
        edge_features = data.edge_attr
        edge_index = data.edge_index
        batch = data.batch

        encoded_atoms = self.atom_encoder(node_features)
        encoded_bonds = self.bond_encoder(edge_features)

        # Edge-conditioned convolution: each node aggregates neighbor
        # features transformed by the per-edge weights from self._graph_nn.
        updated_features = self.graph_conv(encoded_atoms, edge_index, encoded_bonds)

        # Pad every graph to a fixed num_nodes so the dense head sees a
        # constant-size input, then flatten to [batch_size, num_nodes * 32].
        updated_features = self.flatten(
            to_dense_batch(updated_features, batch, max_num_nodes=self.num_nodes)[0])

        out = self._nn(updated_features)

        return out
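
Given the `from SYNTH_Trainer import LSC_Trainer` line in the trainer script below, this new file is presumably SYNTH_Trainer.py. A quick way to sanity-check the model's plumbing is a single forward pass on one random graph shaped like the synthetic data; the snippet below is a hypothetical smoke test, not part of the commit, and assumes ogb and torch_geometric are installed:

import torch
from torch_geometric.data import Data, Batch
from torch_geometric.utils import erdos_renyi_graph

n = 16
edge_index = erdos_renyi_graph(n, 0.3)
# All-zero index features are valid inputs for the OGB atom/bond encoders.
toy = Data(x=torch.zeros(n, 9, dtype=torch.long),
           edge_index=edge_index,
           edge_attr=torch.zeros(edge_index.shape[1], 3, dtype=torch.long))

model = LSC_Trainer(num_nodes=n)
out = model(Batch.from_data_list([toy]))
print(out.shape)  # expected: torch.Size([1, 1]), one regression target per graph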
Lines changed: 56 additions & 0 deletions

@@ -0,0 +1,56 @@

from torch_geometric.utils import erdos_renyi_graph
from torch_geometric.data import Data
from tqdm import tqdm
import torch
import pickle
import multiprocessing as mp
import numpy as np

node_sizes = [64, 128]
p_vals = np.logspace(-2.5, -0.4, num=10)

_f_name = "/p/vast1/zaman2/synth_data/{}_{}_Pytorch.pickle"

_num_samples = 10000


def make_dataset(_n, _p):
    """Pickle _num_samples Erdos-Renyi graphs with _n nodes and edge
    probability _p, carrying random OGB-style atom/bond features and a
    random regression target."""
    _dataset = []
    max_edges = 0
    edge_spread = []
    count = 0
    while count < _num_samples:

        edge_indices = erdos_renyi_graph(_n, _p)

        # Keep only graphs with at least two directed edges.
        if edge_indices.shape[1] > 1:
            # torch.randint(1, ...) samples from [0, 1), so every feature
            # index is 0, which the OGB encoders still accept.
            node_features = torch.randint(1, (_n, 9), dtype=torch.int)
            edge_features = torch.randint(1, (edge_indices.shape[1], 3), dtype=torch.int)

            target = torch.rand(1, 1)
            data = Data(x=node_features,
                        edge_index=edge_indices,
                        edge_attr=edge_features,
                        y=target)
            _dataset.append(data)
            edge_spread.append(edge_indices.shape)
            if edge_indices.shape[1] > max_edges:
                max_edges = edge_indices.shape[1]
            count += 1

    # The max edge count is baked into the file name, so the trainer has to
    # be pointed at the matching pickle.
    with open(_f_name.format(_n, max_edges), 'wb') as f:
        pickle.dump(_dataset, f)
    print(max_edges)

    edge_spread = np.array(edge_spread)

    np.save(f'{_n}_{max_edges}.npy', edge_spread)
    return max_edges


for _n in node_sizes:
    for _p in p_vals:
        make_dataset(_n, _p)
        print(_n, _p)
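
For orientation: in an Erdős–Rényi graph each of the n(n-1)/2 node pairs is connected with probability p, and erdos_renyi_graph stores both directions of every undirected edge, so edge_index.shape[1] is roughly p * n * (n - 1). A small sketch, not in the commit, that tabulates this for the sweep above:

import numpy as np

node_sizes = [64, 128]
p_vals = np.logspace(-2.5, -0.4, num=10)

for n in node_sizes:
    for p in p_vals:
        # Expected directed edge count: p * n * (n - 1).
        print(f"n={n:4d}  p={p:.4f}  expected edges ~ {p * n * (n - 1):9.1f}")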
Lines changed: 29 additions & 0 deletions

@@ -0,0 +1,29 @@

import numpy as np
import glob
import pickle

_files = glob.glob("/p/vast1/zaman2/synth_data/*.pickle")

# Maps num_nodes -> {num_edges -> list of per-graph edge counts}.
_vals = {}

for _file in _files:
    # File names look like <num_nodes>_<num_edges>_Pytorch.pickle.
    _temp = _file.split("/")[-1].split(".")[0].split("_")
    _num_nodes = _temp[0]
    _num_edges = _temp[1]
    print(_num_nodes, _num_edges)

    with open(_file, 'rb') as f:
        _data = pickle.load(f)

    edge_sizes = []
    for obj in _data:
        edge_sizes.append(obj.num_edges)

    # Accumulate into the per-node-count dict rather than rebuilding it for
    # every file, so files sharing a node count are all retained.
    _vals.setdefault(_num_nodes, {})[_num_edges] = edge_sizes

with open("_gen_stats.pickle", 'wb') as f:
    pickle.dump(_vals, f)
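
A hypothetical consumer of _gen_stats.pickle, assuming the nested num_nodes -> num_edges -> edge-count-list layout built above; not part of the commit:

import pickle
import numpy as np

with open("_gen_stats.pickle", 'rb') as f:
    stats = pickle.load(f)

for num_nodes, edge_dic in stats.items():
    for num_edges, sizes in edge_dic.items():
        sizes = np.array(sizes)
        print(f"n={num_nodes}  max_edges={num_edges}: "
              f"mean={sizes.mean():.1f}  min={sizes.min()}  max={sizes.max()}")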

applications/graph/GNN/PyTorch_Implementation/main.py

Lines changed: 2 additions & 2 deletions

The change extends training from 2 epochs to 5 and comments out the per-mini-batch log line, leaving only the per-epoch summaries.

@@ -98,7 +98,7 @@ def main(BATCH_SIZE, dist=False, sync=True):

             print("Writing log information to ", file_name, flush=True)

-        for epoch in range(1, 3):
+        for epoch in range(0, 5):

             epoch_loss = 0
             epoch_start_time = time.perf_counter()
@@ -127,7 +127,7 @@ def main(BATCH_SIZE, dist=False, sync=True):

                 if rank == 0:
                     batch_times.update(time.perf_counter() - _time_start)
-                    print("Mini Batch Times ", i, ": \t", batch_times.mean(), "LOSS: \t", loss_tracker.mean(), flush=True)
+                    #print("Mini Batch Times ", i, ": \t", batch_times.mean(), "LOSS: \t", loss_tracker.mean(), flush=True)
             if dist:
                 torch.distributed.barrier()
Lines changed: 158 additions & 0 deletions

@@ -0,0 +1,158 @@

from utils import get_world_size, init_dist, AverageTracker, get_local_rank
import torch
import argparse
import time
import pickle
from torch_geometric.data import DataLoader

from SYNTH_Trainer import LSC_Trainer

import glob

desc = "PyTorch Geometric Distributed Trainer for OGB LSC dataset"

parser = argparse.ArgumentParser(description=desc)

parser.add_argument(
    '--mini-batch-size', action='store', default=2048, type=int,
    help='mini-batch size (default: 2048)', metavar='NUM')

parser.add_argument(
    '--num-nodes', action='store', default=16, type=int,
    help='number of nodes per graph (default: 16)', metavar='NUM')

parser.add_argument(
    '--num-edges', action='store', default=16, type=int,
    help='maximum number of edges per graph (default: 16)', metavar='NUM')

parser.add_argument('--no-sync', dest='mini_batch_sync', action='store_false')
parser.add_argument('--sync', dest='mini_batch_sync', action='store_true')

parser.add_argument('--dist', dest='dist', action='store_true')

parser.set_defaults(feature=True)
args = parser.parse_args()

mb_size = args.mini_batch_size

num_nodes = args.num_nodes
num_edges = args.num_edges
sync = args.mini_batch_sync

distributed_training = args.dist


def main(BATCH_SIZE, dist=False, sync=True):
    time_stamp = time.strftime("%d-%m-%Y-%H-%M-%S", time.gmtime())

    if dist:
        init_dist("/p/vast1/zaman2/randevous_files_" + str(BATCH_SIZE))
        rank = torch.distributed.get_rank()
    else:
        rank = 0

    primary = rank == 0
    world_size = get_world_size()

    if primary:
        print("Running distributed: ", dist, "\t world size: ", world_size)

    # The file name encodes the node count and the max edge count produced
    # by the dataset generator.
    _files = [f"/p/vast1/zaman2/synth_data/{num_nodes}_{num_edges}_Pytorch.pickle"]
    #_files = glob.glob(_files_str)
    for _file in _files:

        #num_edges = _file.split("/")[-1].split(".")[0].split("_")[1]
        print(num_edges)
        with open(_file, 'rb') as f:
            train_dataset = pickle.load(f)

        train_loader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  pin_memory=True)

        if dist:
            device = torch.device(f'cuda:{get_local_rank()}' if torch.cuda.is_available() else 'cpu')
        else:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        model = LSC_Trainer(num_nodes).to(device)

        if dist:
            model = torch.nn.parallel.DistributedDataParallel(model,
                                                              device_ids=[get_local_rank()],
                                                              output_device=get_local_rank())

        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

        criterion = torch.nn.MSELoss()

        if primary:
            file_name = "SYNTHETIC_LOGS/SYNTHETIC_" + str(num_nodes) + "_" + str(num_edges) + ".log"

            logger = open(file_name, 'w')

            print("Writing log information to ", file_name, flush=True)

        for epoch in range(0, 5):

            epoch_loss = 0
            epoch_start_time = time.perf_counter()
            batch_times = AverageTracker()

            loss_tracker = AverageTracker()

            if dist:
                # Assumes the loader was built with a DistributedSampler;
                # the plain DataLoader above has no set_epoch().
                train_loader.sampler.set_epoch(epoch)
            for i, data in enumerate(train_loader):

                _time_start = time.perf_counter()
                data = data.to(device)
                y = data.y

                pred = model(data)
                loss = criterion(y, pred)
                loss_tracker.update(loss.item())
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                #if dist and sync:
                #    torch.distributed.barrier()  # This ensures that Global Mini Batches are synced

                if rank == 0:
                    batch_times.update(time.perf_counter() - _time_start)
                    #print("Mini Batch Times ", i, ": \t", batch_times.mean(), "LOSS: \t", loss_tracker.mean(), flush=True)
            if dist:
                # Wait for every rank before timing the epoch.
                torch.distributed.barrier()

                if primary:
                    message = "Epoch {}: Total elapsed time {:.3f} \t Average Mini Batch Time {:.3f} \n"
                    epoch_time = time.perf_counter() - epoch_start_time

                    logger.write(message.format(epoch, epoch_time, batch_times.mean()))
                    logger.flush()
                    print(message.format(epoch, epoch_time, batch_times.mean()), flush=True)

            else:
                if primary:
                    message = "Epoch {}: Total elapsed time {:.3f} \t Average Mini Batch Time {:.3f} \n"
                    epoch_time = time.perf_counter() - epoch_start_time

                    logger.write(message.format(epoch, epoch_time, batch_times.mean()))
                    logger.flush()
                    print(message.format(epoch, epoch_time, batch_times.mean()), flush=True)

        if primary:
            logger.close()


if __name__ == '__main__':
    # Note: dist and sync are hard-coded off here; the --dist/--sync flags
    # parsed above are not forwarded.
    main(mb_size, False, False)
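
One caveat worth flagging: the dist code path calls train_loader.sampler.set_epoch(epoch), which only exists on torch.utils.data.distributed.DistributedSampler, while the loader above is built without one. A minimal sketch of the wiring that path appears to assume (hypothetical, not in the commit):

from torch.utils.data.distributed import DistributedSampler
from torch_geometric.data import DataLoader

# Shard the dataset across ranks; set_epoch() then reshuffles shards per epoch.
sampler = DistributedSampler(train_dataset)
train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          sampler=sampler,
                          pin_memory=True)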
