Changes from all commits (51 commits)
d20cb93
Bump flask from 0.12.2 to 1.0 in /src/helper_files
dependabot[bot] Jul 19, 2019
cff8fd6
Merge pull request #3 from ALFA-group/dependabot/pip/src/helper_files…
hembergerik Aug 14, 2019
75b41cd
Create install_ubuntu.md
hembergerik Aug 23, 2019
86a5a70
Update network_data_loader.py
hembergerik Aug 23, 2019
f7b86cf
Merge branch 'master' into eh/ORNL
jamaltoutouh Sep 5, 2019
f943b78
Merge pull request #5 from ALFA-group/eh/ORNL
jamaltoutouh Sep 5, 2019
8437179
Update install_ubuntu.md
hembergerik Sep 13, 2019
df2a4b9
Fixing Dockerfile python call
Nov 6, 2019
9ace06a
Updating PyTorch version to 0.4.1
Nov 6, 2019
119772e
Merge pull request #7 from ALFA-group/jt/main-fixes
hembergerik Nov 6, 2019
8a755cf
Adding mixture weights optimization at the end of training
Nov 6, 2019
26d281f
Fixing L2 distance computation
Nov 6, 2019
ae1c6ff
Merge pull request #9 from ALFA-group/jt/main-fixes
jamaltoutouh Nov 6, 2019
651f999
Resolving merge conflicts
Nov 6, 2019
c4f77b9
Tests and refactoring
Nov 6, 2019
ee302e7
Merge branch 'master' into jt/mixture-weight-optimization
hembergerik Nov 6, 2019
816ec65
Merge pull request #8 from ALFA-group/jt/mixture-weight-optimization
hembergerik Nov 6, 2019
0d9b3e9
LipizzanerGANTrainer fix
Nov 6, 2019
bd29073
Data dieting implementation
Nov 6, 2019
500257c
Refactoring
Nov 7, 2019
660398a
Data dieting configuration files for tests
Nov 7, 2019
c796dc0
Merge pull request #10 from ALFA-group/jt/data-dieting
hembergerik Nov 7, 2019
d50123c
Adding last version of mustangs
Nov 7, 2019
7cdee1a
Added the last version of Mustangs
Nov 7, 2019
ca0a19e
Refactoring Mustangs
Nov 7, 2019
8b80350
Merge pull request #11 from ALFA-group/jt/mustangs
hembergerik Nov 7, 2019
d51fff4
Store cells checkpoint
Nov 19, 2019
d376e82
Pull request comments
Nov 20, 2019
6940bcd
Pull request comments
Nov 20, 2019
3500988
Merge pull request #13 from ALFA-group/jt/save-checkpoints
hembergerik Nov 20, 2019
65c0bc4
Store the network source of the individuals
Nov 20, 2019
7fd70b6
Added information about the position of the neighborhoods on the grid
Nov 20, 2019
f7a5243
Review comments
Nov 20, 2019
120670a
Merge pull request #14 from ALFA-group/jt/save-checkpoints
hembergerik Nov 20, 2019
2491dbf
Merge pull request #15 from ALFA-group/jt/store-grid-topology
hembergerik Nov 22, 2019
a8a06ca
Add mnist_labels files
floresd9 Jan 17, 2020
3bdc59a
Merge pull request #18 from ALFA-group/master
floresd9 Jan 17, 2020
664b445
Add multi-label per cell functionality
floresd9 Jan 22, 2020
ab94bd4
Finish merging
floresd9 Jan 22, 2020
45d782b
Fix import sampler
floresd9 Jan 22, 2020
5bae07d
Set up df/satori branch
floresd9 Jan 23, 2020
d81a793
Merge label selection into satori
floresd9 Jan 23, 2020
0baa836
Fully merge df/label-selection
floresd9 Jan 23, 2020
5828d67
up to date transforms
floresd9 Jan 28, 2020
e5402e9
Non-square grid error handling
floresd9 Jan 31, 2020
2d0df85
Check for local host on Summit
stevenryoung Jan 31, 2020
4b228f9
Fix bug in non-square grid error handling
floresd9 Mar 4, 2020
7d092f7
Calculate scores in clients for efficiency
floresd9 May 5, 2020
73467ff
MNIST dataset
floresd9 May 5, 2020
055e18f
Final rectangle grid handling
floresd9 May 5, 2020
5322588
Do not set client finish event too soon
floresd9 Feb 4, 2021
4 changes: 2 additions & 2 deletions gan-script.sh
@@ -16,8 +16,8 @@ echo "Client PIDS:"
 cat ${PID_FILE}
 sleep 5
 
-echo "Start master on GPU 4"
-export CUDA_VISIBLE_DEVICES=4;
+echo "Start master on GPU"
+# export CUDA_VISIBLE_DEVICES=4;
 python main.py train --distributed --master -f configuration/quickstart/mnist.yml
 
 echo "Begin kill clients"
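The change above stops hard-coding the master onto GPU 4 and leaves GPU selection to the environment. A minimal Python sketch of how `CUDA_VISIBLE_DEVICES` is typically consulted (the helper below is illustrative, not part of Lipizzaner):

```python
import os

# Illustrative helper: CUDA_VISIBLE_DEVICES must be set before the CUDA
# runtime initializes; an unset value leaves all GPUs visible to the
# framework, while "4" would expose only physical GPU 4 as device 0.
def visible_gpu_ids(env=None):
    """Parse CUDA_VISIBLE_DEVICES into a list of physical GPU indices."""
    env = os.environ if env is None else env
    raw = env.get("CUDA_VISIBLE_DEVICES")
    if raw is None:
        return None  # no restriction requested
    return [int(tok) for tok in raw.split(",") if tok.strip()]

print(visible_gpu_ids({"CUDA_VISIBLE_DEVICES": "4"}))  # [4]
```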
45 changes: 45 additions & 0 deletions install_ubuntu.md
@@ -0,0 +1,45 @@
# Lipizzaner

## Setup
```
git clone https://github.com/ALFA-group/lipizzaner-gan.git
cd lipizzaner-gan/
python3 --version
sudo apt-get update
sudo apt-get install python3-venv
python3 -m venv ~/my367
source ~/my367/bin/activate
sudo apt-get install python3-dev
sudo apt-get install gcc
pip install -r ./src/helper_files/requirements.txt
```

## MNIST
```
cd src/
python main.py train --distributed --client & sleep 5;
python main.py train --distributed --client & sleep 5;
python main.py train --distributed --client & sleep 5;
python main.py train --distributed --client &
ps
wget http://0.0.0.0:5000/status
cat status
python main.py train --distributed --master -f configuration/quickstart/mnist.yml
```
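The quickstart above checks that the clients are up by fetching the master's `/status` endpoint with `wget`. As a hedged illustration, the snippet below parses a hypothetical JSON payload from such an endpoint; the field names `address`, `port`, and `busy` are assumptions, not the documented response format:

```python
import json

# Hypothetical status payload parser: counts how many clients report
# themselves as busy. The payload shape is an assumption for
# illustration only.
def summarize_status(payload: str) -> int:
    data = json.loads(payload)
    return sum(1 for client in data if client.get("busy"))

example = ('[{"address": "127.0.0.1", "port": 5000, "busy": true},'
           ' {"address": "127.0.0.1", "port": 5001, "busy": false}]')
print(summarize_status(example))  # 1
```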

## Theoretical GAN
```
cd ../theoretical_experiments
sudo apt-get install python3-tk
python gaussian_gan.py
```

## Network traffic
```
cd ../src/data/network_data/
sudo apt-get install argus-client
./collect_network_traffic.sh
python analyze_network_file.py --pcap_file=network_capture.pcap --sequence_length=30
cd ../../
python main.py train --distributed --master -f configuration/lipizzaner-gan/network_traffic.yml
```
3 changes: 1 addition & 2 deletions lipi-mnist-satori.lsf
@@ -6,9 +6,8 @@
 #BSUB -R "span[ptile=4]"
 #BSUB -gpu "num=4"
 #BSUB -q "normal"
-#BSUB -x
 
-HOME2=/nobackup/users/ehemberg
+HOME2=/nobackup/users/floresd
 PYTHON_VIRTUAL_ENVIRONMENT=lipi
 CONDA_ROOT=$HOME2/anaconda3
 source ${CONDA_ROOT}/etc/profile.d/conda.sh
2 changes: 1 addition & 1 deletion src/Dockerfile
@@ -27,4 +27,4 @@ COPY helper_files/requirements.txt ./helper_files/
 RUN pip install -r ./helper_files/requirements.txt
 
 COPY . .
-CMD [ "sh", "-c", "python3.6 train ./main.py --distributed --${role} -f ${config_file}" ]
+CMD [ "sh", "-c", "python3.6 ./main.py train --distributed --${role} -f ${config_file}" ]
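The fix swaps the argument order so that `train` follows the script name, since main.py parses it as a subcommand. A hedged sketch of a CLI with that shape (the parser below is an illustration, not Lipizzaner's actual argument handling):

```python
import argparse

# Illustrative parser: "train" is a subcommand, so it must come after
# the script name; --master/--client pick the role and -f the config.
def build_parser():
    parser = argparse.ArgumentParser(prog="main.py")
    sub = parser.add_subparsers(dest="task", required=True)
    train = sub.add_parser("train")
    train.add_argument("--distributed", action="store_true")
    role = train.add_mutually_exclusive_group(required=True)
    role.add_argument("--master", action="store_true")
    role.add_argument("--client", action="store_true")
    train.add_argument("-f", dest="config_file")
    return parser

args = build_parser().parse_args(
    ["train", "--distributed", "--master", "-f", "configuration/quickstart/mnist.yml"])
print(args.task, args.master)  # train True
```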
47 changes: 47 additions & 0 deletions src/configuration/lipizzaner-gan/mnist_labels.yml
@@ -0,0 +1,47 @@
trainer:
  name: lipizzaner_gan
  n_iterations: 200
  calculate_net_weights_dist: True
  # independent_probability, exact_proportion
  mixture_generator_samples_mode: exact_proportion
  params:
    population_size: 1
    tournament_size: 2
    n_replacements: 1
    default_adam_learning_rate: 0.0002
    # Hyperparameter mutation
    alpha: 0.0001
    mutation_probability: 0.5
    discriminator_skip_each_nth_step: 1
    mixture_sigma: 0.01
    enable_selection: True
    score:
      enabled: True
      type: fid
      score_sample_size: 1000
      cuda: True
    fitness:
      fitness_sample_size: 1000
      fitness_mode: average # worse, best, average
dataloader:
  dataset_name: mnist_labels
  use_batch: True
  batch_size: 50
  n_batches: 0
  shuffle: True
  labels:
    - 1
    - 2
    - 3
    - 4
    - 5
  labels_per_cell: 3
network:
  name: four_layer_perceptron
  loss: bceloss
master:
  calculate_score: True
  # Same amount of data as original CIFAR contains
  score_sample_size: 50000
  cuda: True
general: !include ../general.yml
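With `labels` listing 1 through 5 and `labels_per_cell: 3`, each grid cell trains on a three-label subset of the dataset. The sketch below shows one plausible way such a subset could be drawn; the sampling policy is an assumption for illustration, not Lipizzaner's actual implementation:

```python
import random

# Hypothetical label assignment: draw labels_per_cell distinct labels
# for a cell, seeded so the assignment is reproducible per cell.
def labels_for_cell(labels, labels_per_cell, seed):
    rng = random.Random(seed)
    return sorted(rng.sample(labels, labels_per_cell))

cell_labels = labels_for_cell([1, 2, 3, 4, 5], 3, seed=1)
print(cell_labels)
```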
19 changes: 19 additions & 0 deletions src/configuration/quickstart-data-dieting/general.yml
@@ -0,0 +1,19 @@
logging:
  enabled: True
  log_level: INFO
  log_server: # Fill in connection string with read/write access here
  image_format: jpg
  print_discriminator: False
losswise:
  enabled: False
  api_key: # Fill in API key
output_dir: ./output
distribution:
  auto_discover: False
  master_node:
    exit_clients_on_disconnect: True
  client_nodes:
    - address: 127.0.0.1 # Fill in IP address here
      port: 5000-5003
seed: 1
num_workers: 0 # how many subprocesses to use for data loading
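The `client_nodes` entry pairs one address with the port range `5000-5003`. A master process could expand that range into one endpoint per client roughly like this (the helper name and approach are illustrative, not Lipizzaner's code):

```python
# Hypothetical expansion of a "lo-hi" port range into per-client
# endpoints, matching the address/port layout shown in general.yml.
def expand_clients(address: str, port_range: str):
    lo, hi = (int(p) for p in port_range.split("-"))
    return [f"{address}:{port}" for port in range(lo, hi + 1)]

print(expand_clients("127.0.0.1", "5000-5003"))
# ['127.0.0.1:5000', '127.0.0.1:5001', '127.0.0.1:5002', '127.0.0.1:5003']
```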
45 changes: 45 additions & 0 deletions src/configuration/quickstart-data-dieting/mnist.yml
@@ -0,0 +1,45 @@
trainer:
  name: lipizzaner_gan
  n_iterations: 2
  calculate_net_weights_dist: True
  # independent_probability, exact_proportion
  mixture_generator_samples_mode: exact_proportion
  params:
    population_size: 1
    tournament_size: 2
    n_replacements: 1
    default_adam_learning_rate: 0.0002
    # Hyperparameter mutation
    alpha: 0.0001
    mutation_probability: 0.5
    discriminator_skip_each_nth_step: 1
    #mixture_sigma: 0.01
    enable_selection: True
    score:
      enabled: True
      type: fid
      score_sample_size: 10000
      cuda: True
    fitness:
      fitness_sample_size: 1000
      fitness_mode: average # worse, best, average
    optimize_mixture:
      es_generations: 50
      es_score_sample_size: 10000
      es_random_init: False
      mixture_sigma: 0.01
dataloader:
  dataset_name: mnist
  use_batch: True
  batch_size: 100
  n_batches: 0
  shuffle: True
  sampling_ratio: 0.5
network:
  name: four_layer_perceptron
  loss: bceloss
master:
  calculate_score: True
  score_sample_size: 50000
  cuda: True
general: !include general.yml
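The `sampling_ratio: 0.5` key is what enables data dieting: each cell trains on a random half of the dataset. A minimal sketch of such index subsampling, under the assumption that samples are drawn uniformly without replacement (the helper is illustrative, not Lipizzaner's sampler):

```python
import random

# Hypothetical data-dieting sampler: keep a random fraction of the
# training indices, as suggested by sampling_ratio above.
def diet_indices(n_samples, sampling_ratio, seed=1):
    rng = random.Random(seed)
    k = int(n_samples * sampling_ratio)
    return sorted(rng.sample(range(n_samples), k))

subset = diet_indices(10, 0.5)
print(len(subset))  # 5
```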
19 changes: 19 additions & 0 deletions src/configuration/quickstart-weights-optimization/general.yml
@@ -0,0 +1,19 @@
logging:
  enabled: True
  log_level: INFO
  log_server: # Fill in connection string with read/write access here
  image_format: jpg
  print_discriminator: False
losswise:
  enabled: False
  api_key: # Fill in API key
output_dir: ./output
distribution:
  auto_discover: False
  master_node:
    exit_clients_on_disconnect: True
  client_nodes:
    - address: 127.0.0.1 # Fill in IP address here
      port: 5000-5003
seed: 1
num_workers: 0 # how many subprocesses to use for data loading
44 changes: 44 additions & 0 deletions src/configuration/quickstart-weights-optimization/mnist.yml
@@ -0,0 +1,44 @@
trainer:
  name: lipizzaner_gan
  n_iterations: 2
  calculate_net_weights_dist: True
  # independent_probability, exact_proportion
  mixture_generator_samples_mode: exact_proportion
  params:
    population_size: 1
    tournament_size: 2
    n_replacements: 1
    default_adam_learning_rate: 0.0002
    # Hyperparameter mutation
    alpha: 0.0001
    mutation_probability: 0.5
    discriminator_skip_each_nth_step: 1
    #mixture_sigma: 0.01
    enable_selection: True
    score:
      enabled: True
      type: fid
      score_sample_size: 10000
      cuda: True
    fitness:
      fitness_sample_size: 1000
      fitness_mode: average # worse, best, average
    optimize_mixture:
      es_generations: 50
      es_score_sample_size: 10000
      es_random_init: False
      mixture_sigma: 0.01
dataloader:
  dataset_name: mnist
  use_batch: True
  batch_size: 100
  n_batches: 10
  shuffle: True
network:
  name: four_layer_perceptron
  loss: bceloss
master:
  calculate_score: True
  score_sample_size: 50000
  cuda: True
general: !include general.yml
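The `optimize_mixture` block configures an evolution-strategy (ES) search over the generator mixture weights after training. The loop below sketches a (1+1)-ES consistent with the `es_generations` and `mixture_sigma` keys; the score function is a toy stand-in for FID, and the whole implementation is illustrative rather than Lipizzaner's actual optimizer:

```python
import random

# Illustrative (1+1)-ES: perturb the mixture weights with Gaussian
# noise of scale sigma, renormalize to a valid mixture, and keep the
# mutant only if the score improves (lower is better, as for FID).
def optimize_mixture(weights, score_fn, generations=50, sigma=0.01, seed=1):
    rng = random.Random(seed)
    best, best_score = list(weights), score_fn(weights)
    for _ in range(generations):
        mutant = [max(w + rng.gauss(0.0, sigma), 0.0) for w in best]
        total = sum(mutant) or 1.0
        mutant = [w / total for w in mutant]
        s = score_fn(mutant)
        if s < best_score:
            best, best_score = mutant, s
    return best

# Toy score: squared distance from uniform weights, standing in for FID.
score = lambda w: sum((x - 0.25) ** 2 for x in w)
result = optimize_mixture([0.4, 0.3, 0.2, 0.1], score)
print(abs(sum(result) - 1.0) < 1e-9)  # True
```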
13 changes: 9 additions & 4 deletions src/configuration/quickstart/mnist.yml
@@ -1,12 +1,12 @@
 trainer:
   name: lipizzaner_gan
-  n_iterations: 5
+  n_iterations: 200
   calculate_net_weights_dist: True
   # independent_probability, exact_proportion
   mixture_generator_samples_mode: exact_proportion
   params:
     population_size: 1
-    tournament_size: 1
+    tournament_size: 2
     n_replacements: 1
     default_adam_learning_rate: 0.0002
     # Hyperparameter mutation
@@ -19,14 +19,19 @@ trainer:
       enabled: True
       type: fid
       score_sample_size: 1000
-      cuda: False
+      cuda: True
     fitness:
       fitness_sample_size: 1000
       fitness_mode: average # worse, best, average
+    optimize_mixture:
+      es_generations: 10
+      es_score_sample_size: 10000
+      es_random_init: False
+      mixture_sigma: 0.01
 dataloader:
   dataset_name: mnist
   use_batch: True
-  batch_size: 400
+  batch_size: 100
   n_batches: 0
   shuffle: True
 network:
46 changes: 46 additions & 0 deletions src/configuration/quickstart/mnist_labels.yml
@@ -0,0 +1,46 @@
trainer:
  name: lipizzaner_gan
  n_iterations: 5
  calculate_net_weights_dist: True
  # independent_probability, exact_proportion
  mixture_generator_samples_mode: exact_proportion
  params:
    population_size: 1
    tournament_size: 2
    n_replacements: 1
    default_adam_learning_rate: 0.0002
    # Hyperparameter mutation
    alpha: 0.0001
    mutation_probability: 0.5
    discriminator_skip_each_nth_step: 1
    mixture_sigma: 0.01
    enable_selection: True
    score:
      enabled: True
      type: fid
      score_sample_size: 1000
      cuda: True
    fitness:
      fitness_sample_size: 1000
      fitness_mode: average # worse, best, average
dataloader:
  dataset_name: mnist_labels
  use_batch: True
  batch_size: 400
  n_batches: 0
  shuffle: True
  labels:
    - 1
    - 2
    - 3
    - 4
    - 5
  labels_per_cell: 3
network:
  name: four_layer_perceptron
  loss: bceloss
master:
  calculate_score: True
  score_sample_size: 50000
  cuda: True
general: !include general.yml
11 changes: 11 additions & 0 deletions src/configuration/tests/README.md
@@ -0,0 +1,11 @@
### Configuration files to test new Lipizzaner functionalities

- **checkpointing**: A new feature that stores the current status of each client node (cell) in its output folder. The stored information includes the *genome* (the network), the current iteration, the learning rate, and everything else needed to resume the experiment from the given checkpoint.
- **weights-optimization**: Tests the optimization of the mixture weights at the end of the training process. It basically calls that function without performing any training epochs.
- **data-dieting**: Allows selecting the portion of the training dataset used in each cell to train the networks. The samples of this reduced dataset are picked at random.
- **mustangs**: Applies Mustangs, the idea of randomly picking a loss function from three alternatives (BCE, MSE, and the heuristic loss). It is introduced in **Spatial evolutionary generative adversarial networks**:

Jamal Toutouh, Erik Hemberg, and Una-May O'Reilly. 2019. Spatial evolutionary generative adversarial networks. In Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '19), Manuel López-Ibáñez (Ed.). ACM, New York, NY, USA, 472-480. DOI: https://doi.org/10.1145/3321707.3321860
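The Mustangs idea described above can be sketched as follows; the loss names are stand-in strings rather than Lipizzaner's actual loss classes:

```python
import random

# Illustrative Mustangs step: at each training step, draw one of the
# three candidate losses uniformly at random.
LOSSES = ["bce", "mse", "heuristic"]

def pick_loss(rng):
    return rng.choice(LOSSES)

rng = random.Random(7)
draws = [pick_loss(rng) for _ in range(100)]
print(sorted(set(draws)))
```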


20 changes: 20 additions & 0 deletions src/configuration/tests/checkpointing/general.yml
@@ -0,0 +1,20 @@
logging:
  enabled: True
  log_level: INFO
  log_server: # Fill in connection string with read/write access here
  image_format: jpg
  print_discriminator: False
losswise:
  enabled: False
  api_key: # Fill in API key
output_dir: ./output
distribution:
  auto_discover: False
  master_node:
    exit_clients_on_disconnect: True
  client_nodes:
    - address: 127.0.0.1 # Fill in IP address here
      port: 5000-5003
seed: 1
num_workers: 0 # How many subprocesses to use for data loading
checkpoint_period: 2 # Number of iterations between checkpoints (0: no checkpoints are stored)
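Assuming `checkpoint_period` means "store a checkpoint every N iterations, with 0 disabling checkpointing", the gating logic reduces to a one-line check (the helper below is an illustration of that reading, not Lipizzaner's code):

```python
# Illustrative checkpoint gate: True on every checkpoint_period-th
# iteration; a period of 0 disables checkpointing entirely.
def should_checkpoint(iteration, checkpoint_period):
    return checkpoint_period > 0 and iteration % checkpoint_period == 0

print([i for i in range(1, 7) if should_checkpoint(i, 2)])  # [2, 4, 6]
```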