-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvirny_flow-folk_pubcov-template.sbatch
More file actions
79 lines (66 loc) · 3.12 KB
/
virny_flow-folk_pubcov-template.sbatch
File metadata and controls
79 lines (66 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/bin/bash
# ====================================================================================
# SLURM batch template for a single VirnyFlow experiment run.
#
# This file is a TEMPLATE: every <PLACEHOLDER> token (<MEM>, <CPUS>, <EMAIL>,
# <EXP_NAME>, <DATASET>, <NUM_WORKERS>, <EXP_CONFIG_SUFFIX>, <RUN_NUM>, <W1>, <W2>,
# <W3>, <MAX_TOTAL_PIPELINES_NUM>, <NUM_PP_CANDIDATES>) has to be substituted with a
# concrete value before the file is submitted with sbatch.
#
# Steps performed by the job:
#   1. create a per-job session directory with tmp dirs for ZooKeeper and Kafka,
#   2. write the experiment configuration to <session>/exp_config.yaml,
#   3. run the Kafka start-up script for the selected cluster type,
#   4. run the VirnyFlow cluster script inside a Singularity container.
# ====================================================================================
# Define SLURM configs
# ====================================================================================
#SBATCH --nodes=1
#SBATCH --mem=<MEM>GB
#SBATCH --cpus-per-task=<CPUS>
#SBATCH --time=48:00:00
#SBATCH --mail-type=END
#SBATCH --mail-user=<EMAIL>@nyu.edu
#SBATCH --account=pr_152_general
#SBATCH --job-name=<EXP_NAME>_<DATASET>_w<NUM_WORKERS>_<EXP_CONFIG_SUFFIX>_<RUN_NUM>
#SBATCH --output=%j_<EXP_NAME>_<DATASET>_w<NUM_WORKERS>_<EXP_CONFIG_SUFFIX>_<RUN_NUM>.out

# Unload all environment modules before doing anything else.
module purge

# ====================================================================================
# Default parameters
# ====================================================================================
EXP_CONFIG_NAME=<EXP_NAME>_<DATASET>_w<NUM_WORKERS>_<EXP_CONFIG_SUFFIX>
NUM_WORKERS=<NUM_WORKERS>
NUM_CPUS_PER_WORKER=1
CLUSTER_TYPE=one_node_configs # one_node_configs or many_node_configs
# NOTE(review): EMAIL is used both as the mail-user prefix and as the directory name
# under /home and /scratch, so it is presumably the cluster user id - confirm.
EMAIL=<EMAIL>
# Session name; also the name of the working directory created below.
SESSION=${SLURM_JOB_ID}_${EXP_CONFIG_NAME}_<RUN_NUM>

# Paths derived from the parameters above.
SESSION_DIR="./${SESSION}"
PROJECT_DIR="/home/${EMAIL}/projects/virny-flow-experiments"
CLUSTER_SCRIPTS_DIR="${PROJECT_DIR}/cluster/${CLUSTER_TYPE}"

# ====================================================================================
# Define exp_config.yaml
# ====================================================================================
# Create the session directory and its tmp sub-directories in one call.
# Abort when this fails: the config file could not be written afterwards.
mkdir -p \
  "${SESSION_DIR}/tmp/zookeeper-data/" \
  "${SESSION_DIR}/tmp/zookeeper-logs/" \
  "${SESSION_DIR}/tmp/kafka-logs/" \
  || { echo "ERROR: cannot create session directories under ${SESSION_DIR}" >&2; exit 1; }

# The heredoc delimiter is unquoted on purpose: $EXP_CONFIG_NAME, $EMAIL and
# $NUM_WORKERS are expanded by the shell while the file is written.
# Indentation inside the heredoc is significant: it nests each setting under its
# *_args section in the resulting YAML document.
cat <<EOL > "${SESSION_DIR}/exp_config.yaml"
common_args:
  exp_config_name: "$EXP_CONFIG_NAME"
  run_nums: [<RUN_NUM>]
  secrets_path: "/home/$EMAIL/projects/virny-flow-experiments/scripts/configs/secrets.env"
pipeline_args:
  dataset: "<DATASET>"
  sensitive_attrs_for_intervention: ["SEX", "RAC1P"]
  null_imputers: []
  fairness_interventions: ["DIR", "AD"]
  models: ["dt_clf", "lr_clf", "rf_clf", "xgb_clf", "lgbm_clf"]
optimisation_args:
  ref_point: [0.50, 0.15, 0.15]
  objectives:
    - { name: "objective_1", metric: "F1", group: "overall", weight: <W1> }
    - { name: "objective_2", metric: "Statistical_Parity_Difference", group: "SEX", weight: <W2> }
    - { name: "objective_3", metric: "Statistical_Parity_Difference", group: "RAC1P", weight: <W3> }
  max_total_pipelines_num: <MAX_TOTAL_PIPELINES_NUM>
  num_workers: $NUM_WORKERS
  num_pp_candidates: <NUM_PP_CANDIDATES>
  training_set_fractions_for_halting: [0.5, 1.0]
  exploration_factor: 0.5
  risk_factor: 0.5
virny_args:
  sensitive_attributes_dct: {'SEX': '2', 'RAC1P': ['2', '3', '4', '5', '6', '7', '8', '9'], 'SEX&RAC1P': None}
EOL

# Do not start the cluster without a configuration file.
if [[ ! -s "${SESSION_DIR}/exp_config.yaml" ]]; then
  echo "ERROR: failed to write ${SESSION_DIR}/exp_config.yaml" >&2
  exit 1
fi

# ====================================================================================
# Start VirnyFlow cluster
# ====================================================================================
# Start Kafka
bash "${CLUSTER_SCRIPTS_DIR}/run_singularity_kafka.sh" "$NUM_WORKERS" "$SESSION" "$EMAIL"

# Start virny_flow_cluster
# The environment overlay is mounted read-only (:ro); /ext3/env.sh is sourced inside
# the container before the cluster script runs. The variables in the command string
# are expanded by this script, not inside the container.
singularity exec \
  --overlay "/scratch/${EMAIL}/virny_flow_project/vldb_sds_env.ext3:ro" \
  /scratch/work/public/singularity/ubuntu-20.04.1.sif \
  /bin/bash -c "source /ext3/env.sh; bash /home/$EMAIL/projects/virny-flow-experiments/cluster/$CLUSTER_TYPE/run_virny_flow_cluster.sh $NUM_WORKERS $NUM_CPUS_PER_WORKER $SESSION $EMAIL"