Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions config/sarc-dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,66 @@ sarc:
h100: H100-SXM5-80GB
tg[10601-10606,10701-10707,10801-10807,10901-10908,11101-11107,11201-11207]:
h100: H100-SXM5-80GB
# Cluster entry for fir (development configuration).
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the consumer's schema.
fir:
  host: fir.alliancecan.ca
  timezone: America/Vancouver
  # Account names tracked on this cluster (same list as the other new entries).
  accounts:
    - rrg-bengioy-ad_gpu
    - rrg-bengioy-ad_cpu
    - def-bengioy_gpu
    - def-bengioy_cpu
  sacct_bin: "/opt/software/slurm/bin/sacct"
  # The two duc commands are intentionally left without a value (parsed as null).
  duc_inodes_command:
  duc_storage_command:
  diskusage_report_command: diskusage_report --project --all_users
  # No Prometheus endpoint or headers file is set for this cluster (null).
  prometheus_url:
  prometheus_headers_file:
  # Quoted so the value stays a string instead of being typed as a date.
  start_date: '2025-08-11'
  gpus_per_nodes:
    # Mapping applied under the __DEFAULTS__ key.
    # NOTE(review): presumably the fallback for nodes without their own entry.
    __DEFAULTS__:
      h100: H100-SXM5-80GB
      # MIG slice names; each value is the __MIG_FLAG__ prefix followed by
      # the base GPU key (h100).
      nvidia_h100_80gb_hbm3_1g.10gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_2g.20gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_3g.40gb: __MIG_FLAG__h100
# Cluster entry for nibi (development configuration).
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the consumer's schema.
nibi:
  host: nibi.alliancecan.ca
  timezone: America/Toronto
  # Account names tracked on this cluster.
  accounts:
    - rrg-bengioy-ad_gpu
    - rrg-bengioy-ad_cpu
    - def-bengioy_gpu
    - def-bengioy_cpu
  sacct_bin: "/opt/software/slurm/bin/sacct"
  # The two duc commands are intentionally left without a value (parsed as null).
  duc_inodes_command:
  duc_storage_command:
  diskusage_report_command: diskusage_report --project --all_users
  # No Prometheus endpoint or headers file is set for this cluster (null).
  prometheus_url:
  prometheus_headers_file:
  # Quoted so the value stays a string instead of being typed as a date.
  start_date: '2025-07-31'
  gpus_per_nodes:
    # Mapping applied under the __DEFAULTS__ key.
    # NOTE(review): presumably the fallback for nodes without their own entry.
    __DEFAULTS__:
      h100: H100-SXM5-80GB
      # MIG slice names; each value is the __MIG_FLAG__ prefix followed by
      # the base GPU key (h100).
      nvidia_h100_80gb_hbm3_1g.10gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_2g.20gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_3g.40gb: __MIG_FLAG__h100
# Cluster entry for rorqual (development configuration).
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the consumer's schema.
rorqual:
  host: rorqual.alliancecan.ca
  timezone: America/Montreal
  # Account names tracked on this cluster.
  accounts:
    - rrg-bengioy-ad_gpu
    - rrg-bengioy-ad_cpu
    - def-bengioy_gpu
    - def-bengioy_cpu
  sacct_bin: "/opt/software/slurm/bin/sacct"
  # The two duc commands are intentionally left without a value (parsed as null).
  duc_inodes_command:
  duc_storage_command:
  diskusage_report_command: diskusage_report --project --all_users
  # No Prometheus endpoint or headers file is set for this cluster (null).
  prometheus_url:
  prometheus_headers_file:
  # Quoted so the value stays a string instead of being typed as a date.
  start_date: '2025-06-19'
  gpus_per_nodes:
    # Mapping applied under the __DEFAULTS__ key.
    # NOTE(review): presumably the fallback for nodes without their own entry.
    __DEFAULTS__:
      h100: H100-SXM5-80GB
      # MIG slice names; each value is the __MIG_FLAG__ prefix followed by
      # the base GPU key (h100).
      nvidia_h100_80gb_hbm3_1g.10gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_2g.20gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_3g.40gb: __MIG_FLAG__h100
65 changes: 63 additions & 2 deletions config/sarc-prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -184,5 +184,66 @@ sarc:
h100: H100-SXM5-80GB
tg[10601-10606,10701-10707,10801-10807,10901-10908,11101-11107,11201-11207]:
h100: H100-SXM5-80GB


# Cluster entry for fir (production configuration).
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the consumer's schema.
fir:
  host: fir.alliancecan.ca
  timezone: America/Vancouver
  # Account names tracked on this cluster.
  accounts:
    - rrg-bengioy-ad_gpu
    - rrg-bengioy-ad_cpu
    - def-bengioy_gpu
    - def-bengioy_cpu
  sacct_bin: "/opt/software/slurm/bin/sacct"
  # The two duc commands are intentionally left without a value (parsed as null).
  duc_inodes_command:
  duc_storage_command:
  diskusage_report_command: diskusage_report --project --all_users
  # No Prometheus endpoint or headers file is set for this cluster (null).
  prometheus_url:
  prometheus_headers_file:
  # Quoted so the value stays a string instead of being typed as a date.
  start_date: '2025-08-11'
  gpus_per_nodes:
    # Mapping applied under the __DEFAULTS__ key.
    # NOTE(review): presumably the fallback for nodes without their own entry.
    __DEFAULTS__:
      h100: H100-SXM5-80GB
      # MIG slice names; each value is the __MIG_FLAG__ prefix followed by
      # the base GPU key (h100).
      nvidia_h100_80gb_hbm3_1g.10gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_2g.20gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_3g.40gb: __MIG_FLAG__h100
# Cluster entry for nibi (production configuration).
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the consumer's schema.
nibi:
  host: nibi.alliancecan.ca
  timezone: America/Toronto
  # Account names tracked on this cluster.
  accounts:
    - rrg-bengioy-ad_gpu
    - rrg-bengioy-ad_cpu
    - def-bengioy_gpu
    - def-bengioy_cpu
  sacct_bin: "/opt/software/slurm/bin/sacct"
  # The two duc commands are intentionally left without a value (parsed as null).
  duc_inodes_command:
  duc_storage_command:
  diskusage_report_command: diskusage_report --project --all_users
  # No Prometheus endpoint or headers file is set for this cluster (null).
  prometheus_url:
  prometheus_headers_file:
  # Quoted so the value stays a string instead of being typed as a date.
  start_date: '2025-07-31'
  gpus_per_nodes:
    # Mapping applied under the __DEFAULTS__ key.
    # NOTE(review): presumably the fallback for nodes without their own entry.
    __DEFAULTS__:
      h100: H100-SXM5-80GB
      # MIG slice names; each value is the __MIG_FLAG__ prefix followed by
      # the base GPU key (h100).
      nvidia_h100_80gb_hbm3_1g.10gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_2g.20gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_3g.40gb: __MIG_FLAG__h100
# Cluster entry for rorqual (production configuration).
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm it against the consumer's schema.
rorqual:
  host: rorqual.alliancecan.ca
  timezone: America/Montreal
  # Account names tracked on this cluster.
  accounts:
    - rrg-bengioy-ad_gpu
    - rrg-bengioy-ad_cpu
    - def-bengioy_gpu
    - def-bengioy_cpu
  sacct_bin: "/opt/software/slurm/bin/sacct"
  # The two duc commands are intentionally left without a value (parsed as null).
  duc_inodes_command:
  duc_storage_command:
  diskusage_report_command: diskusage_report --project --all_users
  # No Prometheus endpoint or headers file is set for this cluster (null).
  prometheus_url:
  prometheus_headers_file:
  # Quoted so the value stays a string instead of being typed as a date.
  start_date: '2025-06-19'
  gpus_per_nodes:
    # Mapping applied under the __DEFAULTS__ key.
    # NOTE(review): presumably the fallback for nodes without their own entry.
    __DEFAULTS__:
      h100: H100-SXM5-80GB
      # MIG slice names; each value is the __MIG_FLAG__ prefix followed by
      # the base GPU key (h100).
      nvidia_h100_80gb_hbm3_1g.10gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_2g.20gb: __MIG_FLAG__h100
      nvidia_h100_80gb_hbm3_3g.40gb: __MIG_FLAG__h100
2 changes: 1 addition & 1 deletion scripts/systemd/scrapers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ SCRIPT=$(readlink -f "$0")
# Directory containing this script ($SCRIPT is expected to be set earlier).
SCRIPTPATH=$(dirname "$SCRIPT")
# The commands below call ../.local/bin/uv relative to the directory two
# levels above the script, so stop instead of running them from elsewhere.
cd "$SCRIPTPATH/../../" || exit 1
# scraping jobs
# Single invocation covering every cluster; the earlier narval/mila-only
# command is superseded by this list and would only scrape those two twice.
sudo -u sarc SARC_MODE=scraping SARC_CONFIG="$SCRIPTPATH/../../config/sarc-prod.yaml" ../.local/bin/uv run sarc acquire jobs -c narval fir rorqual nibi mila -d auto
# scraping users
sudo -u sarc SARC_MODE=scraping SARC_CONFIG="$SCRIPTPATH/../../config/sarc-prod.yaml" ../.local/bin/uv run sarc acquire users