Skip to content

Commit d736a53

Browse files
authored
Merge branch 'develop' into distconv-channelwise-softmax
2 parents 8ded742 + dd8b60d commit d736a53

File tree

1,698 files changed

+99376
-54648
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,698 files changed

+99376
-54648
lines changed

.clang-format

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,4 +162,7 @@ StatementMacros:
162162
TabWidth: 8
163163
UseCRLF: false
164164
UseTab: Never
165+
---
166+
Language: Proto
167+
BasedOnStyle: Google
165168
...

.github/workflows/build-cpu.yml

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
name: LBANN CPU
2+
3+
on:
4+
push:
5+
branches: develop
6+
pull_request:
7+
branches: develop
8+
merge_group:
9+
branches: develop
10+
11+
jobs:
12+
test:
13+
runs-on: ubuntu-latest
14+
strategy:
15+
matrix:
16+
compiler: [gcc] # , clang
17+
18+
steps:
19+
- uses: actions/checkout@v4
20+
21+
- name: Set up Python 3.10
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: '3.10'
25+
cache: 'pip'
26+
27+
- name: Install dependencies
28+
run: |
29+
sudo apt-get update
30+
sudo apt-get install -y libyaml-dev cmake lmod ninja-build
31+
sudo apt-get install -y libblas-dev libopenblas-dev liblapacke-dev
32+
sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
33+
python -m pip install --upgrade pip
34+
35+
- name: Restore cached Spack-built dependencies
36+
id: cache-spack
37+
uses: actions/cache/restore@v3
38+
with:
39+
path: |
40+
~/.spack
41+
spack
42+
key: ${{ runner.os }}-${{ matrix.compiler }}-spackdeps
43+
44+
- name: Build and install LBANN dependencies
45+
if: steps.cache-spack.outputs.cache-hit != 'true'
46+
run: |
47+
source /usr/share/lmod/lmod/init/bash
48+
git clone -c feature.manyFiles=true https://github.com/spack/spack.git
49+
cd spack
50+
git checkout 73858df14dc3f0e701814c84bb8bd6b72f80a806 # Use a tried and true version of Spack
51+
cd ..
52+
source spack/share/spack/setup-env.sh
53+
scripts/build_lbann.sh -d --dependencies-only -l ci -- +numpy +unit_tests %${{ matrix.compiler }}
54+
55+
- name: Cache Spack-built dependencies
56+
id: cache-spack-save
57+
uses: actions/cache/save@v3
58+
with:
59+
path: |
60+
~/.spack
61+
spack
62+
key: ${{ runner.os }}-${{ matrix.compiler }}-spackdeps
63+
64+
- name: Build LBANN
65+
run: |
66+
source /usr/share/lmod/lmod/init/bash
67+
source spack/share/spack/setup-env.sh
68+
scripts/build_lbann.sh -r -l ci --ci -- +numpy +unit_tests %${{ matrix.compiler }}
69+
70+
- name: Test Catch2
71+
run: |
72+
cd builds/*/build
73+
./unit_test/helpers_tests
74+
./unit_test/seq-catch-tests

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@ data.prototext*
1212

1313
# Can also ignore all directories and files in a directory.
1414
# tmp/**/*
15-
build
15+
builds
1616
spack_environments/users/
1717

1818

1919
# we don't want to collect slurm output
2020
**/slurm-*.out
21+
22+
# Ignore default lbann output experiment directory names
23+
????????_??????_lbann*/

.gitlab-ci.yml

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,32 @@
2828
# clusters. To run testing locally, consult the README in the ci_test
2929
# directory.
3030

31+
variables:
32+
FF_USE_NEW_BASH_EVAL_STRATEGY: 'true'
33+
FF_ENABLE_BASH_EXIT_CODE_CHECK: 1
34+
LBANN_CI_CLEAN_BUILD: 'true'
35+
3136
stages:
3237
- run-all-clusters
3338

34-
catalyst testing:
39+
corona testing:
3540
stage: run-all-clusters
3641
variables:
3742
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
43+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
3844
trigger:
3945
strategy: depend
40-
include: .gitlab/catalyst/pipeline.yml
46+
include: .gitlab/corona/pipeline.yml
4147

42-
corona testing:
48+
corona distconv testing:
4349
stage: run-all-clusters
4450
variables:
51+
JOB_NAME_SUFFIX: _distconv
52+
SPACK_ENV_BASE_NAME_MODIFIER: "-distconv"
53+
SPACK_SPECS: "+rocm +distconv"
4554
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
55+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
56+
TEST_FLAG: "test_*_distconv.py"
4657
trigger:
4758
strategy: depend
4859
include: .gitlab/corona/pipeline.yml
@@ -51,31 +62,76 @@ lassen testing:
5162
stage: run-all-clusters
5263
variables:
5364
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
65+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
5466
trigger:
5567
strategy: depend
5668
include: .gitlab/lassen/pipeline.yml
5769

70+
lassen distconv testing:
71+
stage: run-all-clusters
72+
variables:
73+
JOB_NAME_SUFFIX: _distconv
74+
SPACK_ENV_BASE_NAME_MODIFIER: "-multi-stage-distconv"
75+
SPACK_SPECS: "+cuda +distconv +fft"
76+
# SPACK_SPECS: "+cuda +distconv +nvshmem +fft"
77+
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
78+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
79+
TEST_FLAG: "test_*_distconv.py"
80+
trigger:
81+
strategy: depend
82+
include: .gitlab/lassen/multi_stage_pipeline.yml
83+
5884
pascal testing:
5985
stage: run-all-clusters
6086
variables:
6187
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
88+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
6289
trigger:
6390
strategy: depend
6491
include: .gitlab/pascal/pipeline.yml
6592

6693
pascal compiler testing:
6794
stage: run-all-clusters
6895
variables:
69-
SPACK_SPECS: "%gcc@8.3.1 +cuda +half +fft"
96+
SPACK_SPECS: "%gcc@10.3.1 +cuda +half +fft"
7097
BUILD_SCRIPT_OPTIONS: "--no-default-mirrors"
98+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
7199
trigger:
72100
strategy: depend
73101
include: .gitlab/pascal/pipeline_compiler_tests.yml
74102

103+
pascal distconv testing:
104+
stage: run-all-clusters
105+
variables:
106+
JOB_NAME_SUFFIX: _distconv
107+
SPACK_SPECS: "%gcc@10.3.1 +cuda +distconv +fft"
108+
BUILD_SCRIPT_OPTIONS: "--no-default-mirrors"
109+
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
110+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
111+
TEST_FLAG: "test_*_distconv.py"
112+
trigger:
113+
strategy: depend
114+
include: .gitlab/pascal/pipeline.yml
115+
75116
tioga testing:
76117
stage: run-all-clusters
77118
variables:
119+
# FF_USE_NEW_BASH_EVAL_STRATEGY: 1
120+
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
121+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
122+
trigger:
123+
strategy: depend
124+
include: .gitlab/tioga/pipeline.yml
125+
126+
tioga distconv testing:
127+
stage: run-all-clusters
128+
variables:
129+
JOB_NAME_SUFFIX: _distconv
130+
SPACK_ENV_BASE_NAME_MODIFIER: "-distconv"
131+
SPACK_SPECS: "+rocm +distconv"
78132
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
133+
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
134+
TEST_FLAG: "test_*_distconv.py"
79135
trigger:
80136
strategy: depend
81137
include: .gitlab/tioga/pipeline.yml

0 commit comments

Comments
 (0)