generated from amazon-archives/__template_Apache-2.0
-
Notifications
You must be signed in to change notification settings - Fork 94
Expand file tree
/
Copy pathaction.yml
More file actions
381 lines (341 loc) · 13.1 KB
/
action.yml
File metadata and controls
381 lines (341 loc) · 13.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
name: 'Palace CI'
description: 'Run Palace CI with Spack'
inputs:
  toolchain:
    description: 'Toolchain to use'
    required: true
  variant:
    description: 'Palace variant'
    required: false
    default: ''
  math-libs:
    description: 'Math libraries'
    required: false
    default: ''
  run-regression-tests:
    description: 'Run regression tests after build'
    required: false
    default: 'true'
  use-develop-deps:
    description: 'Use develop versions of key dependencies'
    required: false
    default: 'false'
  test-cases:
    description: 'Test cases: "default", "long", or space-separated list'
    required: false
    default: 'default'
  additional-test-cases:
    description: 'Additional test cases to run after default tests (space-separated list)'
    required: false
    default: ''
runs:
  using: 'composite'
  steps:
    - name: Setup environment
      uses: ./.github/actions/setup-runner
      with:
        julia-version: 'release'
        # Spack develop because we want to have access to the most recent
        # version of spack-packages
        spack-version: 'develop'
        # LLVM 19 because it is the latest supported by CUDA 12.9
        llvm-version: ${{ inputs.toolchain == 'llvm' && '19' || 'dont-install' }}
        setup-intel: ${{ inputs.toolchain == 'intel-oneapi' && 'true' || 'false' }}
    - name: Prepare spack.yaml
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        # NOTE(review): this step never calls end_timer, unlike the other
        # timed steps — confirm whether that is intentional.
        if [[ "${{ inputs.variant }}" == *"+cuda"* ]]; then
          # Ask the local GPU for its compute capability (e.g. "9.0" -> "90").
          CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -1 | tr -d '.')
          CUDA_ARGS="cuda_arch=${CUDA_ARCH}"
        else
          CUDA_ARGS=""
        fi
        # As of version 1.2.0-alpha, the spack concretizer hangs when using flang.
        # Also https://github.com/libxsmm/libxsmm/issues/996
        # TODO: Try again in the future to see if we can use it.
        if [[ "${{ inputs.toolchain }}" == "llvm" ]]; then
          FORTRAN_COMPILER="gcc"
        else
          FORTRAN_COMPILER="${{ inputs.toolchain }}"
        fi
        if [[ "${{ inputs.toolchain }}" == "intel-oneapi" ]]; then
          MATH_LIBS="${{ inputs.math-libs || 'intel-oneapi-mkl' }}"
          MPI_IMPL="intel-oneapi-mpi"
          C_CXX_COMPILER="intel-oneapi-compilers"
          FORTRAN_COMPILER="intel-oneapi-compilers"
        else
          # OpenBlas 0.3.30 has issues on arm
          # https://github.com/OpenMathLib/OpenBLAS/issues/5459
          MATH_LIBS="${{ inputs.math-libs || 'openblas@0.3.29' }}"
          MPI_IMPL="openmpi"
          C_CXX_COMPILER="${{ inputs.toolchain }}"
        fi
        PALACE_SPEC="local.palace@develop+libxsmm+superlu-dist+mumps+sundials+strumpack+slepc+arpack${{ inputs.variant }} ${CUDA_ARGS}"
        cat << EOF > spack.yaml
        spack:
          specs:
          - ${PALACE_SPEC}
          view: false
          config:
            install_tree:
              root: ${HOME}/opt/spack
              # We add a lot of padding to support better cache relocation
              # https://spack.readthedocs.io/en/latest/binary_caches.html#relocation
              padded_length: 256
          concretizer:
            reuse: false
            unify: true
            # generic cpu target so that we can increase cache hits
            # (we also do not care too much about hyper-optimal performance)
            targets:
              granularity: generic
          packages:
            petsc:
              require: ~hdf5
            mpi:
              require: ${MPI_IMPL}
            blas:
              require: ${MATH_LIBS}
            lapack:
              require: ${MATH_LIBS}
            c:
              require: [${C_CXX_COMPILER}]
            cxx:
              require: [${C_CXX_COMPILER}]
            fortran:
              require: [${FORTRAN_COMPILER}]
            # Metis is very fragile
            # -fPIC seems to be required because metis always compiles some shared objects
            # -lc seems to be required for intel compilers
            metis:
              require: cflags=-fPIC cppflags=-fPIC ldflags=-lc
            parmetis:
              require: ldflags=-lc
        EOF
        # The Intel compiler has problems with C/C++/Fortran interop in MUMPS.
        # Fixes: for_main.c:(.text+0x19): undefined reference to `MAIN__'
        if [[ "${{ inputs.toolchain }}" == "intel-oneapi" ]]; then
          cat << INTELEOF >> spack.yaml
            mumps:
              require: fflags=-nofor-main
        INTELEOF
        fi
        if [[ "${{ inputs.use-develop-deps }}" == "true" ]]; then
          cat << DEVEOF >> spack.yaml
            hypre:
              require: "@develop"
            mfem:
              require: "@develop"
        DEVEOF
        fi
        if [[ "${{ inputs.variant }}" == *"+cuda"* ]]; then
          cat << CUDAEOF >> spack.yaml
            cuda:
              buildable: false
              externals:
              - spec: cuda@12.9
                prefix: /usr/local/cuda-12.9/
            # When some packages are built with OpenMP in a non-OpenMP build,
            # FindSTRUMPACK in MFEM cannot compile the test program. Hopefully,
            # we can remove this when MFEM is not in the superbuild.
            all:
              require: ~openmp
        CUDAEOF
        fi
        cat << EOF >> spack.yaml
          repos:
          - spack_repo/local
          mirrors:
            spack:
              binary: true
              url: https://binaries.spack.io/develop
            local-buildcache:
              binary: true
              url: oci://ghcr.io/awslabs/palace-develop-testing
              signed: false
              access_pair:
                id_variable: GITHUB_USER
                secret_variable: GITHUB_TOKEN
        EOF
    - name: Configure Compilers
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        if [[ "${{ inputs.toolchain }}" == "intel-oneapi" ]]; then
          . /opt/intel/oneapi/setvars.sh
        fi
        spack -e . compiler find && spack -e . compiler list
        end_timer "Configure Compilers"
    # Spack and oneAPI-2025.3.1 have some compatibility problems where the
    # version of the fortran compiler is not correctly identified. As a
    # workaround, we manually edit the relevant entry in the spack.yaml to put
    # all the compilers under the same version. This amounts to merging two
    # entries (for two different versions) into a single one.
    #
    # TODO: Check if this issue is still relevant, if not, remove this step.
    - name: Merge Intel Compiler Entries
      if: inputs.toolchain == 'intel-oneapi'
      shell: bash
      run: |
        python3 << 'EOF'
        import yaml

        with open('spack.yaml', 'r') as f:
            data = yaml.safe_load(f)
        # Get Intel compiler externals
        intel_externals = data['spack']['packages']['intel-oneapi-compilers']['externals']
        if len(intel_externals) > 1:
            # Merge compilers from all entries
            merged_compilers = {}
            newest_spec = None
            newest_prefix = None
            for entry in intel_externals:
                # Keep track of the newest version
                if newest_spec is None or entry['spec'] > newest_spec:
                    newest_spec = entry['spec']
                    newest_prefix = entry['prefix']
                # Merge all compilers
                if 'extra_attributes' in entry and 'compilers' in entry['extra_attributes']:
                    compilers = entry['extra_attributes']['compilers']
                    merged_compilers.update(compilers)
            # Create single merged entry
            merged_entry = {
                'spec': newest_spec,
                'prefix': newest_prefix,
                'extra_attributes': {
                    'compilers': merged_compilers
                }
            }
            # Replace ALL entries with single merged entry
            data['spack']['packages']['intel-oneapi-compilers']['externals'] = [merged_entry]
            # Write back
            # NOTE(review): write-back placed inside the merge branch so an
            # already-merged file is left untouched — confirm against original.
            with open('spack.yaml', 'w') as f:
                yaml.dump(data, f, default_flow_style=False)
            print("Merged Intel compiler entries into single entry")
        EOF
    - name: Display spack.yaml
      shell: bash
      run: |
        echo "=== Generated spack.yaml ==="
        cat spack.yaml
        echo "=== End spack.yaml ==="
    - name: Configure Binary Mirror Keys
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        spack -e . buildcache keys --install --trust
        end_timer "Configure Binary Mirror Keys"
    - name: Bootstrap
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        spack -e . bootstrap now
        end_timer "Bootstrap"
    - name: Concretize
      shell: bash
      env:
        PALACE_REF: ${{ github.head_ref || github.ref_name }}
      run: |
        source /tmp/timing_functions.sh
        start_timer
        spack -e . develop --path=$(pwd) local.palace@git."${PALACE_REF}"=develop
        spack -e . concretize -f --test root
        end_timer "Concretize"
    - name: Build Dependencies
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        spack -e . install --only-concrete --no-check-signature --fail-fast --show-log-on-error --only dependencies --test root -j $(nproc)
        end_timer "Build Dependencies"
    - name: Build Palace
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        spack -e . install --only-concrete --keep-stage --show-log-on-error --only package --no-cache --test root -j $(nproc)
        end_timer "Build Palace"
    # We don't want to run unit tests as part of the long test run. They are
    # already run elsewhere.
    - name: Run Unit Tests
      if: inputs.test-cases != 'long'
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        eval "$(spack -e . load --sh palace)"
        # Use nproc, but halve to account for hyperthreading.
        # NOTE: This CI action only runs on Linux runners (lscpu/nproc are Linux-specific).
        LOGICAL_CORES=$(nproc)
        THREADS_PER_CORE=$(lscpu | grep "Thread(s) per core" | awk '{print $4}')
        if [ "$THREADS_PER_CORE" = "2" ]; then
          export NUM_PROC_TEST=$((LOGICAL_CORES / 2))
        else
          export NUM_PROC_TEST=$LOGICAL_CORES
        fi
        if [[ "${{ inputs.variant }}" == *"+openmp"* ]]; then
          # Give each MPI rank two OpenMP threads and halve the rank count.
          NUM_PROC_TEST=$(( NUM_PROC_TEST / 2 ))
          export OMP_NUM_THREADS=2
        else
          export OMP_NUM_THREADS=1
        fi
        cd "$(spack -e . location -b palace)"
        # Skip serial/mpi libCEED tests on CUDA builds due to umpire host memory
        # allocation bug.
        #
        # TODO: Remove this when MFEM/Palace switches to using standard host
        # allocation.
        if [[ "${{ inputs.variant }}" == *"+cuda"* ]]; then
          CTEST_EXCLUDE="--exclude-regex (serial|mpi)-.*libCEED"
        else
          CTEST_EXCLUDE=""
        fi
        # Disable OpenMPI 5.x default CPU binding so MPI processes don't starve
        # OpenMP threads during parallel test execution. OpenMPI 5.x uses PRRTE,
        # so we set both the PRTE and legacy OMPI MCA variables for robustness.
        export PRTE_MCA_hwloc_default_binding_policy=none
        export OMPI_MCA_hwloc_base_binding_policy=none
        # Runs all hardware-compatible tests, skipping Catch2 benchmarks.
        ctest -j $NUM_PROC_TEST --output-on-failure $CTEST_EXCLUDE
        end_timer "Run Unit Tests"
    # We upload the spack environment as an artifact so that next runners can
    # pick up the very same environment and have consistent binaries (pulled
    # from the cache).
    - name: Upload spack environment
      uses: actions/upload-artifact@v4
      if: inputs.run-regression-tests != 'true'
      with:
        name: spack-env-${{ runner.arch }}-${{ inputs.toolchain }}-${{ inputs.variant || 'default' }}-${{ inputs.math-libs || 'default' }}
        path: |
          spack.yaml
          spack.lock
        retention-days: 7
    - name: Push to GHCR cache
      shell: bash
      run: |
        source /tmp/timing_functions.sh
        start_timer
        # Best-effort: cache pushes must never fail the build (|| true).
        spack -e . buildcache push --force --with-build-dependencies --unsigned --update-index local-buildcache || true
        end_timer "Push to GHCR cache"
    - name: Run Regression Tests
      if: inputs.run-regression-tests == 'true'
      uses: ./.github/actions/run-regression-tests
      with:
        testing-on-build-runner: 'true'
        variant: ${{ inputs.variant }}
        test-cases: ${{ inputs.test-cases }}
        additional-test-cases: ${{ inputs.additional-test-cases }}
    # We print timing if we don't run the regression tests, otherwise, it will
    # be regression test job that prints the file. (This is for the build-xxx
    # jobs)
    - name: Timing information
      if: inputs.run-regression-tests != 'true'
      shell: bash
      run: |
        cat /tmp/timing.log