diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 39dd7a3..91d3d96 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,6 +10,7 @@ env:
   CARGO_TERM_COLOR: always
   CARGO_INCREMENTAL: 0
   MATMUL_NUM_THREADS: 4
+  RUST_BACKTRACE: full

 jobs:
   tests:
@@ -119,6 +120,9 @@ jobs:
     strategy:
       matrix:
         include:
+          - rust: stable
+            target: s390x-unknown-linux-gnu
+            features: constconf cgemm threading
           - rust: stable
             target: aarch64-unknown-linux-gnu
             features: constconf cgemm threading
@@ -146,7 +150,9 @@ jobs:
         run: cross test --target "${{ matrix.target }}" --features "${{ matrix.features }}"
         env:
           MMTEST_FAST_TEST: 1
-          RUSTFLAGS: -Copt-level=2
+      - name: Tests (Release)
+        run: cross test --release --target "${{ matrix.target }}" --features "${{ matrix.features }}"
+

   cargo-careful:
     runs-on: ubuntu-latest
diff --git a/src/gemm.rs b/src/gemm.rs
index 63dbdcd..f7605d5 100644
--- a/src/gemm.rs
+++ b/src/gemm.rs
@@ -344,7 +344,8 @@ const MASK_BUF_SIZE: usize = KERNEL_MAX_SIZE + KERNEL_MAX_ALIGN - 1;
 // bugs we have seen on certain platforms (macos) that look like
 // we don't get aligned allocations out of TLS - 16- and 8-byte
 // allocations have been seen, make the minimal align request we can.
-#[cfg_attr(not(target_os = "macos"), repr(align(32)))]
+// Align(32) would not work with TLS for s390x.
+#[cfg_attr(not(target_os = "macos"), repr(align(16)))]
 struct MaskBuffer {
     buffer: [u8; MASK_BUF_SIZE],
 }