Skip to content

Array Repeat operator should not use a loop for small sizes #97804

Open
@programmerjake

Description

@programmerjake

The array repeat operator (`[x; N]`) being lowered to a loop even for small sizes likely caused poor assembly for Simd::splat in some circumstances:
https://rust-lang.zulipchat.com/#narrow/stream/257879-project-portable-simd/topic/Very.20bad.20.60u16x8.3A.3Asplat.60.20codegen.20on.20x86_64

I opened a PR for a workaround: rust-lang/portable-simd#284

Reduced reproducer:
https://godbolt.org/z/rhvEjz5MW

#![feature(portable_simd)]
use core::{mem, simd::u8x16};

/// Reduced reproducer for the array-repeat codegen issue.
///
/// Builds `[v; 8]` (eight copies of the `u16` argument, 16 bytes in total),
/// reinterprets those bytes as a `u8x16` vector, and then stores the vector's
/// bytes into `*p`.
///
/// The array repeat expression `[v; 8]` is the construct under test; keep the
/// body exactly as written so the reproducer still exercises it.
pub fn f(v: u16, p: &mut [u8; 16]) {
    // SAFETY: `mem::transmute` only compiles when source and destination have
    // the same size, so `[u16; 8]`, `u8x16` and `[u8; 16]` are all 16 bytes
    // here, and every bit pattern is a valid `u8`.
    unsafe {
        // Shadows the `u16` parameter with the reinterpreted vector.
        let v: u8x16 = mem::transmute([v; 8]);
        // Write the vector's 16 bytes through the caller's mutable reference.
        *p = mem::transmute(v);
    }
}
.LCPI0_0:
        .short  65535
        .short  65535
        .short  65535
        .short  0
        .short  65535
        .short  65535
        .short  65535
        .short  65535
.LCPI0_1:
        .short  65535
        .short  65535
        .short  65535
        .short  65535
        .short  65535
        .short  0
        .short  65535
        .short  65535
.LCPI0_2:
        .short  65535
        .short  65535
        .short  65535
        .short  65535
        .short  65535
        .short  65535
        .short  65535
        .short  0
example::f:
        movd    xmm0, edi
        pshuflw xmm1, xmm0, 0
        pshufd  xmm2, xmm0, 0
        punpckldq       xmm1, xmm2
        movdqa  xmm2, xmm0
        psllq   xmm2, 48
        movdqa  xmm3, xmmword ptr [rip + .LCPI0_0]
        pand    xmm1, xmm3
        pandn   xmm3, xmm2
        por     xmm3, xmm1
        movdqa  xmm1, xmm3
        punpcklqdq      xmm1, xmm0
        movdqa  xmm2, xmm0
        pslldq  xmm2, 10
        movdqa  xmm4, xmmword ptr [rip + .LCPI0_1]
        pand    xmm1, xmm4
        pandn   xmm4, xmm2
        por     xmm4, xmm1
        movsd   xmm4, xmm0
        shufps  xmm3, xmm4, 36
        andps   xmm3, xmmword ptr [rip + .LCPI0_2]
        pslldq  xmm0, 14
        por     xmm0, xmm3
        movdqu  xmmword ptr [rsi], xmm0
        ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
    A-array — Area: `[T; N]`
    C-bug — Category: This is a bug.
    E-needs-test — Call for participation: An issue has been fixed and does not reproduce, but no test has been added.
    I-slow — Issue: Problems and improvements with respect to performance of generated code.
    T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions