diff --git a/compiler/p/codegen/OMRCodeGenerator.cpp b/compiler/p/codegen/OMRCodeGenerator.cpp index 21407f4ed0c..dddca19c404 100644 --- a/compiler/p/codegen/OMRCodeGenerator.cpp +++ b/compiler/p/codegen/OMRCodeGenerator.cpp @@ -1790,6 +1790,8 @@ bool OMR::Power::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::I case TR::v2m: case TR::vblend: return true; + case TR::m2s: + return true; case TR::m2v: // only P9 has splat byte immediate, otherwise it's too expensive return cpu->isAtLeast(OMR_PROCESSOR_PPC_P9); diff --git a/compiler/p/codegen/OMRInstOpCode.enum b/compiler/p/codegen/OMRInstOpCode.enum index 005029bbf33..c047ce49a7b 100644 --- a/compiler/p/codegen/OMRInstOpCode.enum +++ b/compiler/p/codegen/OMRInstOpCode.enum @@ -804,7 +804,7 @@ // vupklpx, // Vector Unpack Low Pixel // vupkhsw, // Vector Unpack High Signed Word // vpksdss, // Vector Pack Signed Dword Signed Saturate -// vpksdus, // Vector Pack Signed Dword Unsigned Saturate + vpksdus, // Vector Pack Signed Dword Unsigned Saturate vpkuhum, // vector pack unsigned half word unsigned modulo vpkuwum, // vector pack unsigned word unsigned modulo // vpkuwus, // Vector Pack Unsigned Word Unsigned Saturate diff --git a/compiler/p/codegen/OMRInstOpCodeProperties.hpp b/compiler/p/codegen/OMRInstOpCodeProperties.hpp index e23c363e900..9817985f382 100644 --- a/compiler/p/codegen/OMRInstOpCodeProperties.hpp +++ b/compiler/p/codegen/OMRInstOpCodeProperties.hpp @@ -9079,17 +9079,16 @@ /* PPCOpProp_SyncSideEffectFree, */ /* }, */ - /* { */ - /* .mnemonic = OMR::InstOpCode::vpksdus, */ - /* .name = "vpksdus", */ - /* .description = "Vector Pack Signed Dword Unsigned Saturate", */ - /* .prefix = 0x00000000, */ - /* .opcode = 0x1000054E, */ - /* .format = FORMAT_UNKNOWN, */ - /* .minimumALS = OMR_PROCESSOR_PPC_P8, */ - /* .properties = PPCOpProp_IsVMX | */ - /* PPCOpProp_SyncSideEffectFree, */ - /* }, */ + { + /* .mnemonic = */ OMR::InstOpCode::vpksdus, + /* .name = */ "vpksdus", + /* .description = "Vector Pack Signed Dword Unsigned Saturate", */ + /* .prefix = */ 0x00000000, + /* .opcode = */ 0x1000054E, + /* .format = */ FORMAT_VRT_VRA_VRB, + /* .minimumALS = */ OMR_PROCESSOR_PPC_P8, + /* .properties = */ PPCOpProp_IsVMX | PPCOpProp_SyncSideEffectFree, + }, { /* .mnemonic = */ OMR::InstOpCode::vpkuhum, @@ -9097,7 +9096,7 @@ /* .description = "vector pack unsigned half word unsigned modulo", */ /* .prefix = */ 0x00000000, /* .opcode = */ 0x1000000E, - /* .format = */ FORMAT_UNKNOWN, + /* .format = */ FORMAT_VRT_VRA_VRB, /* .minimumALS = */ OMR_PROCESSOR_PPC_P6, /* .properties = */ PPCOpProp_IsVMX | PPCOpProp_SyncSideEffectFree, }, @@ -9108,7 +9107,7 @@ /* .description = "vector pack unsigned word unsigned modulo", */ /* .prefix = */ 0x00000000, /* .opcode = */ 0x1000004E, - /* .format = */ FORMAT_UNKNOWN, + /* .format = */ FORMAT_VRT_VRA_VRB, /* .minimumALS = */ OMR_PROCESSOR_PPC_P6, /* .properties = */ PPCOpProp_IsVMX | PPCOpProp_SyncSideEffectFree, }, diff --git a/compiler/p/codegen/OMRTreeEvaluator.cpp b/compiler/p/codegen/OMRTreeEvaluator.cpp index 0eefb54eb16..53ac9b826ce 100644 --- a/compiler/p/codegen/OMRTreeEvaluator.cpp +++ b/compiler/p/codegen/OMRTreeEvaluator.cpp @@ -1069,7 +1069,35 @@ TR::Register *OMR::Power::TreeEvaluator::m2bEvaluator(TR::Node *node, TR::CodeGe TR::Register *OMR::Power::TreeEvaluator::m2sEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::unImpOpEvaluator(node, cg); + TR::Node *child = node->getFirstChild(); + + TR::Register *srcReg = cg->evaluate(child); + TR::Register *dstReg = cg->allocateRegister(TR_GPR); + + TR::Register *tmpReg = cg->allocateRegister(TR_VRF); + + node->setRegister(dstReg); + + // set all but least significant bit of each doubleword element to 0 + generateTrg1ImmInstruction(cg, TR::InstOpCode::vspltisw, node, tmpReg, -1); + generateTrg1Src2Instruction(cg, TR::InstOpCode::vsrw, node, tmpReg, srcReg, tmpReg); + + // reverse element order if little endian + if (cg->comp()->target().cpu.isLittleEndian()) + generateTrg1Src2ImmInstruction(cg, TR::InstOpCode::xxpermdi, node, tmpReg, tmpReg, tmpReg, 2); + + // pack doubleword elements into byte-length elements + generateTrg1Src2Instruction(cg, TR::InstOpCode::vpksdus, node, tmpReg, tmpReg, tmpReg); // doubleword -> word + generateTrg1Src2Instruction(cg, TR::InstOpCode::vpkuwum, node, tmpReg, tmpReg, tmpReg); // word -> halfword + generateTrg1Src2Instruction(cg, TR::InstOpCode::vpkuhum, node, tmpReg, tmpReg, tmpReg); // halfword -> byte + + // move to GPR + generateTrg1Src1Instruction(cg, TR::InstOpCode::mfvsrwz, node, dstReg, tmpReg); + + cg->stopUsingRegister(tmpReg); + cg->decReferenceCount(child); + + return dstReg; } TR::Register *OMR::Power::TreeEvaluator::m2iEvaluator(TR::Node *node, TR::CodeGenerator *cg)