Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions compiler/p/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1790,6 +1790,8 @@ bool OMR::Power::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::I
case TR::v2m:
case TR::vblend:
return true;
case TR::m2s:
return true;
case TR::m2v:
// only P9 has splat byte immediate, otherwise it's too expensive
return cpu->isAtLeast(OMR_PROCESSOR_PPC_P9);
Expand Down
2 changes: 1 addition & 1 deletion compiler/p/codegen/OMRInstOpCode.enum
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,7 @@
// vupklpx, // Vector Unpack Low Pixel
// vupkhsw, // Vector Unpack High Signed Word
// vpksdss, // Vector Pack Signed Dword Signed Saturate
// vpksdus, // Vector Pack Signed Dword Unsigned Saturate
vpksdus, // Vector Pack Signed Dword Unsigned Saturate
vpkuhum, // vector pack unsigned half word unsigned modulo
vpkuwum, // vector pack unsigned word unsigned modulo
// vpkuwus, // Vector Pack Unsigned Word Unsigned Saturate
Expand Down
25 changes: 12 additions & 13 deletions compiler/p/codegen/OMRInstOpCodeProperties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9079,25 +9079,24 @@
/* PPCOpProp_SyncSideEffectFree, */
/* }, */

/* { */
/* .mnemonic = OMR::InstOpCode::vpksdus, */
/* .name = "vpksdus", */
/* .description = "Vector Pack Signed Dword Unsigned Saturate", */
/* .prefix = 0x00000000, */
/* .opcode = 0x1000054E, */
/* .format = FORMAT_UNKNOWN, */
/* .minimumALS = OMR_PROCESSOR_PPC_P8, */
/* .properties = PPCOpProp_IsVMX | */
/* PPCOpProp_SyncSideEffectFree, */
/* }, */
{
/* .mnemonic = */ OMR::InstOpCode::vpksdus,
/* .name = */ "vpksdus",
/* .description = "Vector Pack Signed Dword Unsigned Saturate", */
/* .prefix = */ 0x00000000,
/* .opcode = */ 0x1000054E,
/* .format = */ FORMAT_VRT_VRA_VRB,
/* .minimumALS = */ OMR_PROCESSOR_PPC_P8,
/* .properties = */ PPCOpProp_IsVMX | PPCOpProp_SyncSideEffectFree,
},

{
/* .mnemonic = */ OMR::InstOpCode::vpkuhum,
/* .name = */ "vpkuhum",
/* .description = "vector pack unsigned half word unsigned modulo", */
/* .prefix = */ 0x00000000,
/* .opcode = */ 0x1000000E,
/* .format = */ FORMAT_UNKNOWN,
/* .format = */ FORMAT_VRT_VRA_VRB,
/* .minimumALS = */ OMR_PROCESSOR_PPC_P6,
/* .properties = */ PPCOpProp_IsVMX | PPCOpProp_SyncSideEffectFree,
},
Expand All @@ -9108,7 +9107,7 @@
/* .description = "vector pack unsigned word unsigned modulo", */
/* .prefix = */ 0x00000000,
/* .opcode = */ 0x1000004E,
/* .format = */ FORMAT_UNKNOWN,
/* .format = */ FORMAT_VRT_VRA_VRB,
/* .minimumALS = */ OMR_PROCESSOR_PPC_P6,
/* .properties = */ PPCOpProp_IsVMX | PPCOpProp_SyncSideEffectFree,
},
Expand Down
30 changes: 29 additions & 1 deletion compiler/p/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,35 @@ TR::Register *OMR::Power::TreeEvaluator::m2bEvaluator(TR::Node *node, TR::CodeGe

/**
 * Evaluates an m2s node: takes the mask held in the vector register produced
 * by the node's first child and packs its two doubleword lanes into adjacent
 * bytes of a general purpose register.
 *
 * Each doubleword lane is first reduced to its most significant bit (moved
 * down to bit 0, so the lane holds 0 or 1), then the lanes are narrowed
 * doubleword -> word -> halfword -> byte and the packed result is moved to
 * the GPR.
 *
 * NOTE(review): the lane shift must operate on doubleword elements (vsrd).
 * A word-element shift (vsrw) would leave a 1 in both words of a true lane,
 * and the saturating vpksdus would then clamp that lane to 0xFFFFFFFF
 * instead of 1.
 *
 * NOTE(review): vpksdus is registered with a POWER8 minimum level; callers
 * are presumably expected to gate use of this evaluator accordingly -- confirm.
 *
 * @param node the m2s node being evaluated
 * @param cg   the code generator
 * @return the GPR holding the packed mask; it is also set as the node's register
 */
TR::Register *OMR::Power::TreeEvaluator::m2sEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *child = node->getFirstChild();

   TR::Register *srcReg = cg->evaluate(child);
   TR::Register *dstReg = cg->allocateRegister(TR_GPR);
   TR::Register *tmpReg = cg->allocateRegister(TR_VRF);

   node->setRegister(dstReg);

   // set all but least significant bit of each doubleword element to 0:
   // splat -1 so the shift amount taken from each doubleword is 63, then
   // logically shift every doubleword lane of the mask right by that amount
   generateTrg1ImmInstruction(cg, TR::InstOpCode::vspltisw, node, tmpReg, -1);
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vsrd, node, tmpReg, srcReg, tmpReg);

   // reverse element order if little endian
   if (cg->comp()->target().cpu.isLittleEndian())
      generateTrg1Src2ImmInstruction(cg, TR::InstOpCode::xxpermdi, node, tmpReg, tmpReg, tmpReg, 2);

   // pack doubleword elements into byte-length elements
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vpksdus, node, tmpReg, tmpReg, tmpReg); // doubleword -> word
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vpkuwum, node, tmpReg, tmpReg, tmpReg); // word -> halfword
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vpkuhum, node, tmpReg, tmpReg, tmpReg); // halfword -> byte

   // move to GPR
   generateTrg1Src1Instruction(cg, TR::InstOpCode::mfvsrwz, node, dstReg, tmpReg);

   cg->stopUsingRegister(tmpReg);
   cg->decReferenceCount(child);

   return dstReg;
   }

TR::Register *OMR::Power::TreeEvaluator::m2iEvaluator(TR::Node *node, TR::CodeGenerator *cg)
Expand Down