Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 76 additions & 1 deletion compiler/z/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15559,11 +15559,86 @@ TR::Register *OMR::Z::TreeEvaluator::vcmpgeEvaluator(TR::Node *node, TR::CodeGen
}
}

TR::Register *OMR::Z::TreeEvaluator::vreductionAddEvaluator(TR::Node *node, TR::CodeGenerator *cg)
// Emits the instruction sequence that adds together all integer lanes of a vector register
// and returns the sum in a register obtained from cg->allocateRegister().
//
// node      - the reduction node the generated instructions are attached to
// cg        - the code generator used to allocate registers and emit instructions
// sourceReg - vector register holding the lanes to be summed
// type      - element type of the vector; Int8, Int16, Int32 and Int64 are handled,
//             anything else triggers a fatal assertion
//
// The vector register handed in by the caller is not released here; only registers
// obtained inside this helper are passed to stopUsingRegister.
TR::Register *vIntReductionAddHelper(TR::Node *node, TR::CodeGenerator *cg, TR::Register *sourceReg, TR::DataType type)
{
// needPreReduction is set for Int8 and Int16, which go through an extra VSUM step below.
bool needPreReduction = false;
// Element-size code handed to the vector instructions: 0 for Int8, 1 for Int16,
// 2 for Int32 and 3 for Int64.
uint8_t elementSizeMask = 0;
switch (type) {
case TR::Int8:
needPreReduction = true;
break;
case TR::Int16:
needPreReduction = true;
elementSizeMask = 1;
break;
case TR::Int32:
elementSizeMask = 2;
break;
case TR::Int64:
elementSizeMask = 3;
break;
default:
TR_ASSERT_FATAL_WITH_NODE(node, false, "Encountered unsupported data type: %s", type.toString());
}

TR::Register *scratchReg = cg->allocateRegister(TR_VRF);
// Zeroing the scratch register.
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, scratchReg, 0, 0);
if (needPreReduction) {
// We can not sum all lanes in one operation when the lane size is byte or halfword.
// Calculating the sum of byte or halfword into an intermediate word so we can add all word in the next step.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ehsankianifar - How does this code handles the overflow for byte and half word ? VSUM would zero extend the intermediate sum and place it in the word, but VSUMQ would not do that right ?

Does reduction opcode doubles the element type ? If not, I believe VSUMQ possibly can produce result that is larger than element size.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I confirmed with @gita-omr that we do not need to handle overflow in this opcode.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok - I would like that to be in comment here - or at least in documentation.

// NOTE: overflow of the sum beyond the element size is intentionally not handled for this
// opcode (confirmed during review of this change).
// The VSUM result goes into tmpSourceReg, which then replaces sourceReg for the final step.
// NOTE(review): tryToReuseInputVectorRegs presumably returns either a child register that
// may be overwritten or a fresh vector register - confirm against its definition.
TR::Register *tmpSourceReg = TR::TreeEvaluator::tryToReuseInputVectorRegs(node, cg);
generateVRRcInstruction(cg, TR::InstOpCode::VSUM, node, tmpSourceReg, sourceReg, scratchReg, 0, 0,
elementSizeMask);
sourceReg = tmpSourceReg;
}

// Reduce word or doubleword size to one element.
// After the pre-reduction the intermediate values are words, hence the fixed size code 2.
generateVRRcInstruction(cg, TR::InstOpCode::VSUMQ, node, scratchReg, sourceReg, scratchReg, 0, 0,
needPreReduction ? 2 : elementSizeMask);

// Extract one element of the original element size from the scratch register.
// (16 >> elementSizeMask) - 1 is the index of the last element of that size in a 16-byte
// vector: 15 for bytes, 7 for halfwords, 3 for words, 1 for doublewords.
// NOTE(review): presumably this is the low-order part of the VSUMQ result - confirm
// against the instruction definitions.
TR::Register *resultReg = cg->allocateRegister();
generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, resultReg, scratchReg,
generateS390MemoryReference((16 >> elementSizeMask) - 1, cg), elementSizeMask);

// When the pre-reduction ran, sourceReg now refers to tmpSourceReg, so that is the register
// released here rather than the one passed in by the caller.
if (needPreReduction)
cg->stopUsingRegister(sourceReg);
cg->stopUsingRegister(scratchReg);

return resultReg;
}

// Floating-point counterpart of vIntReductionAddHelper.
// The source and type arguments are accepted but not used: the request is forwarded
// unchanged to TR::TreeEvaluator::unImpOpEvaluator, whose result is returned as is.
TR::Register *vFloatReductionAddHelper(TR::Node *node, TR::CodeGenerator *cg, TR::Register *source, TR::DataType type)
{
    TR::Register *delegatedResult = TR::TreeEvaluator::unImpOpEvaluator(node, cg);
    return delegatedResult;
}

// Evaluator for the vector add-reduction node.
//
// Evaluates the single vector child, dispatches on the vector's element type to the integer
// or floating-point helper, and records the helper's result register on the node.
// Only 128-bit vectors are accepted; any other vector length, or an element type that is
// neither integral nor float, triggers a fatal assertion.
TR::Register *OMR::Z::TreeEvaluator::vreductionAddEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
    TR::Node *vectorChild = node->getFirstChild();
    TR::DataType vectorType = vectorChild->getDataType();

    TR_ASSERT_FATAL_WITH_NODE(node, vectorType.getVectorLength() == TR::VectorLength128,
        "Only 128-bit vectors are supported %s", vectorType.toString());

    // The child must be evaluated before the helpers run: they consume its register.
    TR::Register *vectorReg = cg->evaluate(vectorChild);
    TR::DataType elementType = vectorType.getVectorElementType();

    TR::Register *sumReg = NULL;
    if (elementType.isIntegral()) {
        sumReg = vIntReductionAddHelper(node, cg, vectorReg, elementType);
    } else {
        if (elementType.isFloat()) {
            sumReg = vFloatReductionAddHelper(node, cg, vectorReg, elementType);
        } else {
            TR_ASSERT_FATAL_WITH_NODE(node, false, "Encountered unsupported data type: %s", elementType.toString());
        }
    }

    // Drop this node's reference on the child once its register has been consumed.
    cg->decReferenceCount(vectorChild);
    node->setRegister(sumReg);
    return sumReg;
}

TR::Register *OMR::Z::TreeEvaluator::vreductionAndEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
Expand Down