Skip to content

Commit dbac620

Browse files
committed
8366357: C2 SuperWord: refactor VTransformNode::apply with VTransformApplyState
Reviewed-by: chagedorn, kvn, mhaessig
1 parent a6e2a32 commit dbac620

3 files changed

Lines changed: 105 additions & 100 deletions

File tree

src/hotspot/share/opto/superword.cpp

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2108,19 +2108,14 @@ void VTransformGraph::apply_memops_reordering_with_schedule() const {
21082108

21092109
void VTransformGraph::apply_vectorization_for_each_vtnode(uint& max_vector_length, uint& max_vector_width) const {
21102110
ResourceMark rm;
2111-
// We keep track of the resulting Nodes from every "VTransformNode::apply" call.
2112-
// Since "apply" is called on defs before uses, this allows us to find the
2113-
// generated def (input) nodes when we are generating the use nodes in "apply".
2114-
int length = _vtnodes.length();
2115-
GrowableArray<Node*> vtnode_idx_to_transformed_node(length, length, nullptr);
2111+
VTransformApplyState apply_state(_vloop_analyzer, _vtnodes.length());
21162112

21172113
for (int i = 0; i < _schedule.length(); i++) {
21182114
VTransformNode* vtn = _schedule.at(i);
2119-
VTransformApplyResult result = vtn->apply(_vloop_analyzer,
2120-
vtnode_idx_to_transformed_node);
2115+
VTransformApplyResult result = vtn->apply(apply_state);
21212116
NOT_PRODUCT( if (_trace._verbose) { result.trace(vtn); } )
21222117

2123-
vtnode_idx_to_transformed_node.at_put(vtn->_idx, result.node());
2118+
apply_state.set_transformed_node(vtn, result.node());
21242119
max_vector_length = MAX2(max_vector_length, result.vector_length());
21252120
max_vector_width = MAX2(max_vector_width, result.vector_width());
21262121
}
@@ -3074,7 +3069,7 @@ void VTransform::adjust_pre_loop_limit_to_align_main_loop_vectors() {
30743069
const bool is_sub = iv_scale * iv_stride > 0;
30753070

30763071
// 1.1: con
3077-
Node* xbic = igvn().intcon(is_sub ? -con : con);
3072+
Node* xbic = phase()->intcon(is_sub ? -con : con);
30783073
TRACE_ALIGN_VECTOR_NODE(xbic);
30793074

30803075
// 1.2: invar = SUM(invar_summands)
@@ -3091,7 +3086,7 @@ void VTransform::adjust_pre_loop_limit_to_align_main_loop_vectors() {
30913086
phase()->register_new_node(invar_variable, pre_ctrl);
30923087
TRACE_ALIGN_VECTOR_NODE(invar_variable);
30933088
}
3094-
Node* invar_scale_con = igvn().intcon(invar_scale);
3089+
Node* invar_scale_con = phase()->intcon(invar_scale);
30953090
TRACE_ALIGN_VECTOR_NODE(invar_scale_con);
30963091
Node* invar_summand = new MulINode(invar_variable, invar_scale_con);
30973092
phase()->register_new_node(invar_summand, pre_ctrl);
@@ -3143,7 +3138,7 @@ void VTransform::adjust_pre_loop_limit_to_align_main_loop_vectors() {
31433138
// 2: Compute (14):
31443139
// XBIC = xbic / abs(iv_scale)
31453140
// The division is executed as shift
3146-
Node* log2_abs_iv_scale = igvn().intcon(exact_log2(abs(iv_scale)));
3141+
Node* log2_abs_iv_scale = phase()->intcon(exact_log2(abs(iv_scale)));
31473142
Node* XBIC = new URShiftINode(xbic, log2_abs_iv_scale);
31483143
phase()->register_new_node(XBIC, pre_ctrl);
31493144
TRACE_ALIGN_VECTOR_NODE(log2_abs_iv_scale);
@@ -3168,7 +3163,7 @@ void VTransform::adjust_pre_loop_limit_to_align_main_loop_vectors() {
31683163
// = XBIC_OP_old_limit AND (AW - 1)
31693164
// Since AW is a power of 2, the modulo operation can be replaced with
31703165
// a bitmask operation.
3171-
Node* mask_AW = igvn().intcon(AW-1);
3166+
Node* mask_AW = phase()->intcon(AW-1);
31723167
Node* adjust_pre_iter = new AndINode(XBIC_OP_old_limit, mask_AW);
31733168
phase()->register_new_node(adjust_pre_iter, pre_ctrl);
31743169
TRACE_ALIGN_VECTOR_NODE(mask_AW);

src/hotspot/share/opto/vtransform.cpp

Lines changed: 59 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -203,13 +203,13 @@ void VTransform::add_speculative_alignment_check(Node* node, juint alignment) {
203203
TRACE_SPECULATIVE_ALIGNMENT_CHECK(node);
204204
}
205205

206-
Node* mask_alignment = igvn().intcon(alignment-1);
206+
Node* mask_alignment = phase()->intcon(alignment-1);
207207
Node* base_alignment = new AndINode(node, mask_alignment);
208208
phase()->register_new_node(base_alignment, ctrl);
209209
TRACE_SPECULATIVE_ALIGNMENT_CHECK(mask_alignment);
210210
TRACE_SPECULATIVE_ALIGNMENT_CHECK(base_alignment);
211211

212-
Node* zero = igvn().intcon(0);
212+
Node* zero = phase()->intcon(0);
213213
Node* cmp_alignment = CmpNode::make(base_alignment, zero, T_INT, false);
214214
BoolNode* bol_alignment = new BoolNode(cmp_alignment, BoolTest::eq);
215215
phase()->register_new_node(cmp_alignment, ctrl);
@@ -697,69 +697,68 @@ bool VTransformGraph::has_store_to_load_forwarding_failure(const VLoopAnalyzer&
697697
return false;
698698
}
699699

700-
Node* VTransformNode::find_transformed_input(int i, const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
701-
Node* n = vnode_idx_to_transformed_node.at(in_req(i)->_idx);
702-
assert(n != nullptr, "must find input IR node");
700+
void VTransformApplyState::set_transformed_node(VTransformNode* vtn, Node* n) {
701+
assert(_vtnode_idx_to_transformed_node.at(vtn->_idx) == nullptr, "only set once");
702+
_vtnode_idx_to_transformed_node.at_put(vtn->_idx, n);
703+
}
704+
705+
Node* VTransformApplyState::transformed_node(const VTransformNode* vtn) const {
706+
Node* n = _vtnode_idx_to_transformed_node.at(vtn->_idx);
707+
assert(n != nullptr, "must find IR node for vtnode");
703708
return n;
704709
}
705710

706-
VTransformApplyResult VTransformScalarNode::apply(const VLoopAnalyzer& vloop_analyzer,
707-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
711+
VTransformApplyResult VTransformScalarNode::apply(VTransformApplyState& apply_state) const {
708712
// This was just wrapped. Now we simply unwrap without touching the inputs.
709713
return VTransformApplyResult::make_scalar(_node);
710714
}
711715

712-
VTransformApplyResult VTransformReplicateNode::apply(const VLoopAnalyzer& vloop_analyzer,
713-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
714-
Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
716+
VTransformApplyResult VTransformReplicateNode::apply(VTransformApplyState& apply_state) const {
717+
Node* val = apply_state.transformed_node(in_req(1));
715718
VectorNode* vn = VectorNode::scalar2vector(val, _vlen, _element_type);
716-
register_new_node_from_vectorization(vloop_analyzer, vn, val);
719+
register_new_node_from_vectorization(apply_state, vn, val);
717720
return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
718721
}
719722

720-
VTransformApplyResult VTransformConvI2LNode::apply(const VLoopAnalyzer& vloop_analyzer,
721-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
722-
Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
723+
VTransformApplyResult VTransformConvI2LNode::apply(VTransformApplyState& apply_state) const {
724+
Node* val = apply_state.transformed_node(in_req(1));
723725
Node* n = new ConvI2LNode(val);
724-
register_new_node_from_vectorization(vloop_analyzer, n, val);
726+
register_new_node_from_vectorization(apply_state, n, val);
725727
return VTransformApplyResult::make_scalar(n);
726728
}
727729

728-
VTransformApplyResult VTransformShiftCountNode::apply(const VLoopAnalyzer& vloop_analyzer,
729-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
730-
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
731-
Node* shift_count_in = find_transformed_input(1, vnode_idx_to_transformed_node);
730+
VTransformApplyResult VTransformShiftCountNode::apply(VTransformApplyState& apply_state) const {
731+
PhaseIdealLoop* phase = apply_state.phase();
732+
Node* shift_count_in = apply_state.transformed_node(in_req(1));
732733
assert(shift_count_in->bottom_type()->isa_int(), "int type only for shift count");
733734
// The shift_count_in would be automatically truncated to the lowest _mask
734735
// bits in a scalar shift operation. But vector shift does not truncate, so
735736
// we must apply the mask now.
736-
Node* shift_count_masked = new AndINode(shift_count_in, phase->igvn().intcon(_mask));
737-
register_new_node_from_vectorization(vloop_analyzer, shift_count_masked, shift_count_in);
737+
Node* shift_count_masked = new AndINode(shift_count_in, phase->intcon(_mask));
738+
register_new_node_from_vectorization(apply_state, shift_count_masked, shift_count_in);
738739
// Now the masked value is "broadcast" (some platforms only set the lowest element).
739740
VectorNode* vn = VectorNode::shift_count(_shift_opcode, shift_count_masked, _vlen, _element_bt);
740-
register_new_node_from_vectorization(vloop_analyzer, vn, shift_count_in);
741+
register_new_node_from_vectorization(apply_state, vn, shift_count_in);
741742
return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
742743
}
743744

744745

745-
VTransformApplyResult VTransformPopulateIndexNode::apply(const VLoopAnalyzer& vloop_analyzer,
746-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
747-
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
748-
Node* val = find_transformed_input(1, vnode_idx_to_transformed_node);
746+
VTransformApplyResult VTransformPopulateIndexNode::apply(VTransformApplyState& apply_state) const {
747+
PhaseIdealLoop* phase = apply_state.phase();
748+
Node* val = apply_state.transformed_node(in_req(1));
749749
assert(val->is_Phi(), "expected to be iv");
750750
assert(VectorNode::is_populate_index_supported(_element_bt), "should support");
751751
const TypeVect* vt = TypeVect::make(_element_bt, _vlen);
752-
VectorNode* vn = new PopulateIndexNode(val, phase->igvn().intcon(1), vt);
753-
register_new_node_from_vectorization(vloop_analyzer, vn, val);
752+
VectorNode* vn = new PopulateIndexNode(val, phase->intcon(1), vt);
753+
register_new_node_from_vectorization(apply_state, vn, val);
754754
return VTransformApplyResult::make_vector(vn, _vlen, vn->length_in_bytes());
755755
}
756756

757-
VTransformApplyResult VTransformElementWiseVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
758-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
757+
VTransformApplyResult VTransformElementWiseVectorNode::apply(VTransformApplyState& apply_state) const {
759758
Node* first = nodes().at(0);
760759
uint vlen = nodes().length();
761760
int opc = first->Opcode();
762-
BasicType bt = vloop_analyzer.types().velt_basic_type(first);
761+
BasicType bt = apply_state.vloop_analyzer().types().velt_basic_type(first);
763762

764763
if (first->is_Cmp()) {
765764
// Cmp + Bool -> VectorMaskCmp
@@ -769,9 +768,9 @@ VTransformApplyResult VTransformElementWiseVectorNode::apply(const VLoopAnalyzer
769768

770769
assert(2 <= req() && req() <= 4, "Must have 1-3 inputs");
771770
VectorNode* vn = nullptr;
772-
Node* in1 = find_transformed_input(1, vnode_idx_to_transformed_node);
773-
Node* in2 = (req() >= 3) ? find_transformed_input(2, vnode_idx_to_transformed_node) : nullptr;
774-
Node* in3 = (req() >= 4) ? find_transformed_input(3, vnode_idx_to_transformed_node) : nullptr;
771+
Node* in1 = apply_state.transformed_node(in_req(1));
772+
Node* in2 = (req() >= 3) ? apply_state.transformed_node(in_req(2)) : nullptr;
773+
Node* in3 = (req() >= 4) ? apply_state.transformed_node(in_req(3)) : nullptr;
775774

776775
if (first->is_CMove()) {
777776
assert(req() == 4, "three inputs expected: mask, blend1, blend2");
@@ -791,7 +790,7 @@ VTransformApplyResult VTransformElementWiseVectorNode::apply(const VLoopAnalyzer
791790
// The scalar operation was a long -> int operation.
792791
// However, the vector operation is long -> long.
793792
VectorNode* long_vn = VectorNode::make(opc, in1, nullptr, vlen, T_LONG);
794-
register_new_node_from_vectorization(vloop_analyzer, long_vn, first);
793+
register_new_node_from_vectorization(apply_state, long_vn, first);
795794
// Cast long -> int, to mimic the scalar long -> int operation.
796795
vn = VectorCastNode::make(Op_VectorCastL2X, long_vn, T_INT, vlen);
797796
} else if (req() == 3 ||
@@ -809,65 +808,62 @@ VTransformApplyResult VTransformElementWiseVectorNode::apply(const VLoopAnalyzer
809808
vn = VectorNode::make(opc, in1, in2, in3, vlen, bt); // ternary
810809
}
811810

812-
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
811+
register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn);
813812
return VTransformApplyResult::make_vector(vn, vlen, vn->length_in_bytes());
814813
}
815814

816-
VTransformApplyResult VTransformBoolVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
817-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
815+
VTransformApplyResult VTransformBoolVectorNode::apply(VTransformApplyState& apply_state) const {
818816
BoolNode* first = nodes().at(0)->as_Bool();
819817
uint vlen = nodes().length();
820-
BasicType bt = vloop_analyzer.types().velt_basic_type(first);
818+
BasicType bt = apply_state.vloop_analyzer().types().velt_basic_type(first);
821819

822820
// Cmp + Bool -> VectorMaskCmp
823821
VTransformElementWiseVectorNode* vtn_cmp = in_req(1)->isa_ElementWiseVector();
824822
assert(vtn_cmp != nullptr && vtn_cmp->nodes().at(0)->is_Cmp(),
825823
"bool vtn expects cmp vtn as input");
826824

827-
Node* cmp_in1 = vtn_cmp->find_transformed_input(1, vnode_idx_to_transformed_node);
828-
Node* cmp_in2 = vtn_cmp->find_transformed_input(2, vnode_idx_to_transformed_node);
825+
Node* cmp_in1 = apply_state.transformed_node(vtn_cmp->in_req(1));
826+
Node* cmp_in2 = apply_state.transformed_node(vtn_cmp->in_req(2));
829827
BoolTest::mask mask = test()._mask;
830828

831-
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
832-
ConINode* mask_node = phase->igvn().intcon((int)mask);
829+
PhaseIdealLoop* phase = apply_state.phase();
830+
ConINode* mask_node = phase->intcon((int)mask);
833831
const TypeVect* vt = TypeVect::make(bt, vlen);
834832
VectorNode* vn = new VectorMaskCmpNode(mask, cmp_in1, cmp_in2, mask_node, vt);
835-
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
833+
register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn);
836834
return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes());
837835
}
838836

839-
VTransformApplyResult VTransformReductionVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
840-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
837+
VTransformApplyResult VTransformReductionVectorNode::apply(VTransformApplyState& apply_state) const {
841838
Node* first = nodes().at(0);
842839
uint vlen = nodes().length();
843840
int opc = first->Opcode();
844841
BasicType bt = first->bottom_type()->basic_type();
845842

846-
Node* init = find_transformed_input(1, vnode_idx_to_transformed_node);
847-
Node* vec = find_transformed_input(2, vnode_idx_to_transformed_node);
843+
Node* init = apply_state.transformed_node(in_req(1));
844+
Node* vec = apply_state.transformed_node(in_req(2));
848845

849846
ReductionNode* vn = ReductionNode::make(opc, nullptr, init, vec, bt);
850-
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
847+
register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn);
851848
return VTransformApplyResult::make_vector(vn, vlen, vn->vect_type()->length_in_bytes());
852849
}
853850

854-
VTransformApplyResult VTransformLoadVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
855-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
851+
VTransformApplyResult VTransformLoadVectorNode::apply(VTransformApplyState& apply_state) const {
856852
LoadNode* first = nodes().at(0)->as_Load();
857853
uint vlen = nodes().length();
858854
Node* ctrl = first->in(MemNode::Control);
859855
Node* mem = first->in(MemNode::Memory);
860856
Node* adr = first->in(MemNode::Address);
861857
int opc = first->Opcode();
862858
const TypePtr* adr_type = first->adr_type();
863-
BasicType bt = vloop_analyzer.types().velt_basic_type(first);
859+
BasicType bt = apply_state.vloop_analyzer().types().velt_basic_type(first);
864860

865861
// Set the memory dependency of the LoadVector as early as possible.
866862
// Walk up the memory chain, and ignore any StoreVector that provably
867863
// does not have any memory dependency.
868-
const VPointer& load_p = vpointer(vloop_analyzer);
864+
const VPointer& load_p = vpointer(apply_state.vloop_analyzer());
869865
while (mem->is_StoreVector()) {
870-
VPointer store_p(mem->as_Mem(), vloop_analyzer.vloop());
866+
VPointer store_p(mem->as_Mem(), apply_state.vloop());
871867
if (store_p.never_overlaps_with(load_p)) {
872868
mem = mem->in(MemNode::Memory);
873869
} else {
@@ -878,12 +874,11 @@ VTransformApplyResult VTransformLoadVectorNode::apply(const VLoopAnalyzer& vloop
878874
LoadVectorNode* vn = LoadVectorNode::make(opc, ctrl, mem, adr, adr_type, vlen, bt,
879875
control_dependency());
880876
DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } )
881-
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
877+
register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn);
882878
return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size());
883879
}
884880

885-
VTransformApplyResult VTransformStoreVectorNode::apply(const VLoopAnalyzer& vloop_analyzer,
886-
const GrowableArray<Node*>& vnode_idx_to_transformed_node) const {
881+
VTransformApplyResult VTransformStoreVectorNode::apply(VTransformApplyState& apply_state) const {
887882
StoreNode* first = nodes().at(0)->as_Store();
888883
uint vlen = nodes().length();
889884
Node* ctrl = first->in(MemNode::Control);
@@ -892,27 +887,27 @@ VTransformApplyResult VTransformStoreVectorNode::apply(const VLoopAnalyzer& vloo
892887
int opc = first->Opcode();
893888
const TypePtr* adr_type = first->adr_type();
894889

895-
Node* value = find_transformed_input(MemNode::ValueIn, vnode_idx_to_transformed_node);
890+
Node* value = apply_state.transformed_node(in_req(MemNode::ValueIn));
896891
StoreVectorNode* vn = StoreVectorNode::make(opc, ctrl, mem, adr, adr_type, value, vlen);
897892
DEBUG_ONLY( if (VerifyAlignVector) { vn->set_must_verify_alignment(); } )
898-
register_new_node_from_vectorization_and_replace_scalar_nodes(vloop_analyzer, vn);
893+
register_new_node_from_vectorization_and_replace_scalar_nodes(apply_state, vn);
899894
return VTransformApplyResult::make_vector(vn, vlen, vn->memory_size());
900895
}
901896

902-
void VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes(const VLoopAnalyzer& vloop_analyzer, Node* vn) const {
903-
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
897+
void VTransformVectorNode::register_new_node_from_vectorization_and_replace_scalar_nodes(VTransformApplyState& apply_state, Node* vn) const {
898+
PhaseIdealLoop* phase = apply_state.phase();
904899
Node* first = nodes().at(0);
905900

906-
register_new_node_from_vectorization(vloop_analyzer, vn, first);
901+
register_new_node_from_vectorization(apply_state, vn, first);
907902

908903
for (int i = 0; i < _nodes.length(); i++) {
909904
Node* n = _nodes.at(i);
910905
phase->igvn().replace_node(n, vn);
911906
}
912907
}
913908

914-
void VTransformNode::register_new_node_from_vectorization(const VLoopAnalyzer& vloop_analyzer, Node* vn, Node* old_node) const {
915-
PhaseIdealLoop* phase = vloop_analyzer.vloop().phase();
909+
void VTransformNode::register_new_node_from_vectorization(VTransformApplyState& apply_state, Node* vn, Node* old_node) const {
910+
PhaseIdealLoop* phase = apply_state.phase();
916911
phase->register_new_node_with_ctrl_of(vn, old_node);
917912
phase->igvn()._worklist.push(vn);
918913
VectorNode::trace_new_vector(vn, "AutoVectorization");

0 commit comments

Comments
 (0)