|
39 | 39 | #include "opto/intrinsicnode.hpp" |
40 | 40 | #include "opto/locknode.hpp" |
41 | 41 | #include "opto/machnode.hpp" |
| 42 | +#include "opto/memnode.hpp" |
42 | 43 | #include "opto/opaquenode.hpp" |
| 44 | +#include "opto/opcodes.hpp" |
43 | 45 | #include "opto/parse.hpp" |
44 | 46 | #include "opto/reachability.hpp" |
45 | 47 | #include "opto/rootnode.hpp" |
46 | 48 | #include "opto/runtime.hpp" |
47 | 49 | #include "opto/subtypenode.hpp" |
| 50 | +#include "opto/type.hpp" |
48 | 51 | #include "runtime/deoptimization.hpp" |
49 | 52 | #include "runtime/sharedRuntime.hpp" |
50 | 53 | #include "utilities/bitMap.inline.hpp" |
@@ -4249,51 +4252,81 @@ void GraphKit::store_String_coder(Node* str, Node* value) { |
4249 | 4252 | value, TypeInt::BYTE, T_BYTE, IN_HEAP | MO_UNORDERED); |
4250 | 4253 | } |
4251 | 4254 |
|
4252 | | -// Capture src and dst memory state with a MergeMemNode |
4253 | | -Node* GraphKit::capture_memory(const TypePtr* src_type, const TypePtr* dst_type) { |
| 4255 | +// If input and output memory types differ, capture the whole memory to preserve |
| 4256 | +// the dependency between preceding and subsequent loads/stores. |
| 4257 | +// For example, the following program: |
| 4258 | +// StoreB |
| 4259 | +// compress_string |
| 4260 | +// LoadB |
| 4261 | +// has this memory graph (use->def): |
| 4262 | +// LoadB -> compress_string -> CharMem |
| 4263 | +// ... -> StoreB -> ByteMem |
| 4264 | +// The intrinsic hides the dependency between LoadB and StoreB, causing |
| 4265 | +// the load to read from memory not containing the result of the StoreB. |
| 4266 | +// The correct memory graph should look like this: |
| 4267 | +// LoadB -> compress_string -> MergeMem -> StoreB |
| 4268 | +Node* GraphKit::capture_memory(const TypePtr*& combined_type, const TypePtr* src_type, const TypePtr* dst_type) { |
4254 | 4269 | if (src_type == dst_type) { |
4255 | 4270 | // Types are equal, we don't need a MergeMemNode |
| 4271 | + combined_type = src_type; |
4256 | 4272 | return memory(src_type); |
4257 | 4273 | } |
4258 | | - MergeMemNode* merge = MergeMemNode::make(map()->memory()); |
4259 | | - record_for_igvn(merge); // fold it up later, if possible |
4260 | | - int src_idx = C->get_alias_index(src_type); |
4261 | | - int dst_idx = C->get_alias_index(dst_type); |
4262 | | - merge->set_memory_at(src_idx, memory(src_idx)); |
4263 | | - merge->set_memory_at(dst_idx, memory(dst_idx)); |
4264 | | - return merge; |
| 4274 | + Node* mem = reset_memory(); |
| 4275 | + set_all_memory(mem); |
| 4276 | + combined_type = TypePtr::BOTTOM; |
| 4277 | + return mem; |
| 4278 | +} |
| 4279 | + |
| 4280 | +// If dst_type and src_type are different, str may have an anti-dependency with another node |
| 4281 | +// consuming src_type. |
| 4282 | +// For example: |
| 4283 | +// compress_string |
| 4284 | +// StoreC |
| 4285 | +// has this memory graph (use->def): |
| 4286 | +// compress_string -> MergeMem -> CharMem |
| 4287 | +// StoreC |
| 4288 | +// The scheduler needs to ensure that compress_string is not executed after StoreC, or it will read |
| 4289 | +// the wrong memory. For normal loads, the scheduler computes its anti-dependencies to ensure the |
| 4290 | +// memory it reads from is not killed. Since we do not compute anti-dependencies for |
| 4291 | +// StrCompressedCopyNode, manually insert a MemBar so the anti-dependency becomes use-def |
| 4292 | +// dependency: |
| 4293 | +// StoreC -> MemBar -> MergeMem -> compress_string -> MergeMem -> CharMem |
| 4294 | +// --------------------------------> |
| 4295 | +void GraphKit::memory_effect(Node* res_mem, const TypePtr* src_type, const TypePtr* dst_type) { |
| 4296 | + set_memory(res_mem, dst_type); |
| 4297 | + if (src_type != dst_type) { |
| 4298 | + Node* all_mem = reset_memory(); |
| 4299 | + set_all_memory(all_mem); |
| 4300 | + Node* membar = new MemBarCPUOrderNode(C, C->get_alias_index(src_type), nullptr); |
| 4301 | + membar->init_req(TypeFunc::Control, control()); |
| 4302 | + membar->init_req(TypeFunc::Memory, all_mem); |
| 4303 | + membar = _gvn.transform(membar); |
| 4304 | + set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control))); |
| 4305 | + set_memory(_gvn.transform(new ProjNode(membar, TypeFunc::Memory)), src_type); |
| 4306 | + } |
4265 | 4307 | } |
4266 | 4308 |
|
4267 | 4309 | Node* GraphKit::compress_string(Node* src, const TypeAryPtr* src_type, Node* dst, Node* count) { |
4268 | 4310 | assert(Matcher::match_rule_supported(Op_StrCompressedCopy), "Intrinsic not supported"); |
4269 | 4311 | assert(src_type == TypeAryPtr::BYTES || src_type == TypeAryPtr::CHARS, "invalid source type"); |
4270 | | - // If input and output memory types differ, capture both states to preserve |
4271 | | - // the dependency between preceding and subsequent loads/stores. |
4272 | | - // For example, the following program: |
4273 | | - // StoreB |
4274 | | - // compress_string |
4275 | | - // LoadB |
4276 | | - // has this memory graph (use->def): |
4277 | | - // LoadB -> compress_string -> CharMem |
4278 | | - // ... -> StoreB -> ByteMem |
4279 | | - // The intrinsic hides the dependency between LoadB and StoreB, causing |
4280 | | - // the load to read from memory not containing the result of the StoreB. |
4281 | | - // The correct memory graph should look like this: |
4282 | | - // LoadB -> compress_string -> MergeMem(CharMem, StoreB(ByteMem)) |
4283 | | - Node* mem = capture_memory(src_type, TypeAryPtr::BYTES); |
4284 | | - StrCompressedCopyNode* str = new StrCompressedCopyNode(control(), mem, src, dst, count); |
| 4312 | + const TypePtr* dst_type = TypeAryPtr::BYTES; |
| 4313 | + const TypePtr* adr_type; |
| 4314 | + Node* mem = capture_memory(adr_type, src_type, dst_type); |
| 4315 | + StrCompressedCopyNode* str = new StrCompressedCopyNode(control(), mem, adr_type, src, dst, count); |
4285 | 4316 | Node* res_mem = _gvn.transform(new SCMemProjNode(_gvn.transform(str))); |
4286 | | - set_memory(res_mem, TypeAryPtr::BYTES); |
| 4317 | + memory_effect(res_mem, src_type, dst_type); |
4287 | 4318 | return str; |
4288 | 4319 | } |
4289 | 4320 |
|
4290 | 4321 | void GraphKit::inflate_string(Node* src, Node* dst, const TypeAryPtr* dst_type, Node* count) { |
4291 | 4322 | assert(Matcher::match_rule_supported(Op_StrInflatedCopy), "Intrinsic not supported"); |
4292 | 4323 | assert(dst_type == TypeAryPtr::BYTES || dst_type == TypeAryPtr::CHARS, "invalid dest type"); |
4293 | | - // Capture src and dst memory (see comment in 'compress_string'). |
4294 | | - Node* mem = capture_memory(TypeAryPtr::BYTES, dst_type); |
4295 | | - StrInflatedCopyNode* str = new StrInflatedCopyNode(control(), mem, src, dst, count); |
4296 | | - set_memory(_gvn.transform(str), dst_type); |
| 4324 | + const TypePtr* src_type = TypeAryPtr::BYTES; |
| 4325 | + const TypePtr* adr_type; |
| 4326 | + Node* mem = capture_memory(adr_type, src_type, dst_type); |
| 4327 | + StrInflatedCopyNode* str = new StrInflatedCopyNode(control(), mem, adr_type, src, dst, count); |
| 4328 | + Node* res_mem = _gvn.transform(str); |
| 4329 | + memory_effect(res_mem, src_type, dst_type); |
4297 | 4330 | } |
4298 | 4331 |
|
4299 | 4332 | void GraphKit::inflate_string_slow(Node* src, Node* dst, Node* start, Node* count) { |
|
0 commit comments