Skip to content

Commit 64d4ec6

Browse files
committed
Run the jit_has_bf16_overflows and jit_has_subnormals checks in the same parallel_for loop, so each batch is scanned while it is still cache-hot, to reduce memory bandwidth usage
1 parent 32084e1 commit 64d4ec6

File tree

1 file changed

+19
-26
lines changed

1 file changed

+19
-26
lines changed

src/plugins/intel_cpu/src/nodes/input.cpp

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -412,36 +412,29 @@ void Input::cloneBlobIfRequired() {
412412
static const size_t batch_size = 2048;
413413
const size_t iterations_num = size / batch_size + 1;
414414

415-
volatile bool has_subnormals_local = false;
416-
volatile bool has_bf16_overflows_local = false;
417-
if (needFlushDenormalsToZero) {
415+
std::atomic<bool> has_subnormals_local(false);
416+
std::atomic<bool> has_bf16_overflows_local(false);
417+
if (needFlushDenormalsToZero || do_bf16_saturation_check) {
418418
parallel_for(iterations_num, [&](int n) {
419419
auto ptr = u32data + n * batch_size;
420-
const jit_has_special_value_base::args_t args1 = {
421-
reinterpret_cast<float const*>(ptr),
422-
std::min(batch_size, (size_t)(u32data + size - ptr)),
423-
false};
424-
425-
fn(&args1);
426-
427-
if (args1.hasTargetValues) {
428-
has_subnormals_local = true;
420+
jit_has_special_value_base::args_t args = {reinterpret_cast<float const*>(ptr),
421+
std::min(batch_size, (size_t)(u32data + size - ptr)),
422+
false};
423+
424+
if (needFlushDenormalsToZero && !has_subnormals_local) {
425+
fn(&args);
426+
if (args.hasTargetValues) {
427+
has_subnormals_local = true;
428+
}
429429
}
430-
});
431-
}
432-
433-
if (do_bf16_saturation_check) {
434-
parallel_for(iterations_num, [&](int n) {
435-
auto ptr2 = f32data + n * batch_size;
436-
const jit_has_special_value_base::args_t args2 = {
437-
reinterpret_cast<float const*>(ptr2),
438-
std::min(batch_size, (size_t)(f32data + size - ptr2)),
439-
false};
440-
441-
fn_bf16_check(&args2);
442430

443-
if (args2.hasTargetValues) {
444-
has_bf16_overflows_local = true;
431+
if (do_bf16_saturation_check && !has_bf16_overflows_local) {
432+
// batch_size is small enough, so source data are still cache-hot
433+
args.hasTargetValues = false;
434+
fn_bf16_check(&args);
435+
if (args.hasTargetValues) {
436+
has_bf16_overflows_local = true;
437+
}
445438
}
446439
});
447440
}

0 commit comments

Comments
 (0)