Skip to content

Commit 7e2ed0e

Browse files
authored
async-profiler#1677: Remove cstack=lbr option
1 parent 68244fb commit 7e2ed0e

File tree

8 files changed

+7
-71
lines changed

8 files changed

+7
-71
lines changed

docs/ProfilerOptions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ The below options are `action`s for async-profiler and common for both `asprof`
5151
| N/A | `server=ADDRESS` | Start insecure HTTP server with the given IP address/port to control the profiler. This option can be specified as `-agentpath` argument only. Be careful not to expose async-profiler server in a public network. |
5252
| `--all-user` | `alluser` | Include only user-mode events. This option is helpful when kernel profiling is restricted by `perf_event_paranoid` settings. |
5353
| `--sched` | `sched` | Group threads by Linux-specific scheduling policy: BATCH/IDLE/OTHER. |
54-
| `--cstack MODE` | `cstack=MODE` | How to walk native frames (C stack). Possible modes are `fp` (Frame Pointer), `dwarf` (DWARF unwind info), `lbr` (Last Branch Record, available on Haswell since Linux 4.1), `vm`, `vmx` (HotSpot VM Structs) and `no` (do not collect C stack).<br><br>By default, C stack is shown in cpu, ctimer, wall-clock and perf-events profiles. Java-level events like `alloc` and `lock` collect only Java stack. |
54+
| `--cstack MODE` | `cstack=MODE` | How to walk native frames (C stack). Possible modes are `fp` (Frame Pointer), `dwarf` (DWARF unwind info), `vm`, `vmx` (HotSpot VM Structs) and `no` (do not collect C stack).<br><br>By default, C stack is shown in cpu, ctimer, wall-clock and perf-events profiles. Java-level events like `alloc` and `lock` collect only Java stack. |
5555
| `--signal NUM` | `signal=NUM` | Use alternative signal for cpu or wall clock profiling. To change both signals, specify two numbers separated by a slash: `--signal SIGCPU/SIGWALL`. |
5656
| `--clock SOURCE` | `clock=SOURCE` | Clock source for JFR timestamps: `tsc` (default) or `monotonic` (equivalent to `CLOCK_MONOTONIC`). |
5757
| `--begin function` | `begin=FUNCTION` | Automatically start profiling when the specified native function is executed. |

docs/StackWalkingModes.md

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,6 @@ due to being signal safe in async-profiler.
2323

2424
The feature can be enabled with the option `--cstack dwarf` (or its agent equivalent `cstack=dwarf`).
2525

26-
## LBR
27-
28-
Modern Intel CPUs can profile branch instructions, including `call`s and `ret`s, and store their source and destination
29-
addresses (Last Branch Records) in hardware registers. Starting from Haswell, CPU can match these addresses to form a
30-
branch stack. This branch stack will be effectively a call chain automatically collected by the hardware.
31-
32-
LBR stacks are not always complete or accurate, but they still appear much more helpful compared to FP-based stack
33-
walking, when a native library is compiled with omitted frame pointers. It works only with hardware events like
34-
`-e cycles` (`instructions`, `cache-misses` etc.) and the maximum call chain depth is 32 (hardware limit).
35-
36-
The feature can be enabled with the option `--cstack lbr` (or its agent equivalent `cstack=lbr`).
37-
3826
## VM Structs
3927

4028
async-profiler can leverage JVM internal structures to replicate the logic of Java stack walking

src/arguments.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,6 @@ Error Arguments::parse(const char* args) {
332332
_cstack = CSTACK_FP;
333333
} else if (strcmp(value, "dwarf") == 0) {
334334
_cstack = CSTACK_DWARF;
335-
} else if (strcmp(value, "lbr") == 0) {
336-
_cstack = CSTACK_LBR;
337335
} else if (strcmp(value, "vm") == 0) {
338336
_cstack = CSTACK_VM;
339337
} else if (strcmp(value, "vmx") == 0) {

src/arguments.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ enum SHORT_ENUM CStack {
6060
CSTACK_NO, // do not collect native frames
6161
CSTACK_FP, // walk stack using Frame Pointer links
6262
CSTACK_DWARF, // use DWARF unwinding info from .eh_frame section
63-
CSTACK_LBR, // Last Branch Record hardware capability
6463
CSTACK_VM // unwind using HotSpot VMStructs
6564
};
6665

src/flightRecorder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static jmethodID _stop_method;
6060
static jmethodID _box_method;
6161
static bool _jfr_starting = false;
6262

63-
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "lbr", "vm"};
63+
static const char* const SETTING_CSTACK[] = {NULL, "no", "fp", "dwarf", "vm"};
6464

6565

6666
struct CpuTime {

src/main/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ static const char USAGE_STRING[] =
8585
" --total accumulate the total value (time, bytes, etc.)\n"
8686
" --all-user only include user-mode events\n"
8787
" --sched group threads by scheduling policy\n"
88-
" --cstack mode how to traverse C stack: fp|dwarf|lbr|vm|no\n"
88+
" --cstack mode how to traverse C stack: fp|dwarf|vm|no\n"
8989
" --signal num use alternative signal for cpu or wall clock profiling\n"
9090
" --clock source clock source for JFR timestamps: tsc|monotonic\n"
9191
" --begin function begin profiling when function is executed\n"

src/perfEvents_linux.cpp

Lines changed: 1 addition & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -619,16 +619,6 @@ int PerfEvents::createForThread(int tid) {
619619
attr.exclude_callchain_user = 1;
620620
}
621621

622-
#ifdef PERF_ATTR_SIZE_VER5
623-
if (_cstack == CSTACK_LBR) {
624-
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK | PERF_SAMPLE_REGS_USER;
625-
attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK;
626-
attr.sample_regs_user = 1ULL << PERF_REG_PC;
627-
}
628-
#else
629-
#warning "Compiling without LBR support. Kernel headers 4.1+ required"
630-
#endif
631-
632622
if (_record_cpu) {
633623
attr.sample_type |= PERF_SAMPLE_CPU;
634624
}
@@ -852,7 +842,7 @@ Error PerfEvents::start(Arguments& args) {
852842
// Automatically switch on alluser for non-CPU events, if kernel profiling is unavailable
853843
_alluser = strcmp(args._event, EVENT_CPU) != 0 && !supported();
854844
}
855-
_use_perf_mmap = _kernel_stack || _cstack == CSTACK_DEFAULT || _cstack == CSTACK_LBR || _record_cpu;
845+
_use_perf_mmap = _kernel_stack || _cstack == CSTACK_DEFAULT || _record_cpu;
856846

857847
if (strcmp(_event_type->name, "cpu-clock") == 0 && hasPerfEventRefreshBug()) {
858848
Log::debug("Enable workaround for PERF_EVENT_IOC_REFRESH bug");
@@ -944,36 +934,6 @@ int PerfEvents::walk(int tid, void* ucontext, const void** callchain, int max_de
944934
}
945935
}
946936

947-
if (_cstack == CSTACK_LBR) {
948-
u64 bnr = ring.next();
949-
950-
// Last userspace PC is stored right after branch stack
951-
const void* pc = (const void*)ring.peek(bnr * 3 + 2);
952-
if (CodeHeap::contains(pc) || depth >= max_depth) {
953-
java_ctx->pc = pc;
954-
goto stack_complete;
955-
}
956-
callchain[depth++] = pc;
957-
958-
while (bnr-- > 0) {
959-
const void* from = (const void*)ring.next();
960-
const void* to = (const void*)ring.next();
961-
ring.next();
962-
963-
if (CodeHeap::contains(to) || depth >= max_depth) {
964-
java_ctx->pc = to;
965-
goto stack_complete;
966-
}
967-
callchain[depth++] = to;
968-
969-
if (CodeHeap::contains(from) || depth >= max_depth) {
970-
java_ctx->pc = from;
971-
goto stack_complete;
972-
}
973-
callchain[depth++] = from;
974-
}
975-
}
976-
977937
break;
978938
}
979939
tail += hdr->size;

src/profiler.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,6 @@ int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, EventType
332332

333333
int Profiler::convertNativeTrace(int native_frames, const void** callchain, ASGCT_CallFrame* frames, EventType event_type) {
334334
int depth = 0;
335-
jmethodID prev_method = NULL;
336335

337336
for (int i = 0; i < native_frames; i++) {
338337
const char* current_method_name = findNativeMethod(callchain[i]);
@@ -360,15 +359,9 @@ int Profiler::convertNativeTrace(int native_frames, const void** callchain, ASGC
360359
}
361360
}
362361

363-
jmethodID current_method = (jmethodID)current_method_name;
364-
if (current_method == prev_method && _cstack == CSTACK_LBR) {
365-
// Skip duplicates in LBR stack, where branch_stack[N].from == branch_stack[N+1].to
366-
prev_method = NULL;
367-
} else {
368-
frames[depth].bci = BCI_NATIVE_FRAME;
369-
frames[depth].method_id = prev_method = current_method;
370-
depth++;
371-
}
362+
frames[depth].bci = BCI_NATIVE_FRAME;
363+
frames[depth].method_id = (jmethodID)current_method_name;
364+
depth++;
372365
}
373366

374367
return depth;
@@ -1160,8 +1153,6 @@ Error Profiler::start(Arguments& args, bool reset) {
11601153
_cstack = args._cstack;
11611154
if (_cstack == CSTACK_DWARF && !DWARF_SUPPORTED) {
11621155
return Error("DWARF unwinding is not supported on this platform");
1163-
} else if (_cstack == CSTACK_LBR && _engine != &perf_events) {
1164-
return Error("Branch stack is supported only with PMU events");
11651156
} else if (_cstack == CSTACK_VM && VM::loaded() && !VMStructs::hasStackStructs()) {
11661157
return Error("VMStructs stack walking is not supported on this JVM/platform");
11671158
}

0 commit comments

Comments
 (0)