diff --git a/programs/benchzstd.c b/programs/benchzstd.c index f55c8697504..e28aab593de 100644 --- a/programs/benchzstd.c +++ b/programs/benchzstd.c @@ -27,10 +27,12 @@ #include /* fprintf, fopen */ #include /* malloc, free */ #include /* memset, strerror */ +#include "counters.h" #include "util.h" /* UTIL_getFileSize, UTIL_sleep */ #include "../lib/common/mem.h" #include "benchfn.h" #include "timefn.h" /* UTIL_time_t */ + #ifndef ZSTD_STATIC_LINKING_ONLY # define ZSTD_STATIC_LINKING_ONLY #endif @@ -541,6 +543,9 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( "Warning : time measurements may be incorrect in multithreading mode... \n") } + /* FIXME(cavalcanti): only include this for Linux (!Android)@x86 */ + BMK_linuxPerfCounters_t counters; + /* Bench */ { U64 const crcOrig = (adv->mode == BMK_decodeOnly) @@ -599,6 +604,12 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( displayName, (unsigned)srcSize); + /* FIXME(cavalcanti): only include this for Linux (!Android)@x86 */ + if (adv->cpuCounters) { + BMK_countersInit(&counters); + BMK_eventStart(&counters); + } + while (!(compressionCompleted && decompressionCompleted)) { if (!compressionCompleted) { BMK_runOutcome_t const cOutcome = @@ -680,6 +691,13 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( markNb = (markNb + 1) % NB_MARKS; } /* while (!(compressionCompleted && decompressionCompleted)) */ + /* FIXME(cavalcanti): only include this for Linux (!Android)@x86 */ + if (adv->cpuCounters) { + BMK_eventStop(&counters); + BMK_countersClose(&counters); + fprintf(stdout, "###### Perf cycles: %llu\n", counters.cycles); + } + /* CRC Checking */ { const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr); U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); @@ -763,6 +781,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc( benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx); + return BMK_benchOutcome_setValidResult(benchResult); } diff --git 
a/programs/benchzstd.h b/programs/benchzstd.h
index d62a33c0aec..897604ff9fe 100644
--- a/programs/benchzstd.h
+++ b/programs/benchzstd.h
@@ -106,6 +106,7 @@ typedef struct {
     int ldmHashRateLog;
     ZSTD_ParamSwitch_e literalCompressionMode;
     int useRowMatchFinder; /* use row-based matchfinder if possible */
+    int cpuCounters;       /* collect CPU performance counters while benchmarking */
 } BMK_advancedParams_t;
 
 /* returns default parameters used by nonAdvanced functions */
diff --git a/programs/counters.h b/programs/counters.h
new file mode 100644
index 00000000000..e92f237b576
--- /dev/null
+++ b/programs/counters.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/****************************************************************************
+ * Performance counters
+ *
+ * Thin wrapper around Linux perf_event_open(2): counts the user-space CPU
+ * cycles consumed by the calling thread between BMK_eventStart() and
+ * BMK_eventStop().
+ *
+ * Expected call sequence:
+ *   BMK_countersInit() -> BMK_eventStart() -> (work) -> BMK_eventStop()
+ *   -> BMK_countersClose()
+ *
+ * Every function returns 0 on success and -1 on failure. On platforms
+ * without Linux perf events the same entry points exist but always fail,
+ * and `cycles` stays at 0.
+ ****************************************************************************/
+#ifndef BENCH_ZSTD_COUNTERS
+#define BENCH_ZSTD_COUNTERS
+
+/* perf_event_open(2) is Linux-specific; Android is deliberately left out. */
+#if defined(__linux__) && !defined(__ANDROID__)
+
+/* syscall() is an extension: this macro only has an effect when it is defined
+ * before the first system header of the translation unit is included. */
+#ifndef _GNU_SOURCE
+#  define _GNU_SOURCE
+#endif
+#include <string.h>           /* memset */
+#include <sys/ioctl.h>        /* ioctl */
+#include <sys/syscall.h>      /* __NR_perf_event_open */
+#include <sys/types.h>        /* pid_t, ssize_t */
+#include <unistd.h>           /* syscall, read, close */
+#include <linux/perf_event.h> /* struct perf_event_attr, PERF_* constants */
+
+typedef struct {
+    struct perf_event_attr events; /* event description passed to the kernel */
+    int fd;                        /* perf event descriptor, -1 when not open */
+    unsigned long long cycles;     /* total accumulated by BMK_eventStop() */
+} BMK_linuxPerfCounters_t;
+
+/* Opens the event described by counters->events and stores its descriptor in
+ * counters->fd (-1 on failure). Internal helper of BMK_countersInit(). */
+static int BMK_countersOpen(BMK_linuxPerfCounters_t* counters)
+{
+    pid_t const pid = 0;           /* 0 : monitor the calling thread */
+    int const cpu = -1;            /* -1 : on whichever CPU it runs */
+    int const group_fd = -1;       /* -1 : not part of an event group */
+    unsigned long const flags = 0;
+
+    counters->fd = (int)syscall(__NR_perf_event_open, &counters->events,
+                                pid, cpu, group_fd, flags);
+
+    return (counters->fd < 0) ? -1 : 0;
+}
+
+/* Resets *counters, configures a user-space CPU cycle counter (created
+ * disabled) and opens it. */
+static int BMK_countersInit(BMK_linuxPerfCounters_t* counters)
+{
+    /* Clear the whole object, not only its first member, so that no field
+     * depends on the member order to get initialized. */
+    memset(counters, 0, sizeof(*counters));
+    counters->fd = -1;
+
+    counters->events.type = PERF_TYPE_HARDWARE;
+    counters->events.size = sizeof(counters->events);
+    /* TODO(cavalcanti): Add more performance counters:
+     * PERF_COUNT_HW_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_MISSES,
+     * PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES.
+     */
+    counters->events.config = PERF_COUNT_HW_CPU_CYCLES;
+    counters->events.disabled = 1;       /* idle until BMK_eventStart() */
+    counters->events.exclude_kernel = 1;
+    counters->events.exclude_hv = 1;
+
+    return BMK_countersOpen(counters);
+}
+
+/* Zeroes the kernel-side count, then starts counting. */
+static int BMK_eventStart(BMK_linuxPerfCounters_t* counters)
+{
+    if (counters->fd < 0) return -1;
+    if (ioctl(counters->fd, PERF_EVENT_IOC_RESET, 0) == -1) return -1;
+    if (ioctl(counters->fd, PERF_EVENT_IOC_ENABLE, 0) == -1) return -1;
+    return 0;
+}
+
+/* Stops counting and adds the cycles measured since BMK_eventStart() to
+ * counters->cycles. Leaves counters->cycles untouched on failure. */
+static int BMK_eventStop(BMK_linuxPerfCounters_t* counters)
+{
+    unsigned long long count = 0;
+
+    if (counters->fd < 0) return -1;
+    if (ioctl(counters->fd, PERF_EVENT_IOC_DISABLE, 0) == -1) return -1;
+    /* a short read would leave `count` only partially filled */
+    if (read(counters->fd, &count, sizeof(count)) != (ssize_t)sizeof(count))
+        return -1;
+    counters->cycles += count;
+    return 0;
+}
+
+/* Releases the descriptor; counters->cycles remains readable afterwards. */
+static int BMK_countersClose(BMK_linuxPerfCounters_t* counters)
+{
+    int res;
+    if (counters->fd < 0) return -1;
+    res = close(counters->fd);
+    counters->fd = -1;   /* guards against a double close */
+    return (res == 0) ? 0 : -1;
+}
+
+#else  /* no Linux perf events: same API, every call fails */
+
+typedef struct {
+    int fd;                        /* -1 : never open */
+    unsigned long long cycles;     /* stays at 0 */
+} BMK_linuxPerfCounters_t;
+
+static int BMK_countersInit(BMK_linuxPerfCounters_t* counters)
+{
+    counters->fd = -1;
+    counters->cycles = 0;
+    return -1;
+}
+
+static int BMK_eventStart(BMK_linuxPerfCounters_t* counters)
+{
+    (void)counters;
+    return -1;
+}
+
+static int BMK_eventStop(BMK_linuxPerfCounters_t* counters)
+{
+    (void)counters;
+    return -1;
+}
+
+static int BMK_countersClose(BMK_linuxPerfCounters_t* counters)
+{
+    (void)counters;
+    return -1;
+}
+
+#endif /* defined(__linux__) && !defined(__ANDROID__) */
+
+#endif /* BENCH_ZSTD_COUNTERS */
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index fa7ea37b3f0..770cb55339a 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -309,6 +309,7 @@ static void usageAdvanced(const char* programName)
     DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT);
     DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n");
     DISPLAYOUT(" -i# Set the minimum evaluation to time # seconds. [Default: 3]\n");
+    DISPLAYOUT(" -y Collect CPU counters.\n");
     DISPLAYOUT(" --split=# Split input into independent chunks of size #. [Default: No chunking]\n");
     DISPLAYOUT(" -S Output one benchmark result per input file. 
[Default: Consolidated result]\n"); DISPLAYOUT(" -D dictionary Benchmark using dictionary \n"); @@ -882,6 +883,7 @@ int main(int argCount, const char* argv[]) cLevelLast = MINCLEVEL - 1, /* for benchmark range */ setThreads_non1 = 0; unsigned nbWorkers = init_nbWorkers(); + unsigned cpuCounters = 0; /* whether we want to harvest CPU counters during benchmark */ ZSTD_ParamSwitch_e mmapDict = ZSTD_ps_auto; ZSTD_ParamSwitch_e useRowMatchFinder = ZSTD_ps_auto; FIO_compressionType_t cType = FIO_zstdCompression; @@ -1316,6 +1318,15 @@ int main(int argCount, const char* argv[]) compressibility = (double)readU32FromChar(&argument) / 100; break; + /* Harvest performance counters */ + case 'y': + argument++; + cpuCounters = 1; + /* Collecting performance counters requires single threaded mode for now */ + nbWorkers = 0; + singleThread = 1; + break; + /* unknown command */ default : { char shortArgument[3] = {'-', 0, 0}; @@ -1423,6 +1434,7 @@ int main(int argCount, const char* argv[]) benchParams.ldmMinMatch = (int)g_ldmMinMatch; benchParams.ldmHashLog = (int)g_ldmHashLog; benchParams.useRowMatchFinder = (int)useRowMatchFinder; + benchParams.cpuCounters = (int)cpuCounters; if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog; }