Skip to content

Commit 7d1ef3c

Browse files
achow101vijaydasmp
authored andcommitted
Merge bitcoin#19690: util: improve FindByte() performance
72efc26 util: improve streams.h:FindByte() performance (Larry Ruane) 604df63 [bench] add streams findbyte (gzhao408) Pull request description: This PR is strictly a performance improvement; there is no functional change. The `CBufferedFile::FindByte()` method searches for the next occurrence of the given byte in the file. Currently, this is done by explicitly inspecting each byte in turn. This PR takes advantage of `std::find()` to do the same more efficiently, improving its CPU runtime by a factor of about 25 in typical use. ACKs for top commit: achow101: re-ACK 72efc26 stickies-v: re-ACK 72efc26 Tree-SHA512: ddf0bff335cc8aa34f911aa4e0558fa77ce35d963d602e4ab1c63090b4a386faf074548daf06ee829c7f2c760d06eed0125cf4c34e981c6129cea1804eb3b719
1 parent a0c887a commit 7d1ef3c

File tree

6 files changed

+50
-8
lines changed

6 files changed

+50
-8
lines changed

src/Makefile.bench.include

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ bench_bench_dash_SOURCES = \
4545
bench/pool.cpp \
4646
bench/rpc_blockchain.cpp \
4747
bench/rpc_mempool.cpp \
48+
bench/streams_findbyte.cpp \
4849
bench/strencodings.cpp \
4950
bench/util_time.cpp \
5051
bench/base58.cpp \

src/bench/streams_findbyte.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (c) 2023 The Bitcoin Core developers
2+
// Distributed under the MIT software license, see the accompanying
3+
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
4+
5+
#include <bench/bench.h>
6+
7+
#include <util/fs.h>
8+
#include <streams.h>
9+
10+
static void FindByte(benchmark::Bench& bench)
11+
{
12+
// Setup
13+
FILE* file = fsbridge::fopen("streams_tmp", "w+b");
14+
const size_t file_size = 200;
15+
uint8_t data[file_size] = {0};
16+
data[file_size-1] = 1;
17+
fwrite(&data, sizeof(uint8_t), file_size, file);
18+
rewind(file);
19+
CBufferedFile bf(file, /*nBufSize=*/file_size + 1, /*nRewindIn=*/file_size, 0, 0);
20+
21+
bench.run([&] {
22+
bf.SetPos(0);
23+
bf.FindByte(std::byte(1));
24+
});
25+
26+
// Cleanup
27+
bf.fclose();
28+
fs::remove("streams_tmp");
29+
}
30+
31+
BENCHMARK(FindByte, benchmark::PriorityLevel::HIGH);

src/streams.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -752,15 +752,25 @@ class CBufferedFile
752752
}
753753

754754
//! search for a given byte in the stream, and remain positioned on it
755-
void FindByte(uint8_t ch)
755+
void FindByte(std::byte byte)
756756
{
757+
// For best performance, avoid mod operation within the loop.
758+
size_t buf_offset{size_t(m_read_pos % uint64_t(vchBuf.size()))};
757759
while (true) {
758-
if (m_read_pos == nSrcPos)
760+
if (m_read_pos == nSrcPos) {
761+
// No more bytes available; read from the file into the buffer,
762+
// setting nSrcPos to one beyond the end of the new data.
763+
// Throws exception if end-of-file reached.
759764
Fill();
760-
if (vchBuf[m_read_pos % vchBuf.size()] == std::byte{ch}) {
761-
break;
762765
}
763-
m_read_pos++;
766+
const size_t len{std::min<size_t>(vchBuf.size() - buf_offset, nSrcPos - m_read_pos)};
767+
const auto it_start{vchBuf.begin() + buf_offset};
768+
const auto it_find{std::find(it_start, it_start + len, byte)};
769+
const size_t inc{size_t(std::distance(it_start, it_find))};
770+
m_read_pos += inc;
771+
if (inc < len) break;
772+
buf_offset += inc;
773+
if (buf_offset >= vchBuf.size()) buf_offset = 0;
764774
}
765775
}
766776
};

src/test/fuzz/buffered_file.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ FUZZ_TARGET(buffered_file)
5353
return;
5454
}
5555
try {
56-
opt_buffered_file->FindByte(fuzzed_data_provider.ConsumeIntegral<uint8_t>());
56+
opt_buffered_file->FindByte(std::byte(fuzzed_data_provider.ConsumeIntegral<uint8_t>()));
5757
} catch (const std::ios_base::failure&) {
5858
}
5959
},

src/test/streams_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
462462
size_t find = currentPos + InsecureRandRange(8);
463463
if (find >= fileSize)
464464
find = fileSize - 1;
465-
bf.FindByte(uint8_t(find));
465+
bf.FindByte(std::byte(find));
466466
// The value at each offset is the offset.
467467
BOOST_CHECK_EQUAL(bf.GetPos(), find);
468468
currentPos = find;

src/validation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4943,7 +4943,7 @@ void CChainState::LoadExternalBlockFile(
49434943
try {
49444944
// locate a header
49454945
unsigned char buf[CMessageHeader::MESSAGE_START_SIZE];
4946-
blkdat.FindByte(m_params.MessageStart()[0]);
4946+
blkdat.FindByte(std::byte(m_params.MessageStart()[0]));
49474947
nRewind = blkdat.GetPos() + 1;
49484948
blkdat >> buf;
49494949
if (memcmp(buf, m_params.MessageStart(), CMessageHeader::MESSAGE_START_SIZE)) {

0 commit comments

Comments
 (0)