Skip to content

Commit 492af4b

Browse files
Ke Wangmeta-codesync[bot]
authored andcommitted
misc: Add cacheable plumbing to FileIoContext (facebookincubator#16495)
Summary: Pull Request resolved: facebookincubator#16495 Rename cacheable and plumb the cacheable parameter through the velox layer (FileIoContext, BufferedInput, InputStream, CachedBufferedInput) so it's available to pass to the WS layer in a future diff. This diff only adds the parameter plumbing without actually passing it to WS pread calls. Reviewed By: xiaoxmeng Differential Revision: D93788632 fbshipit-source-id: 740b32e5c9a7ca9e395bcaf16dd98a8c8db23cc9
1 parent 792f6a3 commit 492af4b

File tree

13 files changed

+79
-69
lines changed

13 files changed

+79
-69
lines changed

velox/common/file/File.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,22 @@ struct FileIoContext {
7777
/// Tracer for IO operations, providing call stack context.
7878
std::shared_ptr<FileIoTracer> ioTracer;
7979

80+
/// When false, hints to the storage layer that this read should not be cached
81+
/// or should be evicted soon after reading. This is useful for one-time reads
82+
/// where caching would waste resources.
83+
bool cacheable{true};
84+
8085
FileIoContext() = default;
8186

8287
explicit FileIoContext(
8388
IoStats* stats,
8489
folly::F14FastMap<std::string, std::string> fileOpts = {},
85-
std::shared_ptr<FileIoTracer> tracer = nullptr)
90+
std::shared_ptr<FileIoTracer> tracer = nullptr,
91+
bool cacheable = true)
8692
: ioStats(stats),
8793
fileOpts(std::move(fileOpts)),
88-
ioTracer(std::move(tracer)) {}
94+
ioTracer(std::move(tracer)),
95+
cacheable(cacheable) {}
8996
};
9097

9198
// A read-only file. All methods in this object should be thread safe.

velox/common/io/Options.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,12 +139,12 @@ class ReaderOptions {
139139
return prefetchRowGroups_;
140140
}
141141

142-
bool noCacheRetention() const {
143-
return noCacheRetention_;
142+
bool cacheable() const {
143+
return cacheable_;
144144
}
145145

146-
void setNoCacheRetention(bool noCacheRetention) {
147-
noCacheRetention_ = noCacheRetention;
146+
void setCacheable(bool cacheable) {
147+
cacheable_ = cacheable;
148148
}
149149

150150
protected:
@@ -155,6 +155,6 @@ class ReaderOptions {
155155
int32_t maxCoalesceDistance_{kDefaultCoalesceDistance};
156156
int64_t maxCoalesceBytes_{kDefaultCoalesceBytes};
157157
int32_t prefetchRowGroups_{kDefaultPrefetchRowGroups};
158-
bool noCacheRetention_{false};
158+
bool cacheable_{true};
159159
};
160160
} // namespace facebook::velox::io

velox/connectors/hive/HiveConnectorUtil.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -628,7 +628,7 @@ void configureReaderOptions(
628628
readerOptions.setFooterEstimatedSize(hiveConfig->footerEstimatedSize());
629629
readerOptions.setFilePreloadThreshold(hiveConfig->filePreloadThreshold());
630630
readerOptions.setPrefetchRowGroups(hiveConfig->prefetchRowGroups());
631-
readerOptions.setNoCacheRetention(!hiveSplit->cacheable);
631+
readerOptions.setCacheable(hiveSplit->cacheable);
632632
const auto& sessionTzName = connectorQueryCtx->sessionTimezone();
633633
if (!sessionTzName.empty()) {
634634
const auto timezone = tz::locateZone(sessionTzName);

velox/connectors/hive/tests/HiveConnectorUtilTest.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -293,15 +293,15 @@ TEST_F(HiveConnectorUtilTest, configureReaderOptions) {
293293
TEST_F(HiveConnectorUtilTest, cacheRetention) {
294294
struct {
295295
bool splitCacheable;
296-
bool expectedNoCacheRetention;
296+
bool expectedCacheable;
297297

298298
std::string debugString() const {
299299
return fmt::format(
300-
"splitCacheable {}, expectedNoCacheRetention {}",
300+
"splitCacheable {}, expectedCacheable {}",
301301
splitCacheable,
302-
expectedNoCacheRetention);
302+
expectedCacheable);
303303
}
304-
} testSettings[] = {{false, true}, {true, false}};
304+
} testSettings[] = {{false, false}, {true, true}};
305305

306306
for (const auto& testData : testSettings) {
307307
SCOPED_TRACE(testData.debugString());
@@ -358,8 +358,7 @@ TEST_F(HiveConnectorUtilTest, cacheRetention) {
358358
hiveSplit,
359359
readerOptions);
360360

361-
ASSERT_EQ(
362-
readerOptions.noCacheRetention(), testData.expectedNoCacheRetention);
361+
ASSERT_EQ(readerOptions.cacheable(), testData.expectedCacheable);
363362
}
364363
}
365364

velox/dwio/common/BufferedInput.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,16 @@ class BufferedInput {
3838
velox::IoStats* ioStats = nullptr,
3939
uint64_t maxMergeDistance = kMaxMergeDistance,
4040
std::optional<bool> wsVRLoad = std::nullopt,
41-
folly::F14FastMap<std::string, std::string> fileReadOps = {})
41+
folly::F14FastMap<std::string, std::string> fileReadOps = {},
42+
bool cacheable = true)
4243
: BufferedInput(
4344
std::make_shared<ReadFileInputStream>(
4445
std::move(readFile),
4546
metricsLog,
4647
stats,
4748
ioStats,
48-
std::move(fileReadOps)),
49+
std::move(fileReadOps),
50+
cacheable),
4951
pool,
5052
maxMergeDistance,
5153
wsVRLoad) {}

velox/dwio/common/CacheInputStream.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ CacheInputStream::CacheInputStream(
3535
const Region& region,
3636
std::shared_ptr<ReadFileInputStream> input,
3737
uint64_t fileNum,
38-
bool noCacheRetention,
38+
bool cacheable,
3939
std::shared_ptr<ScanTracker> tracker,
4040
TrackingId trackingId,
4141
uint64_t groupId,
4242
int32_t loadQuantum)
4343
: bufferedInput_(bufferedInput),
4444
cache_(bufferedInput_->cache()),
45-
noCacheRetention_(noCacheRetention),
45+
cacheable_(cacheable),
4646
region_(region),
4747
fileNum_(fileNum),
4848
tracker_(std::move(tracker)),
@@ -58,7 +58,7 @@ CacheInputStream::~CacheInputStream() {
5858
}
5959

6060
void CacheInputStream::makeCacheEvictable() {
61-
if (!noCacheRetention_) {
61+
if (cacheable_) {
6262
return;
6363
}
6464
// Walks through the potential prefetch or access cache space of this cache
@@ -256,15 +256,15 @@ void CacheInputStream::loadSync(const Region& region) {
256256
ioStats_->queryThreadIoLatencyUs().increment(storageReadUs);
257257
ioStats_->storageReadLatencyUs().increment(storageReadUs);
258258
ioStats_->incTotalScanTime(storageReadUs * 1'000);
259-
entry->setExclusiveToShared(!noCacheRetention_);
259+
entry->setExclusiveToShared(cacheable_);
260260
} while (pin_.empty());
261261
}
262262

263263
void CacheInputStream::clearCachePin() {
264264
if (pin_.empty()) {
265265
return;
266266
}
267-
if (noCacheRetention_) {
267+
if (!cacheable_) {
268268
pin_.checkedEntry()->makeEvictable();
269269
}
270270
pin_.clear();
@@ -345,7 +345,7 @@ void CacheInputStream::loadPosition() {
345345
{
346346
MicrosecondTimer timer(&loadUs);
347347
try {
348-
if (!load->loadOrFuture(&waitFuture, !noCacheRetention_)) {
348+
if (!load->loadOrFuture(&waitFuture, cacheable_)) {
349349
waitFuture.wait();
350350
}
351351
} catch (const std::exception& e) {

velox/dwio/common/CacheInputStream.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class CacheInputStream : public SeekableInputStream {
3535
const velox::common::Region& region,
3636
std::shared_ptr<ReadFileInputStream> input,
3737
uint64_t fileNum,
38-
bool noCacheRetention,
38+
bool cacheable,
3939
std::shared_ptr<cache::ScanTracker> tracker,
4040
cache::TrackingId trackingId,
4141
uint64_t groupId,
@@ -72,7 +72,7 @@ class CacheInputStream : public SeekableInputStream {
7272
region_,
7373
input_,
7474
fileNum_,
75-
noCacheRetention_,
75+
cacheable_,
7676
tracker_,
7777
trackingId_,
7878
groupId_,
@@ -98,8 +98,8 @@ class CacheInputStream : public SeekableInputStream {
9898
prefetchPct_ = pct;
9999
}
100100

101-
bool testingNoCacheRetention() const {
102-
return noCacheRetention_;
101+
bool testingCacheable() const {
102+
return cacheable_;
103103
}
104104

105105
private:
@@ -120,7 +120,7 @@ class CacheInputStream : public SeekableInputStream {
120120
cache::AsyncDataCacheEntry& entry);
121121

122122
// Invoked to clear the cache pin of the accessed cache entry and mark it as
123-
// immediate evictable if 'noCacheRetention_' flag is set.
123+
// immediate evictable if 'cacheable_' is false.
124124
void clearCachePin();
125125

126126
void makeCacheEvictable();
@@ -131,10 +131,10 @@ class CacheInputStream : public SeekableInputStream {
131131

132132
CachedBufferedInput* const bufferedInput_;
133133
cache::AsyncDataCache* const cache_;
134-
// True if a pin should be set to the lowest retention score after
134+
// False if a pin should be set to the lowest retention score after
135135
// unpinning. This applies to sequential reads where second access
136136
// to the page is not expected.
137-
const bool noCacheRetention_;
137+
const bool cacheable_;
138138
// The region of 'input' 'this' ranges over.
139139
const velox::common::Region region_;
140140
const uint64_t fileNum_;

velox/dwio/common/CachedBufferedInput.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ std::unique_ptr<SeekableInputStream> CachedBufferedInput::enqueue(
5959
region,
6060
input_,
6161
fileNum_.id(),
62-
options_.noCacheRetention(),
62+
options_.cacheable(),
6363
tracker_,
6464
id,
6565
groupId_.id(),
@@ -520,7 +520,7 @@ void CachedBufferedInput::readRegions(
520520
auto& load = coalescedLoads_[i];
521521
if (load->state() == CoalescedLoad::State::kPlanned) {
522522
executor_->add(
523-
[pendingLoad = load, ssdSavable = !options_.noCacheRetention()]() {
523+
[pendingLoad = load, ssdSavable = options_.cacheable()]() {
524524
process::TraceContext trace("Read Ahead");
525525
pendingLoad->loadOrFuture(nullptr, ssdSavable);
526526
});
@@ -579,7 +579,7 @@ std::unique_ptr<SeekableInputStream> CachedBufferedInput::read(
579579
Region{offset, length},
580580
input_,
581581
fileNum_.id(),
582-
options_.noCacheRetention(),
582+
options_.cacheable(),
583583
nullptr,
584584
TrackingId(),
585585
0,

velox/dwio/common/CachedBufferedInput.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ class CachedBufferedInput : public BufferedInput {
7272
ioStats.get(),
7373
kMaxMergeDistance,
7474
std::nullopt,
75-
std::move(fileReadOps)),
75+
std::move(fileReadOps),
76+
readerOptions.cacheable()),
7677
cache_(cache),
7778
fileNum_(std::move(fileNum)),
7879
tracker_(std::move(tracker)),

velox/dwio/common/InputStream.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,10 @@ ReadFileInputStream::ReadFileInputStream(
6565
const MetricsLogPtr& metricsLog,
6666
IoStatistics* stats,
6767
velox::IoStats* ioStats,
68-
folly::F14FastMap<std::string, std::string> fileOpts)
68+
folly::F14FastMap<std::string, std::string> fileOpts,
69+
bool cacheable)
6970
: InputStream(readFile->getName(), metricsLog, stats, ioStats),
70-
fileIoContext_(ioStats, std::move(fileOpts)),
71+
fileIoContext_(ioStats, std::move(fileOpts), nullptr, cacheable),
7172
readFile_(std::move(readFile)) {}
7273

7374
void ReadFileInputStream::read(

0 commit comments

Comments
 (0)