Skip to content

Commit 319e424

Browse files
authored
Merge pull request #742 from Altinity/feature/lazy_load_metadata
Make DataLake metadata more lazy
2 parents 2e344cb + 1f7dc2e commit 319e424

File tree

5 files changed

+23
-13
lines changed

5 files changed

+23
-13
lines changed

src/Disks/ObjectStorages/IObjectStorage.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,14 @@ WriteSettings IObjectStorage::patchSettings(const WriteSettings & write_settings
9797
return write_settings;
9898
}
9999

100+
101+
/// Populates `metadata` on first use: when `metadata` is not set, asks `object_storage`
/// (via tryGetObjectMetadata) for the metadata of the archive path if isArchive() is true,
/// otherwise of the object's own path, and stores the result. Does nothing when `metadata`
/// is already set.
/// NOTE(review): tryGetObjectMetadata presumably yields an empty value when the lookup fails,
/// so `metadata` may still be unset after this call — callers should check before dereferencing.
void RelativePathWithMetadata::loadMetadata(ObjectStoragePtr object_storage)
102+
{
103+
if (!metadata)
104+
{
105+
const auto & path = isArchive() ? getPathToArchive() : getPath();
106+
metadata = object_storage->tryGetObjectMetadata(path);
107+
}
108+
}
109+
100110
}

src/Disks/ObjectStorages/IObjectStorage.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ struct RelativePathWithMetadata
8383
virtual bool isArchive() const { return false; }
8484
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
8585
virtual size_t fileSizeInArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
86+
87+
void loadMetadata(ObjectStoragePtr object_storage);
8688
};
8789

8890
struct ObjectKeyWithMetadata

src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,16 @@ class KeysIterator : public IObjectIterator
3434
return nullptr;
3535

3636
auto key = data_files[current_index];
37-
auto object_metadata = object_storage->getObjectMetadata(key);
3837

3938
if (callback)
40-
callback(FileProgress(0, object_metadata.size_bytes));
39+
{
40+
/// Too expensive to always load the size for metadata
41+
/// because it requires an API call to external storage.
42+
/// In many cases only keys are needed.
43+
callback(FileProgress(0, 1));
44+
}
4145

42-
return std::make_shared<ObjectInfo>(key, std::move(object_metadata));
46+
return std::make_shared<ObjectInfo>(key, std::nullopt);
4347
}
4448
}
4549

src/Storages/ObjectStorage/ReadBufferIterator.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,7 @@ std::optional<ColumnsDescription> ReadBufferIterator::tryGetColumnsFromCache(
7474
const auto & object_info = (*it);
7575
auto get_last_mod_time = [&] -> std::optional<time_t>
7676
{
77-
const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath();
78-
if (!object_info->metadata)
79-
object_info->metadata = object_storage->tryGetObjectMetadata(path);
80-
77+
object_info->loadMetadata(object_storage);
8178
return object_info->metadata
8279
? std::optional<time_t>(object_info->metadata->last_modified.epochTime())
8380
: std::nullopt;
@@ -149,7 +146,6 @@ std::unique_ptr<ReadBuffer> ReadBufferIterator::recreateLastReadBuffer()
149146
{
150147
auto context = getContext();
151148

152-
const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath();
153149
auto impl = StorageObjectStorageSource::createReadBuffer(*current_object_info, object_storage, context, getLogger("ReadBufferIterator"));
154150

155151
const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method);
@@ -248,6 +244,8 @@ ReadBufferIterator::Data ReadBufferIterator::next()
248244
prev_read_keys_size = read_keys.size();
249245
}
250246

247+
current_object_info->loadMetadata(object_storage);
248+
251249
if (query_settings.skip_empty_files
252250
&& current_object_info->metadata && current_object_info->metadata->size_bytes == 0)
253251
continue;

src/Storages/ObjectStorage/StorageObjectStorageSource.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -389,11 +389,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
389389
if (!object_info || object_info->getPath().empty())
390390
return {};
391391

392-
if (!object_info->metadata)
393-
{
394-
const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath();
395-
object_info->metadata = object_storage->getObjectMetadata(path);
396-
}
392+
object_info->loadMetadata(object_storage);
397393
}
398394
while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0);
399395

0 commit comments

Comments
 (0)