Skip to content

Commit b9b5546

Browse files
fix: storage v2 get field data with correct column group files (#42107)
related: #39173 Signed-off-by: shaoting-huang <[email protected]>
1 parent c00005b commit b9b5546

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

internal/core/src/storage/Util.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,10 +1023,24 @@ GetFieldDatasFromStorageV2(std::vector<std::vector<std::string>>& remote_files,
10231023
AssertInfo(remote_files.size() > 0, "remote files size is 0");
10241024
std::vector<FieldDataPtr> field_data_list;
10251025

1026+
// remote files might not follow the sequence of column group ids,
1027+
// so we need to put them into a map<column_group_id, remote_chunk_files>
1028+
std::unordered_map<int64_t, std::vector<std::string>> column_group_files;
10261029
for (int i = 0; i < remote_files.size(); i++) {
10271030
auto& remote_chunk_files = remote_files[i];
10281031
AssertInfo(remote_chunk_files.size() > 0, "remote files size is 0");
10291032

1033+
// find the second-to-last '/' in the path to extract the group_id
1034+
std::string path = remote_chunk_files[0];
1035+
size_t last_slash = path.find_last_of("/");
1036+
size_t second_last_slash = path.find_last_of("/", last_slash - 1);
1037+
int64_t group_id = std::stol(path.substr(
1038+
second_last_slash + 1, last_slash - second_last_slash - 1));
1039+
1040+
column_group_files[group_id] = remote_chunk_files;
1041+
}
1042+
1043+
for (auto& [column_group_id, remote_chunk_files] : column_group_files) {
10301044
auto fs = milvus_storage::ArrowFileSystemSingleton::GetInstance()
10311045
.GetArrowFileSystem();
10321046
// read first file to get path and column offset of the field id
@@ -1045,9 +1059,9 @@ GetFieldDatasFromStorageV2(std::vector<std::vector<std::string>>& remote_files,
10451059
AssertInfo(column_offset.path_index < remote_files.size(),
10461060
"column offset path index {} is out of range",
10471061
column_offset.path_index);
1048-
if (column_offset.path_index != i) {
1062+
if (column_offset.path_index != column_group_id) {
10491063
LOG_INFO("Skip group id {} since target field shall be in group {}",
1050-
i,
1064+
column_group_id,
10511065
column_offset.path_index);
10521066
continue;
10531067
}

0 commit comments

Comments
 (0)