@@ -1023,10 +1023,24 @@ GetFieldDatasFromStorageV2(std::vector<std::vector<std::string>>& remote_files,
1023
1023
AssertInfo (remote_files.size () > 0 , " remote files size is 0" );
1024
1024
std::vector<FieldDataPtr> field_data_list;
1025
1025
1026
+ // remote files might not follow the order of column group ids,
1027
+ // so we need to put them into a map<column_group_id, remote_chunk_files>
1028
+ std::unordered_map<int64_t , std::vector<std::string>> column_group_files;
1026
1029
for (int i = 0 ; i < remote_files.size (); i++) {
1027
1030
auto & remote_chunk_files = remote_files[i];
1028
1031
AssertInfo (remote_chunk_files.size () > 0 , " remote files size is 0" );
1029
1032
1033
+ // the group_id is the path segment between the second-to-last and the last '/'
1034
+ std::string path = remote_chunk_files[0 ];
1035
+ size_t last_slash = path.find_last_of (" /" );
1036
+ size_t second_last_slash = path.find_last_of (" /" , last_slash - 1 );
1037
+ int64_t group_id = std::stol (path.substr (
1038
+ second_last_slash + 1 , last_slash - second_last_slash - 1 ));
1039
+
1040
+ column_group_files[group_id] = remote_chunk_files;
1041
+ }
1042
+
1043
+ for (auto & [column_group_id, remote_chunk_files] : column_group_files) {
1030
1044
auto fs = milvus_storage::ArrowFileSystemSingleton::GetInstance ()
1031
1045
.GetArrowFileSystem ();
1032
1046
// read first file to get path and column offset of the field id
@@ -1045,9 +1059,9 @@ GetFieldDatasFromStorageV2(std::vector<std::vector<std::string>>& remote_files,
1045
1059
AssertInfo (column_offset.path_index < remote_files.size (),
1046
1060
" column offset path index {} is out of range" ,
1047
1061
column_offset.path_index );
1048
- if (column_offset.path_index != i ) {
1062
+ if (column_offset.path_index != column_group_id ) {
1049
1063
LOG_INFO (" Skip group id {} since target field shall be in group {}" ,
1050
- i ,
1064
+ column_group_id ,
1051
1065
column_offset.path_index );
1052
1066
continue ;
1053
1067
}
0 commit comments