@@ -16,7 +16,8 @@ use std::collections::HashMap;
16
16
use std:: collections:: HashSet ;
17
17
use std:: sync:: Arc ;
18
18
19
- use databend_common_base:: runtime:: execute_futures_in_parallel;
19
+ use databend_common_base:: base:: tokio:: sync:: Semaphore ;
20
+ use databend_common_base:: runtime:: Runtime ;
20
21
use databend_common_catalog:: plan:: block_idx_in_segment;
21
22
use databend_common_catalog:: plan:: split_prefix;
22
23
use databend_common_catalog:: plan:: split_row_id;
@@ -32,6 +33,7 @@ use databend_storages_common_cache::LoadParams;
32
33
use databend_storages_common_io:: ReadSettings ;
33
34
use databend_storages_common_table_meta:: meta:: BlockMeta ;
34
35
use databend_storages_common_table_meta:: meta:: TableSnapshot ;
36
+ use futures_util:: future;
35
37
use itertools:: Itertools ;
36
38
37
39
use super :: fuse_rows_fetcher:: RowsFetcher ;
@@ -56,6 +58,9 @@ pub(super) struct ParquetRowsFetcher<const BLOCKING_IO: bool> {
56
58
57
59
// To control the parallelism of fetching blocks.
58
60
max_threads : usize ,
61
+
62
+ semaphore : Arc < Semaphore > ,
63
+ runtime : Arc < Runtime > ,
59
64
}
60
65
61
66
#[ async_trait:: async_trait]
@@ -125,19 +130,25 @@ impl<const BLOCKING_IO: bool> RowsFetcher for ParquetRowsFetcher<BLOCKING_IO> {
125
130
begin = end;
126
131
}
127
132
128
- let num_task = tasks. len ( ) ;
129
- let blocks = execute_futures_in_parallel (
130
- tasks,
131
- num_task,
132
- num_task * 2 ,
133
- "parqeut rows fetch" . to_string ( ) ,
134
- )
135
- . await ?
136
- . into_iter ( )
137
- . collect :: < Result < Vec < _ > > > ( ) ?
138
- . into_iter ( )
139
- . flatten ( )
140
- . collect :: < Vec < _ > > ( ) ;
133
+ let tasks = tasks. into_iter ( ) . map ( |v| {
134
+ |permit| async {
135
+ let r = v. await ;
136
+ drop ( permit) ;
137
+ r
138
+ }
139
+ } ) ;
140
+ let join_handlers = self
141
+ . runtime
142
+ . try_spawn_batch_with_owned_semaphore ( self . semaphore . clone ( ) , tasks)
143
+ . await ?;
144
+
145
+ let joint = future:: try_join_all ( join_handlers) . await ?;
146
+ let blocks = joint
147
+ . into_iter ( )
148
+ . collect :: < Result < Vec < _ > > > ( ) ?
149
+ . into_iter ( )
150
+ . flatten ( )
151
+ . collect :: < Vec < _ > > ( ) ;
141
152
// Take result rows from blocks.
142
153
let indices = row_set
143
154
. iter ( )
@@ -171,6 +182,8 @@ impl<const BLOCKING_IO: bool> ParquetRowsFetcher<BLOCKING_IO> {
171
182
reader : Arc < BlockReader > ,
172
183
settings : ReadSettings ,
173
184
max_threads : usize ,
185
+ semaphore : Arc < Semaphore > ,
186
+ runtime : Arc < Runtime > ,
174
187
) -> Self {
175
188
let schema = table. schema ( ) ;
176
189
let segment_reader =
@@ -186,6 +199,8 @@ impl<const BLOCKING_IO: bool> ParquetRowsFetcher<BLOCKING_IO> {
186
199
part_map : HashMap :: new ( ) ,
187
200
segment_blocks_cache : HashMap :: new ( ) ,
188
201
max_threads,
202
+ semaphore,
203
+ runtime,
189
204
}
190
205
}
191
206
0 commit comments