@@ -15,71 +15,20 @@ limitations under the License.
1515*/
1616
1717use std:: collections:: HashMap ;
18- use std:: sync:: Arc ;
1918
2019use arrow:: array:: RecordBatch ;
2120use async_trait:: async_trait;
22- use object_store:: aws:: AmazonS3Builder ;
23- use object_store:: path:: Path as ObjectPath ;
24- use object_store:: { ObjectStore , PutPayload } ;
21+ use object_store:: PutPayload ;
2522use parquet:: arrow:: ArrowWriter ;
2623use parquet:: basic:: Compression ;
2724use parquet:: file:: properties:: WriterProperties ;
2825
29- use crate :: config :: TargetConfig ;
26+ use crate :: storage :: s3 :: S3Storage ;
3027
3128use super :: { Target , WriteResult } ;
3229
33- #[ derive( Clone ) ]
34- pub struct S3Target {
35- store : Arc < dyn ObjectStore > ,
36- bucket : String ,
37- prefix : String ,
38- table_format : String ,
39- executor_instance_type : String ,
40- region : Option < String > ,
41- }
42-
43- impl S3Target {
44- pub fn new ( config : & TargetConfig ) -> anyhow:: Result < Self > {
45- let mut builder = AmazonS3Builder :: from_env ( ) . with_bucket_name ( & config. bucket ) ;
46-
47- if let Some ( region) = & config. region {
48- tracing:: info!( "S3 Target with region: {region}" ) ;
49- builder = builder. with_region ( region) ;
50- }
51- if let Some ( endpoint) = & config. endpoint
52- && !endpoint. is_empty ( )
53- {
54- builder = builder. with_endpoint ( endpoint) ;
55- if endpoint. starts_with ( "http://" ) {
56- builder = builder. with_allow_http ( true ) ;
57- }
58- }
59-
60- let store = Arc :: new ( builder. build ( ) ?) ;
61- Ok ( Self {
62- store,
63- bucket : config. bucket . clone ( ) ,
64- prefix : config. prefix . clone ( ) ,
65- table_format : config. table_format . to_string ( ) ,
66- executor_instance_type : config. executor_instance_type . clone ( ) ,
67- region : config. region . clone ( ) ,
68- } )
69- }
70-
71- /// Returns the S3 URI for a given table name (e.g. `s3://bucket/prefix/customer/`).
72- pub fn table_s3_path ( & self , table_name : & str ) -> String {
73- if self . prefix . is_empty ( ) {
74- format ! ( "s3://{}/{table_name}/" , self . bucket)
75- } else {
76- format ! ( "s3://{}/{}/{table_name}/" , self . bucket, self . prefix)
77- }
78- }
79- }
80-
8130#[ async_trait]
82- impl Target for S3Target {
31+ impl Target for S3Storage {
8332 fn expected_files ( & self , table_name : & str , batch_ids : & [ u64 ] ) -> Vec < String > {
8433 batch_ids
8534 . iter ( )
@@ -110,14 +59,6 @@ impl Target for S3Target {
11059 "file_format" . to_string ( ) ,
11160 serde_json:: Value :: String ( "parquet" . to_string ( ) ) ,
11261 ) ;
113- params. insert (
114- "table_format" . to_string ( ) ,
115- serde_json:: Value :: String ( self . table_format . clone ( ) ) ,
116- ) ;
117- params. insert (
118- "executor_instance_type" . to_string ( ) ,
119- serde_json:: Value :: String ( self . executor_instance_type . clone ( ) ) ,
120- ) ;
12162
12263 if let Some ( region) = & self . region {
12364 params. insert (
@@ -151,14 +92,7 @@ impl Target for S3Target {
15192 let bytes_written = buf. len ( ) as u64 ;
15293
15394 // Upload to S3 with per-table directory structure
154- let path = if self . prefix . is_empty ( ) {
155- ObjectPath :: from ( format ! ( "{table_name}/batch-{batch_id:06}.parquet" ) )
156- } else {
157- ObjectPath :: from ( format ! (
158- "{}/{table_name}/batch-{batch_id:06}.parquet" ,
159- self . prefix
160- ) )
161- } ;
95+ let path = self . batch_object_path ( table_name, batch_id) ;
16296
16397 self . store . put ( & path, PutPayload :: from ( buf) ) . await ?;
16498
0 commit comments