57
57
log = logging .getLogger (__name__ )
58
58
59
59
60
- @attrs .frozen (auto_attribs = True , kw_only = True )
60
+ @attrs .frozen (kw_only = True )
61
61
class MirrorController (ActionController [MirrorAction ]):
62
62
schema_url_func : SchemaUrlFunc
63
63
@@ -107,31 +107,39 @@ def mirror_source(self, catalog: CatalogName, source_json: JSON):
107
107
plugin = self .repository_plugin (catalog )
108
108
source = plugin .source_ref_cls .from_json (source_json )
109
109
source = plugin .partition_source_for_mirroring (catalog , source )
110
+ prefix = source .spec .prefix
111
+ log .info ('Queueing %d partitions of source %r in catalog %r' ,
112
+ prefix .num_partitions , str (source .spec ), catalog )
110
113
111
- def message (prefix : str ) -> SQSMessage :
112
- log .info ('Mirroring files in partition %r of source %r from catalog %r' ,
113
- prefix , str (source .spec ), catalog )
114
- return self .mirror_partition_message (catalog , source , prefix )
114
+ def message (partition : str ) -> SQSMessage :
115
+ log .debug ('Queueing partition %r' , partition )
116
+ return self .mirror_partition_message (catalog , source , partition )
115
117
116
- messages = map (message , source . spec . prefix .partition_prefixes ())
118
+ messages = map (message , prefix .partition_prefixes ())
117
119
self .client .queue_mirror_messages (messages )
118
120
119
- def mirror_partition (self , catalog : CatalogName , source_json : JSON , prefix : str ):
121
+ def mirror_partition (self ,
122
+ catalog : CatalogName ,
123
+ source_json : JSON ,
124
+ prefix : str
125
+ ):
120
126
plugin = self .repository_plugin (catalog )
121
127
source = plugin .source_ref_cls .from_json (source_json )
122
128
already_mirrored = self .service .list_info_objects (catalog , prefix )
129
+ files = plugin .list_files (source , prefix )
123
130
124
131
def messages () -> Iterable [SQSMessage ]:
125
- for file in plugin . list_files ( source , prefix ) :
132
+ for file in files :
126
133
info_key = self .service .info_object_key (file )
127
134
if info_key in already_mirrored :
128
- log .info ('Not mirroring file %r because info object already exists at %r' ,
129
- file .uuid , info_key )
135
+ log .debug ('Not queueing file %r because info object already exists' , file )
130
136
else :
131
- log .info ( 'Mirroring file %r' , file . uuid )
137
+ log .debug ( 'Queueing file %r' , file )
132
138
yield self .mirror_file_message (catalog , source , file )
133
139
134
- self .client .queue_mirror_messages (messages ())
140
+ message_count = self .client .queue_mirror_messages (messages ())
141
+ log .info ('Queued %d/%d files in partition %r of source %r in catalog %r' ,
142
+ message_count , len (files ), prefix , str (source ), catalog )
135
143
136
144
def mirror_file (self ,
137
145
catalog : CatalogName ,
@@ -145,26 +153,36 @@ def mirror_file(self,
145
153
and not config .deployment .is_unit_test
146
154
and catalog not in config .integration_test_catalogs )
147
155
if file_is_large and not deployment_is_stable :
148
- log .info ('Not mirroring file %r (%d bytes) to save cost' ,
149
- file .uuid , file .size )
156
+ log .info ('Not mirroring file to save cost: %r' , file )
150
157
else :
151
158
# Ensure we test with multiple parts on lower deployments
152
159
part_size = FilePart .default_size if deployment_is_stable else FilePart .min_size
153
160
if file .size <= part_size :
154
- log .info ('Mirroring file %r via standard upload' , file . uuid )
161
+ log .info ('Mirroring file via standard upload: %r ' , file )
155
162
self .service .mirror_file (catalog , file )
156
- log .info ('Successfully mirrored file %r via standard upload' , file . uuid )
163
+ log .info ('Successfully mirrored file via standard upload: %r ' , file )
157
164
else :
158
- log .info ('Mirroring file %r via multi-part upload' , file . uuid )
165
+ log .info ('Mirroring file via multi-part upload: %r ' , file )
159
166
_ , digest_type = file .digest ()
160
167
hasher = get_resumable_hasher (digest_type )
161
168
upload_id = self .service .begin_mirroring_file (catalog , file )
162
169
first_part = FilePart .first (file , part_size )
163
- etag = self .service .mirror_file_part (catalog , file , first_part , upload_id , hasher )
170
+ log .info ('Uploading part #%d of file %r' , first_part .index , file )
171
+ etag = self .service .mirror_file_part (catalog ,
172
+ file ,
173
+ first_part ,
174
+ upload_id ,
175
+ hasher )
164
176
next_part = first_part .next (file )
165
177
assert next_part is not None
166
- messages = [self .mirror_part_message (catalog , file , next_part , upload_id , [etag ], hasher )]
167
- self .client .queue_mirror_messages (messages )
178
+ log .info ('Queueing part #%d of file %r' , next_part .index , file )
179
+ message = self .mirror_part_message (catalog ,
180
+ file ,
181
+ next_part ,
182
+ upload_id ,
183
+ [etag ],
184
+ hasher )
185
+ self .client .queue_mirror_messages ([message ])
168
186
169
187
def mirror_file_part (self ,
170
188
catalog : CatalogName ,
@@ -177,17 +195,19 @@ def mirror_file_part(self,
177
195
file = self .load_file (catalog , file_json )
178
196
part = FilePart .from_json (part_json )
179
197
hasher = hasher_from_str (hasher_data )
198
+ log .info ('Uploading part #%d of file %r' , part .index , file )
180
199
etag = self .service .mirror_file_part (catalog , file , part , upload_id , hasher )
181
200
etags = [* etags , etag ]
182
201
next_part = part .next (file )
183
202
if next_part is None :
184
- log .info ('File %r fully uploaded in %d parts' , file . uuid , len (etags ))
203
+ log .info ('File fully uploaded in %d parts: %r ' , len (etags ), file )
185
204
message = self .finalize_file_message (catalog ,
186
205
file ,
187
206
upload_id ,
188
207
etags ,
189
208
hasher )
190
209
else :
210
+ log .info ('Queueing part #%d of file %r' , next_part .index , file )
191
211
message = self .mirror_part_message (catalog ,
192
212
file ,
193
213
next_part ,
@@ -211,7 +231,7 @@ def finalize_file(self,
211
231
upload_id = upload_id ,
212
232
etags = etags ,
213
233
hasher = hasher )
214
- log .info ('Successfully mirrored file %r via multi-part upload' , file . uuid )
234
+ log .info ('Successfully mirrored file via multi-part upload: %r ' , file )
215
235
216
236
def load_file (self , catalog : CatalogName , file : JSON ) -> File :
217
237
return self .client .metadata_plugin (catalog ).file_class .from_json (file )
0 commit comments