@@ -73,6 +73,15 @@ def prepare_bases(engines: dict[str, Engine]) -> dict[str, Any]:
73
73
bases = prepare_bases (engines )
74
74
75
75
76
+ class Sessions :
77
+ """Container for all required Phabricator DB sessions."""
78
+
79
+ users = Session (engines ["user" ])
80
+ projects = Session (engines ["project" ])
81
+ repo = Session (engines ["repository" ])
82
+ diff = Session (engines ["differential" ])
83
+
84
+
76
85
@dataclass
77
86
class UserDb :
78
87
User = bases ["user" ].classes .user
@@ -101,41 +110,41 @@ class DiffDb:
101
110
CustomFieldStorage = bases ["differential" ].classes .differential_customfieldstorage
102
111
103
112
104
- def get_last_review_id (revision_phid : str , session_diff : Session ) -> Optional [int ]:
113
+ def get_last_review_id (revision_phid : str , sessions : Sessions ) -> Optional [int ]:
105
114
last_review = (
106
- session_diff .query (DiffDb .Reviewer )
115
+ sessions . diff .query (DiffDb .Reviewer )
107
116
.filter_by (revisionPHID = revision_phid )
108
117
.order_by (desc ("dateModified" ))
109
118
.first ()
110
119
)
111
120
return last_review .id if last_review else None
112
121
113
122
114
- def get_target_repository (repository_phid : str , session_repo : Session ) -> Optional [str ]:
123
+ def get_target_repository (repository_phid : str , sessions : Sessions ) -> Optional [str ]:
115
124
repository = (
116
- session_repo .query (RepoDb .Repository )
125
+ sessions . repo .query (RepoDb .Repository )
117
126
.filter_by (repositoryPHID = repository_phid )
118
127
.first ()
119
128
)
120
129
return repository .uri if repository else None
121
130
122
131
123
- def diff_phid_to_id (diff_phid : Optional [str ], session_diff : Session ) -> Optional [int ]:
132
+ def diff_phid_to_id (diff_phid : Optional [str ], sessions : Sessions ) -> Optional [int ]:
124
133
if diff_phid is None :
125
134
return None
126
135
127
- diff = session_diff .query (DiffDb .Differential ).filter_by (phid = diff_phid ).one ()
136
+ diff = sessions . diff .query (DiffDb .Differential ).filter_by (phid = diff_phid ).one ()
128
137
129
138
return diff .id
130
139
131
140
132
141
def get_diff_id_for_changeset (
133
- changeset_id : Optional [int ], session_diff : Session
142
+ changeset_id : Optional [int ], sessions : Sessions
134
143
) -> Optional [int ]:
135
144
if changeset_id is None :
136
145
return None
137
146
138
- changeset = session_diff .query (DiffDb .Changeset ).filter_by (id = changeset_id ).one ()
147
+ changeset = sessions . diff .query (DiffDb .Changeset ).filter_by (id = changeset_id ).one ()
139
148
140
149
return changeset .diffID
141
150
@@ -160,12 +169,12 @@ class PhabricatorEdgeConstant(IntEnum):
160
169
161
170
162
171
def get_revision_projects (
163
- revision : Any , session_diff : Session , projects_query : Any
172
+ revision : Any , sessions : Sessions , projects_query : Any
164
173
) -> list [str ]:
165
174
"""Return the project tags associated with a revision."""
166
175
# Get all edges between the revision and a project.
167
176
edge_query_result = (
168
- session_diff .query (DiffDb .Edges )
177
+ sessions . diff .query (DiffDb .Edges )
169
178
.filter (
170
179
DiffDb .Edges .src == revision .phid ,
171
180
DiffDb .Edges .type == PhabricatorEdgeConstant .OBJECT_HAS_PROJECT .value ,
@@ -188,7 +197,7 @@ def get_stack_size(
188
197
bug_id : Optional [int ],
189
198
all_revisions : Any ,
190
199
bug_id_query : Any ,
191
- session_diff : Session ,
200
+ sessions : Sessions ,
192
201
) -> int :
193
202
# The stack size is always 1 for stacks without a bug ID.
194
203
if not bug_id :
@@ -200,7 +209,7 @@ def get_stack_size(
200
209
while neighbors :
201
210
# Query for all edges related to the current set of neighbors.
202
211
edge_query_result = (
203
- session_diff .query (DiffDb .Edges )
212
+ sessions . diff .query (DiffDb .Edges )
204
213
.filter (
205
214
or_ (DiffDb .Edges .src .in_ (neighbors ), DiffDb .Edges .dst .in_ (neighbors )),
206
215
DiffDb .Edges .type .in_ (
@@ -238,18 +247,18 @@ def get_stack_size(
238
247
return len (stack )
239
248
240
249
241
- def get_user_name (author_phid : str , session_users : Session ) -> Optional [str ]:
250
+ def get_user_name (author_phid : str , sessions : Sessions ) -> Optional [str ]:
242
251
try :
243
- user = session_users .query (UserDb .User ).filter_by (phid = author_phid ).one ()
252
+ user = sessions . users .query (UserDb .User ).filter_by (phid = author_phid ).one ()
244
253
return user .userName
245
254
except NoResultFound :
246
255
return None
247
256
248
257
249
- def get_user_email (author_phid : str , session_users : Session ) -> Optional [str ]:
258
+ def get_user_email (author_phid : str , sessions : Sessions ) -> Optional [str ]:
250
259
try :
251
260
user_email = (
252
- session_users .query (UserDb .UserEmail )
261
+ sessions . users .query (UserDb .UserEmail )
253
262
.filter_by (userPHID = author_phid , isPrimary = 1 )
254
263
.one ()
255
264
)
@@ -260,28 +269,26 @@ def get_user_email(author_phid: str, session_users: Session) -> Optional[str]:
260
269
261
270
def get_review_requests (
262
271
revision : DiffDb .Revision ,
263
- session_diff : Session ,
264
- session_projects : Session ,
265
- session_users : Session ,
272
+ sessions : Sessions ,
266
273
) -> tuple [list [dict ], Optional [int ]]:
267
274
review_requests = []
268
275
date_approved = None
269
276
270
- for review in session_diff .query (DiffDb .Reviewer ).filter_by (
277
+ for review in sessions . diff .query (DiffDb .Reviewer ).filter_by (
271
278
revisionPHID = revision .phid
272
279
):
273
280
is_reviewer_group = review .reviewerPHID .startswith (b"PHID-PROJ-" )
274
281
if is_reviewer_group :
275
282
reviewer = (
276
- session_projects .query (ProjectDb .Project )
283
+ sessions . projects .query (ProjectDb .Project )
277
284
.filter_by (phid = review .reviewerPHID )
278
285
.one ()
279
286
)
280
287
reviewer_username = reviewer .name
281
288
reviewer_email = None
282
289
else :
283
- reviewer_username = get_user_name (review .reviewerPHID , session_users )
284
- reviewer_email = get_user_email (review .reviewerPHID , session_users )
290
+ reviewer_username = get_user_name (review .reviewerPHID , sessions )
291
+ reviewer_email = get_user_email (review .reviewerPHID , sessions )
285
292
286
293
# Set `date_approved` as the latest `accepted` review modified time.
287
294
if review .reviewerStatus == "accepted" and (
@@ -298,11 +305,9 @@ def get_review_requests(
298
305
"date_created" : review .dateCreated ,
299
306
"date_modified" : review .dateModified ,
300
307
"status" : review .reviewerStatus ,
301
- "last_action_diff_id" : diff_phid_to_id (
302
- review .lastActionDiffPHID , session_diff
303
- ),
308
+ "last_action_diff_id" : diff_phid_to_id (review .lastActionDiffPHID , sessions ),
304
309
"last_comment_diff_id" : diff_phid_to_id (
305
- review .lastCommentDiffPHID , session_diff
310
+ review .lastCommentDiffPHID , sessions
306
311
),
307
312
}
308
313
@@ -313,13 +318,12 @@ def get_review_requests(
313
318
314
319
def get_diffs_changesets (
315
320
revision : DiffDb .Revision ,
316
- session_diff : Session ,
317
- session_users : Session ,
321
+ sessions : Sessions ,
318
322
) -> tuple [list [dict ], list [dict ], Optional [int ]]:
319
323
diffs = []
320
324
changesets = []
321
325
date_landed = None
322
- for diff in session_diff .query (DiffDb .Differential ).filter_by (
326
+ for diff in sessions . diff .query (DiffDb .Differential ).filter_by (
323
327
revisionID = revision .id
324
328
):
325
329
if diff .creationMethod == "commit" :
@@ -337,21 +341,21 @@ def get_diffs_changesets(
337
341
"diff_id" : diff .id ,
338
342
"revision_id" : revision .id ,
339
343
"date_created" : diff .dateCreated ,
340
- "author_email" : get_user_email (diff .authorPHID , session_users ),
341
- "author_username" : get_user_name (diff .authorPHID , session_users ),
344
+ "author_email" : get_user_email (diff .authorPHID , sessions ),
345
+ "author_username" : get_user_name (diff .authorPHID , sessions ),
342
346
}
343
347
344
348
diffs .append (diff_obj )
345
- changesets .extend (get_changesets (revision , diff , session_diff ))
349
+ changesets .extend (get_changesets (revision , diff , sessions ))
346
350
347
351
return diffs , changesets , date_landed
348
352
349
353
350
354
def get_changesets (
351
- revision : DiffDb .Revision , diff : DiffDb .Differential , session_diff : Session
355
+ revision : DiffDb .Revision , diff : DiffDb .Differential , sessions : Sessions
352
356
) -> list [dict ]:
353
357
changesets = []
354
- for changeset in session_diff .query (DiffDb .Changeset ).filter_by (diffID = diff .id ):
358
+ for changeset in sessions . diff .query (DiffDb .Changeset ).filter_by (diffID = diff .id ):
355
359
changeset_obj = {
356
360
"revision_id" : revision .id ,
357
361
"diff_id" : diff .id ,
@@ -366,14 +370,12 @@ def get_changesets(
366
370
return changesets
367
371
368
372
369
- def get_comments (
370
- revision : DiffDb .Revision , session_diff : Session , session_users : Session
371
- ) -> list [dict ]:
373
+ def get_comments (revision : DiffDb .Revision , sessions : Sessions ) -> list [dict ]:
372
374
comments = []
373
375
374
376
# Query comments that are left on revisions but not specific diffs/changesets.
375
377
comment_transaction_phids_query = (
376
- session_diff .query (DiffDb .Transaction )
378
+ sessions . diff .query (DiffDb .Transaction )
377
379
.with_entities (DiffDb .Transaction .commentPHID )
378
380
.filter_by (
379
381
objectPHID = revision .phid ,
@@ -384,7 +386,7 @@ def get_comments(
384
386
385
387
comment_transaction_phids = [row [0 ] for row in comment_transaction_phids_query ]
386
388
387
- for comment in session_diff .query (DiffDb .TransactionComment ).filter (
389
+ for comment in sessions . diff .query (DiffDb .TransactionComment ).filter (
388
390
# Query all TransactionComments that match our revision PHID
389
391
# or the non-diff comments.
390
392
(DiffDb .TransactionComment .revisionPHID == revision .phid )
@@ -398,11 +400,11 @@ def get_comments(
398
400
399
401
comment_obj = {
400
402
"revision_id" : revision .id ,
401
- "diff_id" : get_diff_id_for_changeset (comment .changesetID , session_diff ),
403
+ "diff_id" : get_diff_id_for_changeset (comment .changesetID , sessions ),
402
404
"changeset_id" : comment .changesetID ,
403
405
"comment_id" : comment .id ,
404
- "author_email" : get_user_email (comment .authorPHID , session_users ),
405
- "author_username" : get_user_name (comment .authorPHID , session_users ),
406
+ "author_email" : get_user_email (comment .authorPHID , sessions ),
407
+ "author_username" : get_user_name (comment .authorPHID , sessions ),
406
408
"date_created" : comment .dateCreated ,
407
409
"character_count" : len (comment .content ),
408
410
"is_suggestion" : is_suggestion ,
@@ -418,8 +420,7 @@ def get_revision(
418
420
bug_id : Optional [int ],
419
421
date_approved : Optional [int ],
420
422
date_landed : Optional [int ],
421
- session_diff : Session ,
422
- session_repo : Session ,
423
+ sessions : Sessions ,
423
424
all_revisions : Any ,
424
425
bug_id_query : Any ,
425
426
projects_query : Any ,
@@ -433,15 +434,13 @@ def get_revision(
433
434
"date_created" : revision .dateCreated ,
434
435
"date_modified" : revision .dateModified ,
435
436
"date_landed" : date_landed ,
436
- "last_review_id" : get_last_review_id (revision .phid , session_diff ),
437
+ "last_review_id" : get_last_review_id (revision .phid , sessions ),
437
438
"current_status" : revision .status ,
438
- "target_repository" : get_target_repository (
439
- revision .repositoryPHID , session_repo
440
- ),
439
+ "target_repository" : get_target_repository (revision .repositoryPHID , sessions ),
441
440
"stack_size" : get_stack_size (
442
- revision , bug_id , all_revisions , bug_id_query , session_diff
441
+ revision , bug_id , all_revisions , bug_id_query , sessions
443
442
),
444
- "project_tags" : get_revision_projects (revision , session_diff , projects_query ),
443
+ "project_tags" : get_revision_projects (revision , sessions , projects_query ),
445
444
}
446
445
447
446
@@ -610,10 +609,7 @@ def process():
610
609
611
610
logging .info (f"Starting Phab-ETL with timestamp { now } ." )
612
611
613
- session_users = Session (engines ["user" ])
614
- session_projects = Session (engines ["project" ])
615
- session_repo = Session (engines ["repository" ])
616
- session_diff = Session (engines ["differential" ])
612
+ sessions = Sessions ()
617
613
618
614
bq_client = bigquery .Client ()
619
615
@@ -622,12 +618,12 @@ def process():
622
618
623
619
time_queries = get_time_queries (now , bq_client )
624
620
625
- updated_revisions = session_diff .query (DiffDb .Revision ).filter (* time_queries )
626
- all_revisions = session_diff .query (DiffDb .Revision )
621
+ updated_revisions = sessions . diff .query (DiffDb .Revision ).filter (* time_queries )
622
+ all_revisions = sessions . diff .query (DiffDb .Revision )
627
623
628
- projects_query = session_projects .query (ProjectDb .Project )
624
+ projects_query = sessions . projects .query (ProjectDb .Project )
629
625
630
- bug_id_query = session_diff .query (DiffDb .CustomFieldStorage ).filter (
626
+ bug_id_query = sessions . diff .query (DiffDb .CustomFieldStorage ).filter (
631
627
# TODO I got this value from the DB, what is it?
632
628
DiffDb .CustomFieldStorage .fieldIndex
633
629
== b"zdMFYM6423ua"
@@ -644,27 +640,23 @@ def process():
644
640
645
641
diffs , changesets , date_landed = get_diffs_changesets (
646
642
revision ,
647
- session_diff ,
648
- session_users ,
643
+ sessions ,
649
644
)
650
645
651
- review_requests , date_approved = get_review_requests (
652
- revision , session_diff , session_projects , session_users
653
- )
646
+ review_requests , date_approved = get_review_requests (revision , sessions )
654
647
655
648
revision_json = get_revision (
656
649
revision ,
657
650
bug_id ,
658
651
date_approved ,
659
652
date_landed ,
660
- session_diff ,
661
- session_repo ,
653
+ sessions ,
662
654
all_revisions ,
663
655
bug_id_query ,
664
656
projects_query ,
665
657
)
666
658
667
- comments = get_comments (revision , session_diff , session_users )
659
+ comments = get_comments (revision , sessions )
668
660
669
661
phab_gathering_time = round (
670
662
time .perf_counter () - phab_querying_start , ndigits = 2
0 commit comments