Skip to content

Commit 9617420

Browse files
committed
phab: create a Sessions class to hold the sessions
1 parent: 2b04f9a · commit: 9617420

File tree

1 file changed

+59
-67
lines changed

1 file changed

+59
-67
lines changed

stats.py

Lines changed: 59 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,15 @@ def prepare_bases(engines: dict[str, Engine]) -> dict[str, Any]:
7373
bases = prepare_bases(engines)
7474

7575

76+
class Sessions:
77+
"""Container for all required Phabricator DB sessions."""
78+
79+
users = Session(engines["user"])
80+
projects = Session(engines["project"])
81+
repo = Session(engines["repository"])
82+
diff = Session(engines["differential"])
83+
84+
7685
@dataclass
7786
class UserDb:
7887
User = bases["user"].classes.user
@@ -101,41 +110,41 @@ class DiffDb:
101110
CustomFieldStorage = bases["differential"].classes.differential_customfieldstorage
102111

103112

104-
def get_last_review_id(revision_phid: str, session_diff: Session) -> Optional[int]:
113+
def get_last_review_id(revision_phid: str, sessions: Sessions) -> Optional[int]:
105114
last_review = (
106-
session_diff.query(DiffDb.Reviewer)
115+
sessions.diff.query(DiffDb.Reviewer)
107116
.filter_by(revisionPHID=revision_phid)
108117
.order_by(desc("dateModified"))
109118
.first()
110119
)
111120
return last_review.id if last_review else None
112121

113122

114-
def get_target_repository(repository_phid: str, session_repo: Session) -> Optional[str]:
123+
def get_target_repository(repository_phid: str, sessions: Sessions) -> Optional[str]:
115124
repository = (
116-
session_repo.query(RepoDb.Repository)
125+
sessions.repo.query(RepoDb.Repository)
117126
.filter_by(repositoryPHID=repository_phid)
118127
.first()
119128
)
120129
return repository.uri if repository else None
121130

122131

123-
def diff_phid_to_id(diff_phid: Optional[str], session_diff: Session) -> Optional[int]:
132+
def diff_phid_to_id(diff_phid: Optional[str], sessions: Sessions) -> Optional[int]:
124133
if diff_phid is None:
125134
return None
126135

127-
diff = session_diff.query(DiffDb.Differential).filter_by(phid=diff_phid).one()
136+
diff = sessions.diff.query(DiffDb.Differential).filter_by(phid=diff_phid).one()
128137

129138
return diff.id
130139

131140

132141
def get_diff_id_for_changeset(
133-
changeset_id: Optional[int], session_diff: Session
142+
changeset_id: Optional[int], sessions: Sessions
134143
) -> Optional[int]:
135144
if changeset_id is None:
136145
return None
137146

138-
changeset = session_diff.query(DiffDb.Changeset).filter_by(id=changeset_id).one()
147+
changeset = sessions.diff.query(DiffDb.Changeset).filter_by(id=changeset_id).one()
139148

140149
return changeset.diffID
141150

@@ -160,12 +169,12 @@ class PhabricatorEdgeConstant(IntEnum):
160169

161170

162171
def get_revision_projects(
163-
revision: Any, session_diff: Session, projects_query: Any
172+
revision: Any, sessions: Sessions, projects_query: Any
164173
) -> list[str]:
165174
"""Return the project tags associated with a revision."""
166175
# Get all edges between the revision and a project.
167176
edge_query_result = (
168-
session_diff.query(DiffDb.Edges)
177+
sessions.diff.query(DiffDb.Edges)
169178
.filter(
170179
DiffDb.Edges.src == revision.phid,
171180
DiffDb.Edges.type == PhabricatorEdgeConstant.OBJECT_HAS_PROJECT.value,
@@ -188,7 +197,7 @@ def get_stack_size(
188197
bug_id: Optional[int],
189198
all_revisions: Any,
190199
bug_id_query: Any,
191-
session_diff: Session,
200+
sessions: Sessions,
192201
) -> int:
193202
# The stack size is always 1 for stacks without a bug ID.
194203
if not bug_id:
@@ -200,7 +209,7 @@ def get_stack_size(
200209
while neighbors:
201210
# Query for all edges related to the current set of neighbors.
202211
edge_query_result = (
203-
session_diff.query(DiffDb.Edges)
212+
sessions.diff.query(DiffDb.Edges)
204213
.filter(
205214
or_(DiffDb.Edges.src.in_(neighbors), DiffDb.Edges.dst.in_(neighbors)),
206215
DiffDb.Edges.type.in_(
@@ -238,18 +247,18 @@ def get_stack_size(
238247
return len(stack)
239248

240249

241-
def get_user_name(author_phid: str, session_users: Session) -> Optional[str]:
250+
def get_user_name(author_phid: str, sessions: Sessions) -> Optional[str]:
242251
try:
243-
user = session_users.query(UserDb.User).filter_by(phid=author_phid).one()
252+
user = sessions.users.query(UserDb.User).filter_by(phid=author_phid).one()
244253
return user.userName
245254
except NoResultFound:
246255
return None
247256

248257

249-
def get_user_email(author_phid: str, session_users: Session) -> Optional[str]:
258+
def get_user_email(author_phid: str, sessions: Sessions) -> Optional[str]:
250259
try:
251260
user_email = (
252-
session_users.query(UserDb.UserEmail)
261+
sessions.users.query(UserDb.UserEmail)
253262
.filter_by(userPHID=author_phid, isPrimary=1)
254263
.one()
255264
)
@@ -260,28 +269,26 @@ def get_user_email(author_phid: str, session_users: Session) -> Optional[str]:
260269

261270
def get_review_requests(
262271
revision: DiffDb.Revision,
263-
session_diff: Session,
264-
session_projects: Session,
265-
session_users: Session,
272+
sessions: Sessions,
266273
) -> tuple[list[dict], Optional[int]]:
267274
review_requests = []
268275
date_approved = None
269276

270-
for review in session_diff.query(DiffDb.Reviewer).filter_by(
277+
for review in sessions.diff.query(DiffDb.Reviewer).filter_by(
271278
revisionPHID=revision.phid
272279
):
273280
is_reviewer_group = review.reviewerPHID.startswith(b"PHID-PROJ-")
274281
if is_reviewer_group:
275282
reviewer = (
276-
session_projects.query(ProjectDb.Project)
283+
sessions.projects.query(ProjectDb.Project)
277284
.filter_by(phid=review.reviewerPHID)
278285
.one()
279286
)
280287
reviewer_username = reviewer.name
281288
reviewer_email = None
282289
else:
283-
reviewer_username = get_user_name(review.reviewerPHID, session_users)
284-
reviewer_email = get_user_email(review.reviewerPHID, session_users)
290+
reviewer_username = get_user_name(review.reviewerPHID, sessions)
291+
reviewer_email = get_user_email(review.reviewerPHID, sessions)
285292

286293
# Set `date_approved` as the latest `accepted` review modified time.
287294
if review.reviewerStatus == "accepted" and (
@@ -298,11 +305,9 @@ def get_review_requests(
298305
"date_created": review.dateCreated,
299306
"date_modified": review.dateModified,
300307
"status": review.reviewerStatus,
301-
"last_action_diff_id": diff_phid_to_id(
302-
review.lastActionDiffPHID, session_diff
303-
),
308+
"last_action_diff_id": diff_phid_to_id(review.lastActionDiffPHID, sessions),
304309
"last_comment_diff_id": diff_phid_to_id(
305-
review.lastCommentDiffPHID, session_diff
310+
review.lastCommentDiffPHID, sessions
306311
),
307312
}
308313

@@ -313,13 +318,12 @@ def get_review_requests(
313318

314319
def get_diffs_changesets(
315320
revision: DiffDb.Revision,
316-
session_diff: Session,
317-
session_users: Session,
321+
sessions: Sessions,
318322
) -> tuple[list[dict], list[dict], Optional[int]]:
319323
diffs = []
320324
changesets = []
321325
date_landed = None
322-
for diff in session_diff.query(DiffDb.Differential).filter_by(
326+
for diff in sessions.diff.query(DiffDb.Differential).filter_by(
323327
revisionID=revision.id
324328
):
325329
if diff.creationMethod == "commit":
@@ -337,21 +341,21 @@ def get_diffs_changesets(
337341
"diff_id": diff.id,
338342
"revision_id": revision.id,
339343
"date_created": diff.dateCreated,
340-
"author_email": get_user_email(diff.authorPHID, session_users),
341-
"author_username": get_user_name(diff.authorPHID, session_users),
344+
"author_email": get_user_email(diff.authorPHID, sessions),
345+
"author_username": get_user_name(diff.authorPHID, sessions),
342346
}
343347

344348
diffs.append(diff_obj)
345-
changesets.extend(get_changesets(revision, diff, session_diff))
349+
changesets.extend(get_changesets(revision, diff, sessions))
346350

347351
return diffs, changesets, date_landed
348352

349353

350354
def get_changesets(
351-
revision: DiffDb.Revision, diff: DiffDb.Differential, session_diff: Session
355+
revision: DiffDb.Revision, diff: DiffDb.Differential, sessions: Sessions
352356
) -> list[dict]:
353357
changesets = []
354-
for changeset in session_diff.query(DiffDb.Changeset).filter_by(diffID=diff.id):
358+
for changeset in sessions.diff.query(DiffDb.Changeset).filter_by(diffID=diff.id):
355359
changeset_obj = {
356360
"revision_id": revision.id,
357361
"diff_id": diff.id,
@@ -366,14 +370,12 @@ def get_changesets(
366370
return changesets
367371

368372

369-
def get_comments(
370-
revision: DiffDb.Revision, session_diff: Session, session_users: Session
371-
) -> list[dict]:
373+
def get_comments(revision: DiffDb.Revision, sessions: Sessions) -> list[dict]:
372374
comments = []
373375

374376
# Query comments that are left on revisions but not specific diffs/changesets.
375377
comment_transaction_phids_query = (
376-
session_diff.query(DiffDb.Transaction)
378+
sessions.diff.query(DiffDb.Transaction)
377379
.with_entities(DiffDb.Transaction.commentPHID)
378380
.filter_by(
379381
objectPHID=revision.phid,
@@ -384,7 +386,7 @@ def get_comments(
384386

385387
comment_transaction_phids = [row[0] for row in comment_transaction_phids_query]
386388

387-
for comment in session_diff.query(DiffDb.TransactionComment).filter(
389+
for comment in sessions.diff.query(DiffDb.TransactionComment).filter(
388390
# Query all TransactionComments that match our revision PHID
389391
# or the non-diff comments.
390392
(DiffDb.TransactionComment.revisionPHID == revision.phid)
@@ -398,11 +400,11 @@ def get_comments(
398400

399401
comment_obj = {
400402
"revision_id": revision.id,
401-
"diff_id": get_diff_id_for_changeset(comment.changesetID, session_diff),
403+
"diff_id": get_diff_id_for_changeset(comment.changesetID, sessions),
402404
"changeset_id": comment.changesetID,
403405
"comment_id": comment.id,
404-
"author_email": get_user_email(comment.authorPHID, session_users),
405-
"author_username": get_user_name(comment.authorPHID, session_users),
406+
"author_email": get_user_email(comment.authorPHID, sessions),
407+
"author_username": get_user_name(comment.authorPHID, sessions),
406408
"date_created": comment.dateCreated,
407409
"character_count": len(comment.content),
408410
"is_suggestion": is_suggestion,
@@ -418,8 +420,7 @@ def get_revision(
418420
bug_id: Optional[int],
419421
date_approved: Optional[int],
420422
date_landed: Optional[int],
421-
session_diff: Session,
422-
session_repo: Session,
423+
sessions: Sessions,
423424
all_revisions: Any,
424425
bug_id_query: Any,
425426
projects_query: Any,
@@ -433,15 +434,13 @@ def get_revision(
433434
"date_created": revision.dateCreated,
434435
"date_modified": revision.dateModified,
435436
"date_landed": date_landed,
436-
"last_review_id": get_last_review_id(revision.phid, session_diff),
437+
"last_review_id": get_last_review_id(revision.phid, sessions),
437438
"current_status": revision.status,
438-
"target_repository": get_target_repository(
439-
revision.repositoryPHID, session_repo
440-
),
439+
"target_repository": get_target_repository(revision.repositoryPHID, sessions),
441440
"stack_size": get_stack_size(
442-
revision, bug_id, all_revisions, bug_id_query, session_diff
441+
revision, bug_id, all_revisions, bug_id_query, sessions
443442
),
444-
"project_tags": get_revision_projects(revision, session_diff, projects_query),
443+
"project_tags": get_revision_projects(revision, sessions, projects_query),
445444
}
446445

447446

@@ -610,10 +609,7 @@ def process():
610609

611610
logging.info(f"Starting Phab-ETL with timestamp {now}.")
612611

613-
session_users = Session(engines["user"])
614-
session_projects = Session(engines["project"])
615-
session_repo = Session(engines["repository"])
616-
session_diff = Session(engines["differential"])
612+
sessions = Sessions()
617613

618614
bq_client = bigquery.Client()
619615

@@ -622,12 +618,12 @@ def process():
622618

623619
time_queries = get_time_queries(now, bq_client)
624620

625-
updated_revisions = session_diff.query(DiffDb.Revision).filter(*time_queries)
626-
all_revisions = session_diff.query(DiffDb.Revision)
621+
updated_revisions = sessions.diff.query(DiffDb.Revision).filter(*time_queries)
622+
all_revisions = sessions.diff.query(DiffDb.Revision)
627623

628-
projects_query = session_projects.query(ProjectDb.Project)
624+
projects_query = sessions.projects.query(ProjectDb.Project)
629625

630-
bug_id_query = session_diff.query(DiffDb.CustomFieldStorage).filter(
626+
bug_id_query = sessions.diff.query(DiffDb.CustomFieldStorage).filter(
631627
# TODO I got this value from the DB, what is it?
632628
DiffDb.CustomFieldStorage.fieldIndex
633629
== b"zdMFYM6423ua"
@@ -644,27 +640,23 @@ def process():
644640

645641
diffs, changesets, date_landed = get_diffs_changesets(
646642
revision,
647-
session_diff,
648-
session_users,
643+
sessions,
649644
)
650645

651-
review_requests, date_approved = get_review_requests(
652-
revision, session_diff, session_projects, session_users
653-
)
646+
review_requests, date_approved = get_review_requests(revision, sessions)
654647

655648
revision_json = get_revision(
656649
revision,
657650
bug_id,
658651
date_approved,
659652
date_landed,
660-
session_diff,
661-
session_repo,
653+
sessions,
662654
all_revisions,
663655
bug_id_query,
664656
projects_query,
665657
)
666658

667-
comments = get_comments(revision, session_diff, session_users)
659+
comments = get_comments(revision, sessions)
668660

669661
phab_gathering_time = round(
670662
time.perf_counter() - phab_querying_start, ndigits=2

0 commit comments

Comments (0)