Commit a7fc1e8
Improved temporary output displayed while analyzing a Repository
Added inspection of branches, tags, environments, and artifacts.
Replicated some of the contributor-specific "releases" findings under the contributor as well, for visibility.
Added a count of Stars to the repository profiling section.
Also shows, under the repository, a summary of the number of unreliable commits identified due to potentially erroneous dates.
1 parent 68ce8a0 commit a7fc1e8
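
The "temporary output" improvement rewrites a single console line in place: each status message starts with a carriage return ("\r") and is padded with spaces so it fully overwrites the previous, possibly longer, message. A minimal standalone sketch of the pattern (illustrative only, not gitxray code):

import time

def progress(message, pad=40):
    # "\r" returns the cursor to column 0; the trailing spaces blank out
    # leftovers from a longer previous message; end="" keeps the cursor
    # on the same line so the next call overwrites this one.
    print(f"\r{message}" + " " * pad, end="", flush=True)

progress("Checking for repository deployments..")
time.sleep(1)  # stand-in for the actual API calls
progress("Inspecting repository branches..")
time.sleep(1)
print("\rRepository has been analyzed.." + " " * 40)  # a plain print emits the final newline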

File tree

1 file changed: +92 -12 lines changed
src/gitxray/xrays/repository_xray.py

@@ -18,24 +18,87 @@ def run(gx_context, gx_output):
     if repository.get('homepage'):
         gx_output.r_log(f"Homepage: [{repository.get('homepage')}]", rtype="urls")
 
-    print(f"Checking for repository deployments..")
+    print(f"Checking for repository deployments..", end="")
     if repository.get('deployments_url'):
         deployments = gh_api.fetch_repository_deployments(repository)
-        if len(deployments) > 0: gx_output.r_log(f"Deployments available at: [{repository.get('html_url')}/deployments]", rtype="urls")
+        if len(deployments) > 0: gx_output.r_log(f"{len(deployments)} Deployments available at: [{repository.get('html_url')}/deployments]", rtype="urls")
 
+    print(f"\rChecking for repository environments.."+" "*30, end="")
+    environments = gh_api.fetch_repository_environments(repository)
+    if environments != None and environments.get('total_count') > 0:
+        gx_output.r_log(f"{environments.get('total_count')} Environments available at: [{repository.get('url')}/environments]", rtype="urls")
+        for environment in environments.get('environments'):
+            gx_output.r_log(f"Environment [{environment.get('name')}] created [{environment.get('created_at')}], updated [{environment.get('updated_at')}]: {environment.get('html_url')}", rtype="environments")
+
+    print(f"\rChecking for repository forks.."+" "*30, end="")
     if repository.get('forks_count') > 0:
         gx_output.r_log(f"Repository has {repository.get('forks_count')} forks: {repository.get('forks_url')}", rtype="profiling")
 
-    print(f"Querying about repository action workflows..")
+    print(f"\rQuerying for repository action workflows.."+" "*30, end="")
     workflows = gh_api.fetch_repository_actions_workflows(repository)
     if workflows != None and workflows.get('total_count') > 0:
+        gx_output.r_log(f"{workflows.get('total_count')} Workflows available at: [{repository.get('url')}/actions/workflows]", rtype="urls")
         for workflow in workflows.get('workflows'):
-            gx_output.r_log(f"Repository has a GitHub Action workflow at {workflow.get('html_url')}", rtype="workflows")
-
-
-    print(f"Inspecting repository releases..")
+            gx_output.r_log(f"Workflow [{workflow.get('name')}] created [{workflow.get('created_at')}], updated [{workflow.get('updated_at')}]: {workflow.get('html_url')}", rtype="workflows")
+
+    print(f"\rQuerying for repository action artifacts.."+" "*30, end="")
+    artifacts = gh_api.fetch_repository_actions_artifacts(repository)
+    if artifacts != None and artifacts.get('total_count') > 0:
+        gx_output.r_log(f"{artifacts.get('total_count')} Artifacts available at: [{repository.get('url')}/actions/artifacts]", rtype="urls")
+        for artifact in artifacts.get('artifacts'):
+            # There are normally multiple artifacts hence we keep them under verbose.
+            gx_output.r_log(f"Artifact [{artifact.get('name')}] created [{artifact.get('created_at')}], updated [{artifact.get('updated_at')}]: {artifact.get('url')}", rtype="v_artifacts")
+            created_at = artifact.get('created_at')
+            created_at_ts = gh_time.parse_date(created_at)
+            updated_at = artifact.get('updated_at')
+            updated_at_ts = gh_time.parse_date(updated_at)
+            # This shouldn't happen but we still run a check; artifacts can't be updated but instead completely overwritten
+            # More data here: https://github.com/actions/upload-artifact#overwriting-an-artifact
+            if (updated_at_ts-created_at_ts).days > 0:
+                gx_output.r_log(f"An artifact [{artifact.get('name')}] was updated {(updated_at_ts-created_at_ts).days} days after being created: {artifact.get('url')}", rtype="artifacts")
+
+    print(f"\rInspecting repository branches.."+" "*40, end="")
+    branches = gh_api.fetch_repository_branches(repository)
+    if len(branches) > 0:
+        gx_output.r_log(f"{len(branches)} Branches available at: [{repository.get('html_url')}/branches]", rtype="urls")
+        unprotected_branches = []
+        protected_branches = []
+        for branch in branches:
+            if branch.get('protected') == False:
+                unprotected_branches.append(branch.get('name'))
+            else:
+                protected_branches.append(branch.get('name'))
+
+        if len(unprotected_branches) > 0: gx_output.r_log(f"{len(unprotected_branches)} Unprotected Branches: {unprotected_branches}", rtype="branches")
+        if len(protected_branches) > 0: gx_output.r_log(f"{len(protected_branches)} Protected Branches: {protected_branches}", rtype="branches")
+
+    print(f"\rInspecting repository tags.."+" "*40, end="")
+    tags = gh_api.fetch_repository_tags(repository)
+    if len(tags) > 0: gx_output.r_log(f"{len(tags)} Tags available at: [{repository.get('html_url')}/tags]", rtype="urls")
+    tag_taggers = defaultdict(int)
+
+    """ A bit shameful here because we can't really get too much data out of tags because of the way the GH API is implemented.
+    It only returns stripped tags when getting all tags, we can't even get who the tagger was. """
+    for tag in tags:
+        tagger = tag.get('tagger')
+        if tagger == None:
+            # Lightweight tags - for some reason GitHub's API is returning stripped down version of tags even if they are not lightweight
+            gx_output.r_log(f"Tag [{tag.get('name')}] is available at: [{repository.get('html_url')}/tags]", rtype="v_tags")
+        else:
+            tagger = tagger.get('email')
+            tag_taggers[tagger] += 1
+            gx_output.r_log(f"A tag was created by {tagger} at {tag.get('tagger').get('date')}: {tag.get('url')}", rtype="v_tags")
+
+    total_tags = sum(tag_taggers.values())
+    for tagger, tags in tag_taggers.items():
+        percentage_tags = (tags / total_tags) * 100
+        message = f"{tagger} created historically {tags} tags [{percentage_tags:.2f}%]"
+        gx_output.r_log(message, rtype="tags")
+
+
+    print(f"\rInspecting repository releases.."+" "*40, end="")
     releases = gh_api.fetch_repository_releases(repository)
-    if len(releases) > 0: gx_output.r_log(f"Releases available at: [{repository.get('html_url')}/releases]", rtype="urls")
+    if len(releases) > 0: gx_output.r_log(f"{len(releases)} Releases available at: [{repository.get('html_url')}/releases]", rtype="urls")
 
     release_authors = defaultdict(int)
     asset_uploaders = defaultdict(int)
@@ -50,12 +113,14 @@ def run(gx_context, gx_output):
             uploaded_by = asset.get('uploader').get('login')
             asset_uploaders[uploaded_by] += 1
             created_at = asset.get('created_at')
-            gx_output.r_log(f"An asset was uploaded by {uploaded_by} at {created_at}: {asset.get('url')}", rtype="v_releases")
+            message = f"An asset was uploaded by {uploaded_by} at {created_at}: {asset.get('url')}"
+            gx_output.r_log(message, rtype="v_releases")
+            gx_output.c_log(message, rtype="v_releases", contributor=uploaded_by)
             created_at_ts = gh_time.parse_date(created_at)
             updated_at = asset.get('updated_at')
             updated_at_ts = gh_time.parse_date(updated_at)
             if (updated_at_ts-created_at_ts).days > 0:
-                gx_output.r_log(f"An asset in Release [{release.get('name')}] by [{uploaded_by}] was updated {(updated_at_ts-created_at_ts).days} days after its release: {asset.get('url')}", rtype="releases")
+                gx_output.r_log(f"WARNING: An asset in Release [{release.get('name')}] by [{uploaded_by}] was updated {(updated_at_ts-created_at_ts).days} days after its release: {asset.get('url')}", rtype="releases")
 
     total_releases = sum(release_authors.values())
     total_assets = sum(asset_uploaders.values())
@@ -73,13 +138,19 @@ def run(gx_context, gx_output):
         else:
             message += " and never uploaded assets."
 
+        if gx_context.verboseEnabled() == False: message += " Turn on Verbose mode for more information."
+
         gx_output.r_log(message, rtype="releases")
+        gx_output.c_log(message, rtype="releases", contributor=author)
 
     # Handle asset uploaders who did not create any releases
     for uploader in asset_uploaders_set:
        assets = asset_uploaders[uploader]
        percentage_assets = (assets / total_assets) * 100
-       gx_output.r_log(f"User {uploader} has uploaded {assets} assets [{percentage_assets:.2f}%] and never created a release, Warning.", rtype="releases")
+       message = f"WARNING: User {uploader} has uploaded {assets} assets [{percentage_assets:.2f}%] and never created a release."
+       if gx_context.verboseEnabled() == False: message += " Turn on Verbose mode for more information."
+       gx_output.r_log(message, rtype="releases")
+       gx_output.c_log(message, rtype="releases", contributor=uploader)
 
     """ Work in Progress: This sounded fun but ended up being a dead end.
     # Let's run an additional check on stargazers if, and only if, the repository has up to 5000 gazers.
@@ -123,7 +194,10 @@ def run(gx_context, gx_output):
     """
 
     if repository.get('watchers_count') > 0:
-        gx_output.r_log(f"Repository is being Watched by {repository.get('subscribers_count')} Subscribers: {repository.get('subscribers_url')}", rtype="profiling")
+        gx_output.r_log(f"Watchers count: [{repository.get('subscribers_count')}] List at: {repository.get('subscribers_url')}", rtype="profiling")
+
+    if repository.get('stargazers_count') > 0:
+        gx_output.r_log(f"Stars count: [{repository.get('stargazers_count')}] List at: {repository.get('stargazers_url')}", rtype="profiling")
 
     if repository.get('open_issues_count') > 0:
         gx_output.r_log(f"Repository has {repository.get('open_issues_count')} Open Issues: {repository.get('html_url')}/issues", rtype="profiling")
@@ -243,6 +317,11 @@ def run(gx_context, gx_output):
         if details['open'] > 0:
             gx_output.c_log(f"The user submitted {details['submitted']} Pull Requests out of which {details['open']} remain open.", rtype="profiling", contributor=user)
 
+    # Check if there were any users with mismatches in commits dates in the repository.
+    for user, dates_mismatch_commits in gx_context.getIdentifierValues("DATE_MISMATCH_COMMITS").items():
+        gx_output.r_log(f"WARNING: UNRELIABLE DATES in {dates_mismatch_commits} commits by Contributor [{user}]. The account is newer than the commit! Unreliable historic activity or account re-use.", rtype="commits")
+
+
     """ This here next is Work in Progress - trying to figure out what to pay attention to here that makes sense.
     # Get all Issues. Note from GitHub that Issues returns both Issues + PRs:
     # https://docs.github.com/en/rest/issues/issues?apiVersion=2022-11-28
@@ -261,4 +340,5 @@ def run(gx_context, gx_output):
         gx_output.r_log(f"The repository has no record of Issues or Pull Requests.", rtype="profiling")
     """
 
+    print(f"\rRepository has been analyzed.." + " "*40)
     return True
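
For context on the new UNRELIABLE DATES warning: a commit is counted under DATE_MISMATCH_COMMITS when the author's account was created after the commit's recorded date, which means the history cannot be taken at face value. A hedged sketch of that comparison (the helper below is illustrative; gitxray performs this check elsewhere and only tallies the results in this xray):

from datetime import datetime

def parse_gh_date(value):
    # GitHub API timestamps look like "2020-01-15T10:30:00Z"
    return datetime.fromisoformat(value.replace("Z", "+00:00"))

def commit_predates_account(commit_author_date, account_created_at):
    # An account that is newer than its own commit suggests rewritten or
    # backdated history, or re-use of an older identity by a new account.
    return parse_gh_date(commit_author_date) < parse_gh_date(account_created_at)

# Example: account created in 2022, commit claiming 2019 -> unreliable.
print(commit_predates_account("2019-06-01T00:00:00Z", "2022-03-10T08:00:00Z"))  # True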
