Skip to content

Commit 58ebda7

Browse files
committed
2 parents 62d46b1 + ebcaa85 commit 58ebda7

34 files changed

+1714
-0
lines changed
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import subprocess
4+
import sys
5+
from dataclasses import dataclass
6+
from itertools import chain
7+
from pathlib import Path
8+
from typing import Dict, Iterable, List, Literal, Optional, Tuple
9+
10+
11+
@dataclass
class GitDiffTreeRecord:
    """One raw-format entry from the output of ``git diff-tree``.

    Field order is part of the positional constructor and must not change.
    """

    # File mode and object hash on the "source" side of the diff.
    src_mode: str
    src_hash: str
    # File mode and object hash on the "destination" side of the diff.
    dst_mode: str
    dst_hash: str
    # First path listed for the entry.
    src_path: Path
    # Second path listed for the entry, or None when the line has only one.
    dst_path: Optional[Path]
    # Single-letter status code that starts the status field.
    status: Literal["A", "C", "D", "M", "R", "T", "U", "X"]
    # Integer that follows the status letter, or None when there is none
    # (presumably the similarity score git reports for renames and copies).
    score: Optional[int]
23+
24+
25+
@dataclass
class GitChange:
    """A changed file paired with its net size change."""

    # The parsed ``git diff-tree`` entry this change was derived from.
    diff_record: GitDiffTreeRecord
    # Destination size minus source size, in bytes (negative when shrinking).
    bytes_changed: int
29+
30+
31+
def parse_git_diff_tree_output(output: str) -> List[GitDiffTreeRecord]:
    """Parse the raw-format output of ``git diff-tree`` into records.

    Each record line is expected to look like the "Raw Output Format" section
    of the man page describes::

        :<src_mode> <dst_mode> <src_hash> <dst_hash> <status>[<score>]<TAB><src_path>[<TAB><dst_path>]

    Lines that do not start with ``:`` are skipped instead of crashing the
    parser. This covers blank lines and the commit-ID header that
    ``git diff-tree`` prints when it is given a single commit.

    :param output: Text captured from ``git diff-tree``
    :type output: str

    :return: One record per raw-format line, in the order they appear
    :rtype: List[GitDiffTreeRecord]
    """

    def make_record(line: str) -> GitDiffTreeRecord:
        # Drop the leading ':'. The first four fields are space separated;
        # everything after them (status and paths) is tab separated.
        src_mode, dst_mode, src_hash, dst_hash, rest = line[1:].split(" ", maxsplit=4)
        status_and_score, *paths = rest.split("\t")
        return GitDiffTreeRecord(
            src_mode=src_mode,
            src_hash=src_hash,
            dst_mode=dst_mode,
            dst_hash=dst_hash,
            # The status letter may be followed by a numeric score (e.g. "R86").
            status=status_and_score[0],
            score=int(status_and_score[1:]) if len(status_and_score) > 1 else None,
            src_path=Path(paths[0]),
            dst_path=Path(paths[1]) if len(paths) >= 2 else None,
        )

    return [make_record(line) for line in output.splitlines() if line.startswith(":")]
51+
52+
53+
def get_blob_sizes(hashes: Iterable[str]) -> Dict[str, Optional[int]]:
    """Fetches the sizes, in bytes, of git blobs

    The hashes are de-duplicated and passed to a single
    ``git cat-file --batch-check`` invocation. When there are no hashes at
    all, git is not invoked and an empty dictionary is returned.

    :param hashes: An iterable of git blob hashes
    :type hashes: Iterable[str]

    :return: A dictionary mapping blob hashes to their size if the blob exists,
        or None otherwise
    :rtype: Dict[str, Optional[int]]

    :raises subprocess.CalledProcessError: If git exits with a non-zero status
    """
    unique_hashes = set(hashes)
    if not unique_hashes:
        # Nothing to look up; skip spawning a subprocess.
        return {}

    cat_file_output = subprocess.run(
        ["git", "cat-file", "--batch-check"],
        input="\n".join(unique_hashes),
        check=True,
        text=True,
        capture_output=True,
    ).stdout

    def make_object_size_tuple(line: str) -> Tuple[str, Optional[int]]:
        # First token is the object name and the last token is either the
        # size or the word "missing"; anything in between is ignored.
        object_hash, *_, size = line.split()
        return (object_hash, int(size) if size != "missing" else None)

    return dict(make_object_size_tuple(line) for line in cat_file_output.splitlines())
77+
78+
79+
def get_file_size_differences(commit_range: str) -> Dict[Path, GitChange]:
    """Computes the size difference, in bytes, of files changed between two commits

    Runs ``git diff-tree -r`` on the range, looks up the size of every blob
    involved, and reports destination size minus source size per file. Blobs
    that git reports as missing count as 0 bytes.

    :param commit_range: A git commit range (e.g. HEAD~3..HEAD)
    :type commit_range: str

    :return: A dictionary mapping paths (relative to repository root) to size
        differences.
    :rtype: dict[Path, GitChange]

    :raises ValueError: If the diff contains a status other than A, D or M
    :raises subprocess.CalledProcessError: If git exits with a non-zero status
    """
    diff_tree_output = subprocess.run(
        ["git", "diff-tree", "-r", commit_range],
        capture_output=True,
        text=True,
        check=True,
    ).stdout
    changed_records = parse_git_diff_tree_output(diff_tree_output)

    # Only add/delete/modify entries are handled below. This is raised
    # explicitly rather than asserted so the check survives `python -O`.
    supported_statuses = {"A", "D", "M"}
    unsupported = sorted({record.status for record in changed_records} - supported_statuses)
    if unsupported:
        raise ValueError(
            f"Unsupported git diff-tree status(es) {unsupported} in commit range '{commit_range}'"
        )

    sizes = get_blob_sizes(
        chain.from_iterable((record.src_hash, record.dst_hash) for record in changed_records)
    )

    def as_int(maybe_num: Optional[int]) -> int:
        # A missing blob has size None; treat it as empty.
        return maybe_num or 0

    return {
        record.src_path: GitChange(
            diff_record=record,
            bytes_changed=as_int(sizes[record.dst_hash]) - as_int(sizes[record.src_hash]),
        )
        for record in changed_records
    }
112+
113+
114+
def main(
    commit_range: str,
    quiet: bool = False,
    limit: Optional[int] = None,
    show_n_largest_files: int = 30,
) -> Literal[0, 1]:
    """Summarize file size changes for a commit range and enforce a limit.

    :param commit_range: A git commit range (e.g. HEAD~3..HEAD)
    :param quiet: When true, nothing is printed
    :param limit: Maximum allowed cumulative size increase in bytes, or None
        for no limit
    :param show_n_largest_files: How many of the largest per-file differences
        to list

    :return: 1 if the cumulative difference exceeds ``limit``, otherwise 0
    """
    changes = get_file_size_differences(commit_range)
    total_delta = sum(change.bytes_changed for change in changes.values())
    over_limit = limit is not None and total_delta > limit
    exit_code = 1 if over_limit else 0

    def signed(num: int) -> str:
        # Negative numbers already carry their sign; only add an explicit '+'.
        sign = "+" if num >= 0 else ""
        return sign + human_friendly_bytes(num)

    if quiet:
        return exit_code

    limit_note = f" (Exceeds set limit of {signed(limit)})" if over_limit else ""
    print(f"Total file size difference for commit range '{commit_range}': ")
    print(f"\t{signed(total_delta)}", end="")
    print(limit_note)

    def by_bytes_changed(item):
        _, change = item
        return change.bytes_changed

    # Largest increases first, truncated to the requested count.
    ranked = sorted(changes.items(), key=by_bytes_changed, reverse=True)
    top_changes = ranked[:show_n_largest_files]

    if top_changes:
        print("")
        print(f"Largest {len(top_changes)} filesize differences:")

    for path, change in top_changes:
        print(f"\t{signed(change.bytes_changed)}\t{path}")

    return exit_code
144+
145+
146+
def num_bytes(arg: str) -> int:
    """Converts a string to a number of bytes

    Accepts either a plain integer (e.g. ``"65536"``) or an integer followed
    by a two-letter suffix (e.g. ``"2MB"``). Suffixes are powers of 1024 and
    are matched case-insensitively, so ``"2mb"`` and ``"2Mb"`` are accepted
    as well. Leading and trailing whitespace is ignored.

    :param arg: The command-line value to parse
    :type arg: str

    :return: The number of bytes
    :rtype: int

    :raises argparse.ArgumentTypeError: If ``arg`` is neither an integer nor
        an integer with a recognized suffix
    """
    error = argparse.ArgumentTypeError(f"'{arg}' cannot be parsed into a number of bytes")
    text = arg.strip()
    try:
        return int(text)
    except ValueError:
        pass  # Not a bare integer; fall through to suffix parsing.

    # A suffixed value needs at least one digit plus the two-letter suffix.
    if len(text) < 3:
        raise error

    num, suffix = text[:-2], text[-2:].upper()
    # Each step up the table is another factor of 1024 (a 10-bit shift).
    shift_values = {
        "KB": 1,
        "MB": 2,
        "GB": 3,
        "TB": 4,
        "PB": 5,
        "EB": 6,
        "ZB": 7,
        "YB": 8,
    }

    shift = shift_values.get(suffix)
    if shift is None:
        raise error

    try:
        return int(num) << (shift * 10)
    except ValueError as e:
        raise error from e
177+
178+
179+
def human_friendly_bytes(num: int) -> str:
    """Format a number of bytes as a human friendly string.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then returned with one decimal place and the matching unit; anything
    beyond "ZB" is reported in "YB".
    """
    value = num
    for unit in ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"):
        if -1024.0 < value < 1024.0:
            return f"{value:.1f}{unit}"
        value = value / 1024.0
    return f"{value:.1f}YB"
186+
187+
188+
# Command-line entry point: parse the arguments and exit with main()'s status.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="A program that summarizes the file size differences between two git commits."
    )
    parser.add_argument(
        "commit_range",
        # Without nargs="?" argparse treats a positional as required and the
        # default below would never be used.
        nargs="?",
        default="HEAD^..HEAD",
        type=str,
        help="Commit range to compute the diff for (e.g. HEAD~3..HEAD)",
    )
    parser.add_argument("--quiet", action="store_true", help="Silence all output")
    parser.add_argument(
        "--limit",
        type=num_bytes,
        help="Exit non-zero if total changes exceeds this value. "
        + "Can be a raw number of bytes (e.g. 65536) or a suffixed value (e.g 2MB)",
    )
    parser.add_argument(
        "--show-n-largest-files",
        type=int,
        help="Show this many of the largest files in diff",
        default=30,
    )
    # The argparse destinations match main()'s parameter names one-to-one.
    sys.exit(main(**vars(parser.parse_args())))

.github/workflows/pre-commit.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Runs the pre-commit hooks against every file in the repository.
name: Pre-Commit


# Triggered by pushes to main, pull requests targeting main, and manual runs.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      # Presumably installs pre-commit itself; verify against dev-requirements.txt.
      - run: pip install -r dev-requirements.txt
      - name: Run Pre-Commit
        run: pre-commit run --all-files
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Checks applied to pull requests; currently only a cap on total file size growth.
name: Pull Request Checks

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

jobs:
  pull_request_size:
    # The job needs pull request metadata, so it is skipped for push and
    # manually dispatched runs even though the workflow triggers on them.
    if: github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.9"
      # Compares the base branch against HEAD and passes a 1MB --limit to the
      # size-summary script.
      - name: Check Pull Request Size
        run: |
          git fetch origin ${{ github.event.pull_request.base.ref }} --quiet # Need to manually fetch base branch in CI
          python ./.github/scripts/commit-filesize-diff-summary.py --limit 1MB origin/${{ github.event.pull_request.base.ref }}..HEAD

.github/workflows/run-samples.yml

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Deploys Azure resources and runs the samples changed by a pull request.
name: Run Samples
on:
  # By design pull_request_target event run against the version of the workflow in the target branch.
  # So you have to merge changes to this workflow to observe the effects.
  pull_request_target:
    branches:
      - main
    paths:
      - scenarios/**
      # NOTE(review): this filter says ".infra/deployments/" while the deploy
      # step below reads ".infra/deployment/main.bicep" - confirm which
      # directory name is correct.
      - .infra/deployments/**/*.bicep
jobs:
  # Decides which environment the run-samples job uses. The result is
  # "external-contribution" unless the user who triggered the event is a
  # public member of the org, in which case it is an empty string.
  check-if-external:
    runs-on: ubuntu-latest
    outputs:
      environment: ${{ steps.set-environment.outputs.result }}
    steps:
      - uses: actions/github-script@v7
        id: set-environment
        with:
          script: |
            const actionInitiator = context.payload.sender.login;
            const org = "Azure-AI-Foundry";
            let isPublicMember = true;

            // Check if initiator is a public member of the org
            try {
              await github.rest.orgs.checkPublicMembershipForUser({
                org,
                username: actionInitiator
              });
            } catch (error) {
              if (error.status != 404) {
                throw new Error("Unknown error", {cause: error});
              }

              console.debug([
                `User is not a public member of the organization "${org}"`,
                "",
                `If you are a Microsoft employee, you can join the "${org}" org and set your org membership visibility to public: https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-personal-account-on-github/managing-your-membership-in-organizations/publicizing-or-hiding-organization-membership#changing-the-visibility-of-your-organization-membership`
              ].join("\n"));

              isPublicMember = false;
            }


            const isPullRequestEvent = ["pull_request", "pull_request_target"].includes(context.eventName);

            if (!(isPublicMember && isPullRequestEvent)) {
              return "external-contribution";
            }
            return "";
          result-encoding: string
  run-samples:
    permissions:
      contents: 'read'
      id-token: 'write'
    needs: check-if-external
    runs-on: ubuntu-latest
    # Require manual approval if initiator is not a public member of Azure-AI-Foundry
    environment: ${{ needs.check-if-external.outputs.environment }}
    steps:
      # Check out the pull request's head commit. This job uses repository
      # secrets and runs code from the pull request, so the environment gate
      # above is what guards untrusted contributions.
      - uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha || github.ref }}
      - uses: actions/setup-python@v5
        with:
          python-version: "3.9"
      - name: Install dev dependencies
        run: |
          pip install -r dev-requirements.txt
      - uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
      # Deployment output is written to deployment.json.
      - name: Deploy resources
        run: |
          principalId="$(az ad sp show --id ${{ secrets.AZURE_CLIENT_ID }} -o tsv --query id)"
          az deployment sub create --location eastus \
            --template-file .infra/deployment/main.bicep \
            --parameters principalType=ServicePrincipal \
            --parameters principalId="$principalId" \
            -o json > deployment.json
      - name: Run Changed Samples
        run:
          pytest --changed-samples-only-from ${{ github.event.pull_request.base.sha }}

0 commit comments

Comments
 (0)