Skip to content

Commit e854f50

Browse files
authored
Bundle Analysis: Add asset comparison and fetching PR files (#480)
* initial commit * add unit tests * add in match by uuid and tests
1 parent 09d69db commit e854f50

File tree

4 files changed

+506
-66
lines changed

4 files changed

+506
-66
lines changed

shared/bundle_analysis/comparison.py

+82-32
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
BundleAnalysisReport,
1414
BundleReport,
1515
BundleRouteReport,
16+
ModuleReport,
1617
)
1718
from shared.bundle_analysis.storage import BundleAnalysisReportLoader
1819
from shared.django_apps.core.models import Repository
@@ -77,11 +78,66 @@ class AssetChange(BaseChange):
7778
"""
7879

7980
asset_name: str
81+
percentage_delta: float
82+
size_base: int
83+
size_head: int
8084

8185

8286
AssetMatch = Tuple[Optional[AssetReport], Optional[AssetReport]]
8387

8488

89+
class AssetComparison:
90+
def __init__(
91+
self,
92+
base_asset_report: Optional[AssetReport] = None,
93+
head_asset_report: Optional[AssetReport] = None,
94+
):
95+
self.base_asset_report = base_asset_report
96+
self.head_asset_report = head_asset_report
97+
98+
@sentry_sdk.trace
99+
def asset_change(self) -> AssetChange:
100+
if self.base_asset_report is None:
101+
return AssetChange(
102+
asset_name=self.head_asset_report.name,
103+
change_type=AssetChange.ChangeType.ADDED,
104+
size_delta=self.head_asset_report.size,
105+
percentage_delta=100,
106+
size_base=0,
107+
size_head=self.head_asset_report.size,
108+
)
109+
elif self.head_asset_report is None:
110+
return AssetChange(
111+
asset_name=self.base_asset_report.name,
112+
change_type=AssetChange.ChangeType.REMOVED,
113+
size_delta=-self.base_asset_report.size,
114+
percentage_delta=-100.0,
115+
size_base=self.base_asset_report.size,
116+
size_head=0,
117+
)
118+
else:
119+
size_delta = self.head_asset_report.size - self.base_asset_report.size
120+
percentage_delta = round(
121+
(size_delta / self.base_asset_report.size) * 100, 2
122+
)
123+
return AssetChange(
124+
asset_name=self.head_asset_report.name,
125+
change_type=AssetChange.ChangeType.CHANGED,
126+
size_delta=size_delta,
127+
percentage_delta=percentage_delta,
128+
size_base=self.base_asset_report.size,
129+
size_head=self.head_asset_report.size,
130+
)
131+
132+
def contributing_modules(
133+
self, pr_changed_files: Optional[List[str]] = None
134+
) -> List[ModuleReport]:
135+
asset_report = self.head_asset_report
136+
if asset_report is None:
137+
return []
138+
return asset_report.modules(pr_changed_files)
139+
140+
85141
class BundleComparison:
86142
def __init__(
87143
self, base_bundle_report: BundleReport, head_bundle_report: BundleReport
@@ -95,7 +151,7 @@ def total_size_delta(self) -> int:
95151
return head_size - base_size
96152

97153
@sentry_sdk.trace
98-
def asset_changes(self) -> List[AssetChange]:
154+
def asset_comparisons(self) -> List[AssetComparison]:
99155
# this groups assets by name
100156
# there can be multiple assets with the same name and we
101157
# need to try and match them across base and head reports
@@ -119,29 +175,10 @@ def asset_changes(self) -> List[AssetChange]:
119175
if asset_name not in asset_names:
120176
matches += self._match_assets(asset_reports, [])
121177

122-
changes = []
123-
for base_asset_report, head_asset_report in matches:
124-
if base_asset_report is None:
125-
change = AssetChange(
126-
asset_name=head_asset_report.name,
127-
change_type=AssetChange.ChangeType.ADDED,
128-
size_delta=head_asset_report.size,
129-
)
130-
elif head_asset_report is None:
131-
change = AssetChange(
132-
asset_name=base_asset_report.name,
133-
change_type=AssetChange.ChangeType.REMOVED,
134-
size_delta=-base_asset_report.size,
135-
)
136-
else:
137-
change = AssetChange(
138-
asset_name=head_asset_report.name,
139-
change_type=AssetChange.ChangeType.CHANGED,
140-
size_delta=head_asset_report.size - base_asset_report.size,
141-
)
142-
changes.append(change)
143-
144-
return changes
178+
return [
179+
AssetComparison(base_asset_report, head_asset_report)
180+
for base_asset_report, head_asset_report in matches
181+
]
145182

146183
def _match_assets(
147184
self,
@@ -153,9 +190,11 @@ def _match_assets(
153190
This method attempts to pick the most likely matching of assets between
154191
base and head (so as to track their changes through time).
155192
156-
The current approach is fairly naive and just picks the asset with the
157-
closest size. There are probably better ways of doing this that we can
158-
improve upon in the future.
193+
Current approach:
194+
1. Pick asset with the same UUID. This means the base and head assets have either of:
195+
- same hashed name
196+
- same modules by name
197+
2. Pick asset with the closest size
159198
"""
160199
n = max([len(base_asset_reports), len(head_asset_reports)])
161200
matches: List[AssetMatch] = []
@@ -169,13 +208,24 @@ def _match_assets(
169208
# no more base assets to match against
170209
matches.append((None, head_asset_report))
171210
else:
172-
# try and find the most "similar" base asset
173-
size_deltas = {
174-
abs(head_asset_report.size - base_bundle.size): base_bundle
211+
# 1. Pick asset with the same UUID
212+
base_asset_report_uuids = {
213+
base_bundle.uuid: base_bundle
175214
for base_bundle in base_asset_reports
176215
}
177-
min_delta = min(size_deltas.keys())
178-
base_asset_report = size_deltas[min_delta]
216+
if head_asset_report.uuid in base_asset_report_uuids:
217+
base_asset_report = base_asset_report_uuids[
218+
head_asset_report.uuid
219+
]
220+
221+
# 2. Pick asset with the closest size
222+
else:
223+
size_deltas = {
224+
abs(head_asset_report.size - base_bundle.size): base_bundle
225+
for base_bundle in base_asset_reports
226+
}
227+
min_delta = min(size_deltas.keys())
228+
base_asset_report = size_deltas[min_delta]
179229

180230
matches.append((base_asset_report, head_asset_report))
181231
base_asset_reports.remove(base_asset_report)

shared/bundle_analysis/parsers/v3.py

+11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import ijson
99
import sentry_sdk
10+
from sqlalchemy import tuple_
1011
from sqlalchemy.orm import Session as DbSession
1112
from sqlalchemy.orm.exc import MultipleResultsFound, NoResultFound
1213

@@ -160,6 +161,16 @@ def parse(self, path: str) -> Tuple[int, str]:
160161
# but first we need to find the Asset by the hashed file name
161162
dynamic_imports_list = self._parse_dynamic_imports()
162163
if dynamic_imports_list:
164+
self.db_session.execute(
165+
DynamicImport.__table__.delete().where(
166+
tuple_(DynamicImport.chunk_id, DynamicImport.asset_id).in_(
167+
[
168+
(item["chunk_id"], item["asset_id"])
169+
for item in dynamic_imports_list
170+
]
171+
)
172+
)
173+
)
163174
insert_dynamic_imports = DynamicImport.__table__.insert().values(
164175
dynamic_imports_list
165176
)

shared/bundle_analysis/report.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,28 @@ def uuid(self) -> str:
9292
def asset_type(self) -> AssetType:
9393
return self.asset.asset_type
9494

95-
def modules(self) -> List[ModuleReport]:
95+
def modules(
96+
self, pr_changed_files: Optional[List[str]] = None
97+
) -> List[ModuleReport]:
9698
with get_db_session(self.db_path) as session:
97-
modules = (
99+
query = (
98100
session.query(Module)
99101
.join(Module.chunks)
100102
.join(Chunk.assets)
101103
.filter(Asset.id == self.asset.id)
102-
.all()
103104
)
104-
return [ModuleReport(self.db_path, module) for module in modules]
105+
106+
# Apply additional filter if pr_changed_files is provided
107+
if pr_changed_files:
108+
# Normalize pr_changed_files to have './' prefix for relative paths
109+
normalized_files = {
110+
f"./{file}" if not file.startswith("./") else file
111+
for file in pr_changed_files
112+
}
113+
query = query.filter(Module.name.in_(normalized_files))
114+
115+
modules = query.all()
116+
return [ModuleReport(self.db_path, module) for module in modules]
105117

106118
def routes(self) -> Optional[List[str]]:
107119
plugin_name = self.bundle_info.get("plugin_name")

0 commit comments

Comments
 (0)