Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 78 additions & 6 deletions enterprise/app/tap/grid.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ interface Stat {
cached: number;
}

interface CacheInfo {
fullyCached: boolean;
partiallyCached: boolean;
cachedCount: number;
cachedLocallyCount: number;
cachedRemotelyCount: number;
totalRunCount: number;
}

const Status = api.v1.Status;

const MIN_OPACITY = 0.1;
Expand Down Expand Up @@ -174,6 +183,7 @@ export default class TestGridComponent extends React.Component<Props, State> {
for (let targetHistory of histories) {
let stats: Stat = { count: 0, pass: 0, totalDuration: 0, maxDuration: 0, avgDuration: 0, flake: 0, cached: 0 };
for (let status of targetHistory.targetStatus) {
const cacheInfo = getCacheInfo(status);
stats.count += 1;
let duration = this.durationToNum(status.timing?.duration || undefined);
stats.totalDuration += duration;
Expand All @@ -183,7 +193,7 @@ export default class TestGridComponent extends React.Component<Props, State> {
} else if (status.status == Status.FLAKY) {
stats.flake += 1;
}
if (isCached(status)) {
if (cacheInfo.fullyCached) {
stats.cached += 1;
}
}
Expand Down Expand Up @@ -525,6 +535,7 @@ export default class TestGridComponent extends React.Component<Props, State> {
return (
<div className="tap-commit-container">
{statuses.map((status) => {
const cacheInfo = getCacheInfo(status);
let destinationUrl = `/invocation/${status.invocationId}?${new URLSearchParams({
target: targetHistory.target?.label || "",
targetStatus: String(status),
Expand All @@ -536,9 +547,9 @@ export default class TestGridComponent extends React.Component<Props, State> {
title += ` at commit ${commitStatus.commitSha}`;
}

let cached = isCached(status);
if (cached) {
title += ` (cached)`;
const cacheLabel = describeCacheStatus(cacheInfo);
if (cacheLabel) {
title += ` (${cacheLabel})`;
}

return (
Expand All @@ -560,7 +571,7 @@ export default class TestGridComponent extends React.Component<Props, State> {
className={`tap-block ${
this.getColorMode() == "status" ? `status-${status.status}` : "timing"
}
${cached ? `cached` : ""}
${cacheInfo.fullyCached ? `cached` : ""}
clickable`}>
{this.statusToIcon(status.status || Status.STATUS_UNSPECIFIED)}
</a>
Expand All @@ -584,7 +595,68 @@ export default class TestGridComponent extends React.Component<Props, State> {
}
}

function isCached(status: target.ITargetStatus) {
function getCacheInfo(status: target.ITargetStatus): CacheInfo {
const cachedLocallyCount = Number(status.cachedLocallyCount || 0);
const cachedRemotelyCount = Number(status.cachedRemotelyCount || 0);
const cachedCount = Math.max(Number(status.cachedCount || 0), cachedLocallyCount + cachedRemotelyCount);
const explicitTotalRunCount = Number(status.totalRunCount || 0);

if (explicitTotalRunCount > 0 || cachedCount > 0 || cachedLocallyCount > 0 || cachedRemotelyCount > 0) {
const totalRunCount = Math.max(explicitTotalRunCount, cachedCount);
// When explicit counts are available, use them as the source of truth. The
// legacy cached bool is only a fallback for older rows without counts.
const fullyCached = explicitTotalRunCount > 0 ? cachedCount === totalRunCount : Boolean(status.cached);
return {
fullyCached,
partiallyCached: !fullyCached && cachedCount > 0,
cachedCount,
cachedLocallyCount,
cachedRemotelyCount,
totalRunCount,
};
}

// Older ClickHouse rows only have the legacy cached bool, so preserve the
// historical fallback instead of treating missing counts as uncached.
const fullyCached = Boolean(status.cached) || isHeuristicallyCached(status);
return {
fullyCached,
partiallyCached: false,
cachedCount: fullyCached ? 1 : 0,
cachedLocallyCount: 0,
cachedRemotelyCount: 0,
totalRunCount: fullyCached ? 1 : 0,
};
}

function describeCacheStatus(cacheInfo: CacheInfo): string {
if (!cacheInfo.fullyCached && !cacheInfo.partiallyCached) {
return "";
}

const originParts = [];
if (cacheInfo.cachedLocallyCount > 0) {
originParts.push(`${cacheInfo.cachedLocallyCount} local`);
}
if (cacheInfo.cachedRemotelyCount > 0) {
originParts.push(`${cacheInfo.cachedRemotelyCount} remote`);
}
const originSuffix = originParts.length ? ` (${originParts.join(", ")})` : "";

if (cacheInfo.fullyCached) {
if (cacheInfo.cachedLocallyCount > 0 && cacheInfo.cachedLocallyCount === cacheInfo.totalRunCount) {
return "cached locally";
}
if (cacheInfo.cachedRemotelyCount > 0 && cacheInfo.cachedRemotelyCount === cacheInfo.totalRunCount) {
return "cached remotely";
}
return `cached${originSuffix}`;
}

return `${cacheInfo.cachedCount}/${cacheInfo.totalRunCount} cached${originSuffix}`;
}

function isHeuristicallyCached(status: target.ITargetStatus) {
return (
+(status.timing?.startTime?.seconds || 0) > 0 &&
Math.floor(+(status.invocationCreatedAtUsec || 0) / 1000000) > +(status.timing?.startTime?.seconds || 0)
Expand Down
29 changes: 29 additions & 0 deletions proto/target.proto
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ message TargetMetadata {
api.v1.TestSize test_size = 5;
}

// Status for a target within target history. Today GetTargetHistory only
// stores test targets, so the cache fields below describe test attempts rather
// than generic build actions.
message TargetStatus {
// The invocation identifier itself.
string invocation_id = 1;
Expand All @@ -51,6 +54,32 @@ message TargetStatus {

// When the invocation was created.
int64 invocation_created_at_usec = 5;

// Whether all recorded test attempts for this target invocation were served
// from cache. For new ClickHouse rows, this legacy field is derived from
// total_run_count > 0 && cached_count == total_run_count. When
// total_run_count is 0, the row does not have explicit attempt counts, so
// clients should fall back to this bool instead of treating 0 as the number
// of attempts.
bool cached = 6;
Comment on lines +58 to +64

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

without the explicit cache counts

Is this equivalent to checking total_run_count == 0? (maybe be more explicit in this comment)


// The number of cached test attempts recorded for this target invocation,
// including both local and remote cache hits. A non-zero cached_count does
// not imply that all attempts were cached; compare against total_run_count.
int32 cached_count = 7;

// The number of cached test attempts served from the local cache.
int32 cached_locally_count = 8;

// The number of cached test attempts served from the remote cache.
int32 cached_remotely_count = 9;

// The total number of cached and uncached test shard attempts recorded for
// this target invocation. This comes from Bazel TestSummary.total_run_count;
// compare cached_count against it to determine whether all attempts were
// cached. A value of 0 indicates that explicit attempt counts are unavailable
// for legacy rows.
int32 total_run_count = 10;
}

message TargetHistory {
Expand Down
91 changes: 66 additions & 25 deletions server/build_event_protocol/target_tracker/target_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,23 @@ const (
)

type target struct {
label string
aspect string
ruleType string
firstStartTime time.Time
totalDuration time.Duration
state targetState
id int64
overallStatus build_event_stream.TestStatus
cached bool
targetType cmpb.TargetType
testSize build_event_stream.TestSize
buildSuccess bool
label string
aspect string
ruleType string
firstStartTime time.Time
totalDuration time.Duration
state targetState
id int64
overallStatus build_event_stream.TestStatus
cached bool
cachedCount int32
cachedLocallyCount int32
cachedRemotelyCount int32
totalRunCount int32
observedResultCount int32
targetType cmpb.TargetType
testSize build_event_stream.TestSize
buildSuccess bool
}

func md5Int64(text string) int64 {
Expand All @@ -76,6 +81,24 @@ func newTarget(label string, aspect string) *target {
}
}

func (t *target) effectiveCachedCount(ctx context.Context) int32 {
count := t.cachedCount
if observed := t.cachedLocallyCount + t.cachedRemotelyCount; observed > count {
log.CtxDebugf(ctx, "Observed %d cached TestResult entries for target %q, exceeding TestSummary total_num_cached=%d; using observed count", observed, t.label, t.cachedCount)
count = observed
}
return count
}

func (t *target) effectiveTotalRunCount(ctx context.Context) int32 {
count := t.totalRunCount
if t.observedResultCount > count {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we expect this to ever happen? if not, maybe add a debug log here (with CtxDebug)?

log.CtxDebugf(ctx, "Observed %d TestResult events for target %q, exceeding TestSummary total_run_count=%d; using observed count", t.observedResultCount, t.label, t.totalRunCount)
count = t.observedResultCount
}
return count
}

func getTestStatus(aborted *build_event_stream.Aborted) build_event_stream.TestStatus {
switch aborted.GetReason() {
case build_event_stream.Aborted_USER_INTERRUPTED:
Expand All @@ -99,11 +122,22 @@ func (t *target) updateFromEvent(event *build_event_stream.BuildEvent) {
}
t.state = targetStateCompleted
case *build_event_stream.BuildEvent_TestResult:
t.cached = p.TestResult.GetCachedLocally() || p.TestResult.GetExecutionInfo().GetCachedRemotely()
cachedLocally := p.TestResult.GetCachedLocally()
cachedRemotely := p.TestResult.GetExecutionInfo().GetCachedRemotely()
t.cached = cachedLocally || cachedRemotely
t.observedResultCount++
if cachedLocally {
t.cachedLocallyCount++
}
if cachedRemotely {
t.cachedRemotelyCount++
}
t.state = targetStateResult
case *build_event_stream.BuildEvent_TestSummary:
ts := p.TestSummary
t.overallStatus = ts.GetOverallStatus()
t.cachedCount = ts.GetTotalNumCached()
t.totalRunCount = ts.GetTotalRunCount()
t.firstStartTime = timeutil.GetTimeWithFallback(ts.GetFirstStartTime(), ts.GetFirstStartTimeMillis())
t.totalDuration = timeutil.GetDurationWithFallback(ts.GetTotalRunDuration(), ts.GetTotalRunDurationMillis())
t.state = targetStateSummary
Expand Down Expand Up @@ -356,25 +390,32 @@ func (t *TargetTracker) writeTestTargetStatusesToOLAPDB(ctx context.Context, per
testStartTimeUsec = target.firstStartTime.UnixMicro()
}

cachedCount := target.effectiveCachedCount(ctx)
totalRunCount := target.effectiveTotalRunCount(ctx)

entries = append(entries, &schema.TestTargetStatus{
GroupID: permissions.GroupID,
RepoURL: repoURL,
CommitSHA: commitSHA,
Label: target.label,
InvocationStartTimeUsec: invocationStartTime.UnixMicro(),

RuleType: target.ruleType,
UserID: permissions.UserID,
InvocationUUID: invocationUUID,
TargetType: int32(target.targetType),
TestSize: int32(target.testSize),
Status: int32(target.overallStatus),
Cached: target.cached,
StartTimeUsec: testStartTimeUsec,
DurationUsec: target.totalDuration.Microseconds(),
BranchName: t.buildEventAccumulator.Invocation().GetBranchName(),
Role: t.buildEventAccumulator.Invocation().GetRole(),
Command: t.buildEventAccumulator.Invocation().GetCommand(),
RuleType: target.ruleType,
UserID: permissions.UserID,
InvocationUUID: invocationUUID,
TargetType: int32(target.targetType),
TestSize: int32(target.testSize),
Status: int32(target.overallStatus),
Cached: totalRunCount > 0 && cachedCount == totalRunCount,
CachedCount: cachedCount,
CachedLocallyCount: target.cachedLocallyCount,
CachedRemotelyCount: target.cachedRemotelyCount,
TotalRunCount: totalRunCount,
StartTimeUsec: testStartTimeUsec,
DurationUsec: target.totalDuration.Microseconds(),
BranchName: t.buildEventAccumulator.Invocation().GetBranchName(),
Role: t.buildEventAccumulator.Invocation().GetRole(),
Command: t.buildEventAccumulator.Invocation().GetCommand(),
})
}
err := t.env.GetOLAPDBHandle().FlushTestTargetStatuses(ctx, entries)
Expand Down
Loading