Skip to content

Commit 6352fa4

Browse files
committed
dashboard/app: periodically remove coverage garbage from spanner
1 parent f614e9a commit 6352fa4

File tree

5 files changed

+101
-1
lines changed

5 files changed

+101
-1
lines changed

dashboard/app/batch_coverage.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,17 @@ func nsDataAvailable(ctx context.Context, ns string) ([]coveragedb.TimePeriod, [
151151
}
152152
return periods, recordsCount, nil
153153
}
154+
155+
func handleBatchCoverageClean(w http.ResponseWriter, r *http.Request) {
156+
ctx := context.Background()
157+
totalDeleted, err := coveragedb.DeleteGarbage(ctx)
158+
if err != nil {
159+
errMsg := fmt.Sprintf("failed to coveragedb.DeleteGarbage: %s", err.Error())
160+
log.Errorf(ctx, "%s", errMsg)
161+
w.Write([]byte(errMsg))
162+
return
163+
}
164+
logMsg := fmt.Sprintf("successfully deleted %d rows\n", totalDeleted)
165+
log.Infof(ctx, "%s", logMsg)
166+
w.Write([]byte(logMsg))
167+
}

dashboard/app/batch_main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
func initBatchProcessors() {
1919
http.HandleFunc("/cron/batch_coverage", handleBatchCoverage)
2020
http.HandleFunc("/cron/batch_db_export", handleBatchDBExport)
21+
http.HandleFunc("/cron/batch_coverage_clean", handleBatchCoverageClean)
2122
}
2223

2324
// from https://cloud.google.com/batch/docs/samples/batch-create-script-job

dashboard/app/cron.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ cron:
2424
schedule: every sunday 00:00
2525
# Update other coverage numbers every day.
2626
- url: /cron/batch_coverage?days=true&months=true&steps=10
27-
schedule: every 24 hours
27+
schedule: every day 00:00
28+
# Clean up coverage db every week.
29+
# We're adding data w/o transactions.
30+
# It is important to run the clean operation only when no batch_coverage job is in progress.
31+
- url: /cron/batch_coverage_clean
32+
schedule: every saturday 12:00
2833
# Export reproducers every week.
2934
- url: /cron/batch_db_export
3035
schedule: every saturday 00:00

pkg/coveragedb/spanner.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ import (
77
"context"
88
"fmt"
99
"os"
10+
"sync/atomic"
1011
"time"
1112

1213
"cloud.google.com/go/civil"
1314
"cloud.google.com/go/spanner"
1415
"github.com/google/syzkaller/pkg/subsystem"
1516
_ "github.com/google/syzkaller/pkg/subsystem/lists"
1617
"github.com/google/uuid"
18+
"golang.org/x/sync/errgroup"
1719
"google.golang.org/api/iterator"
1820
)
1921

@@ -248,3 +250,75 @@ func NsDataMerged(ctx context.Context, projectID, ns string) ([]TimePeriod, []in
248250
}
249251
return periods, totalRows, nil
250252
}
253+
254+
// DeleteGarbage removes orphaned file entries from the database.
255+
//
256+
// It identifies files in the "files" table that are not referenced by any entries in the "merge_history" table,
257+
// indicating they are no longer associated with an active merge session.
258+
//
259+
// To avoid exceeding Spanner transaction limits, orphaned files are deleted in batches of 10,000.
260+
// Note that in case of an error during batch deletion, some files may be deleted but not counted in the total.
261+
//
262+
// Returns the number of orphaned file entries successfully deleted.
263+
func DeleteGarbage(ctx context.Context) (int64, error) {
264+
batchSize := 10_000
265+
client, err := NewClient(ctx, os.Getenv("GOOGLE_CLOUD_PROJECT"))
266+
if err != nil {
267+
return 0, fmt.Errorf("coveragedb.NewClient: %w", err)
268+
}
269+
defer client.Close()
270+
271+
iter := client.Single().Query(ctx, spanner.Statement{
272+
SQL: `SELECT session, filepath
273+
FROM files
274+
WHERE NOT EXISTS (
275+
SELECT 1
276+
FROM merge_history
277+
WHERE merge_history.session = files.session
278+
)`})
279+
defer iter.Stop()
280+
281+
var totalDeleted atomic.Int64
282+
eg, _ := errgroup.WithContext(ctx)
283+
var batch []spanner.Key
284+
for {
285+
row, err := iter.Next()
286+
if err == iterator.Done {
287+
break
288+
}
289+
if err != nil {
290+
return 0, fmt.Errorf("iter.Next: %w", err)
291+
}
292+
var r struct {
293+
Session string
294+
Filepath string
295+
}
296+
if err = row.ToStruct(&r); err != nil {
297+
return 0, fmt.Errorf("row.ToStruct: %w", err)
298+
}
299+
batch = append(batch, spanner.Key{r.Session, r.Filepath})
300+
if len(batch) > batchSize {
301+
goSpannerDelete(ctx, batch, eg, client, &totalDeleted)
302+
batch = nil
303+
}
304+
}
305+
goSpannerDelete(ctx, batch, eg, client, &totalDeleted)
306+
if err = eg.Wait(); err != nil {
307+
return 0, fmt.Errorf("spanner.Delete: %w", err)
308+
}
309+
return totalDeleted.Load(), nil
310+
}
311+
312+
func goSpannerDelete(ctx context.Context, batch []spanner.Key, eg *errgroup.Group, client *spanner.Client,
313+
totalDeleted *atomic.Int64) {
314+
ks := spanner.KeySetFromKeys(batch...)
315+
ksSize := len(batch)
316+
eg.Go(func() error {
317+
mutation := spanner.Delete("files", ks)
318+
_, err := client.Apply(ctx, []*spanner.Mutation{mutation})
319+
if err == nil {
320+
totalDeleted.Add(int64(ksSize))
321+
}
322+
return err
323+
})
324+
}

tools/syz-covermerger/init_db.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ CREATE TABLE
2323
(session, filepath) );')
2424
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
2525
--ddl="$create_table"
26+
echo "creating 'files' index"
27+
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
28+
--ddl="CREATE INDEX files_session ON files (session);"
2629

2730
echo "drop table 'merge_history' if exists"
2831
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
@@ -43,6 +46,9 @@ CREATE TABLE
4346
(namespace, repo, duration, dateto) );')
4447
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
4548
--ddl="$create_table"
49+
echo "creating 'merge_history' index"
50+
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
51+
--ddl="CREATE INDEX merge_history_session ON merge_history (session);"
4652

4753
echo "drop table 'file_subsystems' if exists"
4854
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \

0 commit comments

Comments
 (0)