Skip to content

Commit 6297013

Browse files
committed
dashboard/app: periodically remove coverage garbage from spanner
1 parent 52b38cc commit 6297013

File tree

5 files changed

+104
-1
lines changed

5 files changed

+104
-1
lines changed

dashboard/app/batch_coverage.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,17 @@ func nsDataAvailable(ctx context.Context, ns string) ([]coveragedb.TimePeriod, [
151151
}
152152
return periods, recordsCount, nil
153153
}
154+
155+
func handleBatchCoverageClean(w http.ResponseWriter, r *http.Request) {
156+
ctx := context.Background()
157+
totalDeleted, err := coveragedb.DeleteGarbage(ctx)
158+
if err != nil {
159+
errMsg := fmt.Sprintf("failed to coveragedb.DeleteGarbage: %s", err.Error())
160+
log.Errorf(ctx, "%s", errMsg)
161+
w.Write([]byte(errMsg))
162+
return
163+
}
164+
logMsg := fmt.Sprintf("successfully deleted %d rows\n", totalDeleted)
165+
log.Infof(ctx, "%s", logMsg)
166+
w.Write([]byte(logMsg))
167+
}

dashboard/app/batch_main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
func initBatchProcessors() {
1919
http.HandleFunc("/cron/batch_coverage", handleBatchCoverage)
2020
http.HandleFunc("/cron/batch_db_export", handleBatchDBExport)
21+
http.HandleFunc("/cron/batch_coverage_clean", handleBatchCoverageClean)
2122
}
2223

2324
// from https://cloud.google.com/batch/docs/samples/batch-create-script-job

dashboard/app/cron.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@ cron:
2424
schedule: every sunday 00:00
2525
# Update other coverage numbers every day.
2626
- url: /cron/batch_coverage?days=true&months=true&steps=10
27-
schedule: every 24 hours
27+
schedule: every day 00:00
28+
# Clean up coverage db every week.
29+
# We're adding data without transactions.
30+
# It is important to run the clean operation when no batch_coverage job is in progress.
31+
- url: /cron/batch_coverage_clean
32+
schedule: every saturday 12:00
2833
# Export reproducers every week.
2934
- url: /cron/batch_db_export
3035
schedule: every saturday 00:00

pkg/coveragedb/spanner.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@ import (
77
"context"
88
"fmt"
99
"os"
10+
"sync/atomic"
1011
"time"
1112

1213
"cloud.google.com/go/civil"
1314
"cloud.google.com/go/spanner"
1415
"github.com/google/syzkaller/pkg/subsystem"
1516
_ "github.com/google/syzkaller/pkg/subsystem/lists"
1617
"github.com/google/uuid"
18+
"golang.org/x/sync/errgroup"
1719
"google.golang.org/api/iterator"
1820
)
1921

@@ -248,3 +250,78 @@ func NsDataMerged(ctx context.Context, projectID, ns string) ([]TimePeriod, []in
248250
}
249251
return periods, totalRows, nil
250252
}
253+
254+
// DeleteGarbage removes orphaned file entries from the database.
255+
//
256+
// It identifies files in the "files" table that are not referenced
257+
// by any entries in the "merge_history" table, indicating they are
258+
// no longer associated with an active merge session.
259+
//
260+
// To avoid exceeding Spanner transaction limits, orphaned files
261+
// are deleted in batches of 10,000. Note that in case of an error
262+
// during batch deletion, some files may be deleted but not counted
263+
// in the returned total.
264+
//
265+
// Returns the number of orphaned file entries successfully deleted.
266+
func DeleteGarbage(ctx context.Context) (int64, error) {
267+
batchSize := 10_000
268+
client, err := NewClient(ctx, os.Getenv("GOOGLE_CLOUD_PROJECT"))
269+
if err != nil {
270+
return 0, fmt.Errorf("coveragedb.NewClient: %w", err)
271+
}
272+
defer client.Close()
273+
274+
iter := client.Single().Query(ctx, spanner.Statement{
275+
SQL: `SELECT session, filepath
276+
FROM files
277+
WHERE NOT EXISTS (
278+
SELECT 1
279+
FROM merge_history
280+
WHERE merge_history.session = files.session
281+
)`})
282+
defer iter.Stop()
283+
284+
var totalDeleted atomic.Int64
285+
eg, _ := errgroup.WithContext(ctx)
286+
var batch []spanner.Key
287+
for {
288+
row, err := iter.Next()
289+
if err == iterator.Done {
290+
break
291+
}
292+
if err != nil {
293+
return 0, fmt.Errorf("iter.Next: %w", err)
294+
}
295+
var r struct {
296+
Session string
297+
Filepath string
298+
}
299+
if err = row.ToStruct(&r); err != nil {
300+
return 0, fmt.Errorf("row.ToStruct: %w", err)
301+
}
302+
batch = append(batch, spanner.Key{r.Session, r.Filepath})
303+
if len(batch) > batchSize {
304+
goSpannerDelete(ctx, batch, eg, client, &totalDeleted)
305+
batch = nil
306+
}
307+
}
308+
goSpannerDelete(ctx, batch, eg, client, &totalDeleted)
309+
if err = eg.Wait(); err != nil {
310+
return 0, fmt.Errorf("spanner.Delete: %w", err)
311+
}
312+
return totalDeleted.Load(), nil
313+
}
314+
315+
func goSpannerDelete(ctx context.Context, batch []spanner.Key, eg *errgroup.Group, client *spanner.Client,
316+
totalDeleted *atomic.Int64) {
317+
ks := spanner.KeySetFromKeys(batch...)
318+
ksSize := len(batch)
319+
eg.Go(func() error {
320+
mutation := spanner.Delete("files", ks)
321+
_, err := client.Apply(ctx, []*spanner.Mutation{mutation})
322+
if err == nil {
323+
totalDeleted.Add(int64(ksSize))
324+
}
325+
return err
326+
})
327+
}

tools/syz-covermerger/init_db.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ CREATE TABLE
2323
(session, filepath) );')
2424
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
2525
--ddl="$create_table"
26+
echo "creating 'files' index"
27+
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
28+
--ddl="CREATE INDEX files_session ON files (session);"
2629

2730
echo "drop table 'merge_history' if exists"
2831
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
@@ -43,6 +46,9 @@ CREATE TABLE
4346
(namespace, repo, duration, dateto) );')
4447
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
4548
--ddl="$create_table"
49+
echo "creating 'merge_history' index"
50+
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
51+
--ddl="CREATE INDEX merge_history_session ON merge_history (session);"
4652

4753
echo "drop table 'file_subsystems' if exists"
4854
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \

0 commit comments

Comments
 (0)