Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions dashboard/app/batch_coverage.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,17 @@ func nsDataAvailable(ctx context.Context, ns string) ([]coveragedb.TimePeriod, [
}
return periods, recordsCount, nil
}

// handleBatchCoverageClean serves /cron/batch_coverage_clean.
// It removes orphaned coverage rows via coveragedb.DeleteGarbage and reports
// the outcome both to the log and in the HTTP response.
func handleBatchCoverageClean(w http.ResponseWriter, r *http.Request) {
	ctx := context.Background()
	totalDeleted, err := coveragedb.DeleteGarbage(ctx)
	if err != nil {
		errMsg := fmt.Sprintf("failed to coveragedb.DeleteGarbage: %s", err.Error())
		log.Errorf(ctx, "%s", errMsg)
		// Return a non-2xx status so App Engine cron records the run as
		// failed (and can retry); writing the message with the implicit
		// 200 status would make every failure look like a success.
		http.Error(w, errMsg, http.StatusInternalServerError)
		return
	}
	logMsg := fmt.Sprintf("successfully deleted %d rows\n", totalDeleted)
	log.Infof(ctx, "%s", logMsg)
	w.Write([]byte(logMsg))
}
1 change: 1 addition & 0 deletions dashboard/app/batch_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
// initBatchProcessors registers the HTTP handlers for the batch cron endpoints.
func initBatchProcessors() {
	routes := []struct {
		path    string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{"/cron/batch_coverage", handleBatchCoverage},
		{"/cron/batch_db_export", handleBatchDBExport},
		{"/cron/batch_coverage_clean", handleBatchCoverageClean},
	}
	for _, route := range routes {
		http.HandleFunc(route.path, route.handler)
	}
}

// from https://cloud.google.com/batch/docs/samples/batch-create-script-job
Expand Down
7 changes: 6 additions & 1 deletion dashboard/app/cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ cron:
schedule: every sunday 00:00
# Update other coverage numbers every day.
- url: /cron/batch_coverage?days=true&months=true&steps=10
schedule: every 24 hours
schedule: every day 00:00
# Clean up coverage db every week.
# We're adding data w/o transactions.
# It is important to run the clean operation only when no batch_coverage job is in progress.
- url: /cron/batch_coverage_clean
schedule: every saturday 12:00
# Export reproducers every week.
- url: /cron/batch_db_export
schedule: every saturday 00:00
74 changes: 74 additions & 0 deletions pkg/coveragedb/spanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ import (
"context"
"fmt"
"os"
"sync/atomic"
"time"

"cloud.google.com/go/civil"
"cloud.google.com/go/spanner"
"github.com/google/syzkaller/pkg/subsystem"
_ "github.com/google/syzkaller/pkg/subsystem/lists"
"github.com/google/uuid"
"golang.org/x/sync/errgroup"
"google.golang.org/api/iterator"
)

Expand Down Expand Up @@ -248,3 +250,75 @@ func NsDataMerged(ctx context.Context, projectID, ns string) ([]TimePeriod, []in
}
return periods, totalRows, nil
}

// DeleteGarbage removes orphaned file entries from the database.
//
// It identifies files in the "files" table that are not referenced by any entries in the "merge_history" table,
// indicating they are no longer associated with an active merge session.
//
// To avoid exceeding Spanner transaction limits, orphaned files are deleted in batches of 10,000.
// Note that in case of an error during batch deletion, some files may be deleted but not counted in the total.
//
// Returns the number of orphaned file entries successfully deleted.
// DeleteGarbage removes orphaned file entries from the database.
//
// It identifies files in the "files" table that are not referenced by any entries in the "merge_history" table,
// indicating they are no longer associated with an active merge session.
//
// To avoid exceeding Spanner transaction limits, orphaned files are deleted in batches of 10,000.
// Note that in case of an error during batch deletion, some files may be deleted but not counted in the total.
//
// Returns the number of orphaned file entries successfully deleted.
func DeleteGarbage(ctx context.Context) (int64, error) {
	batchSize := 10_000
	client, err := NewClient(ctx, os.Getenv("GOOGLE_CLOUD_PROJECT"))
	if err != nil {
		return 0, fmt.Errorf("coveragedb.NewClient: %w", err)
	}
	defer client.Close()

	iter := client.Single().Query(ctx, spanner.Statement{
		SQL: `SELECT session, filepath
					FROM files
					WHERE NOT EXISTS (
						SELECT 1
						FROM merge_history
						WHERE merge_history.session = files.session
					)`})
	defer iter.Stop()

	var totalDeleted atomic.Int64
	// Keep the derived context: if one delete batch fails, the remaining
	// in-flight batches observe the cancellation instead of running on.
	eg, egCtx := errgroup.WithContext(ctx)
	var batch []spanner.Key
	for {
		row, err := iter.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return 0, fmt.Errorf("iter.Next: %w", err)
		}
		var r struct {
			Session  string
			Filepath string
		}
		if err = row.ToStruct(&r); err != nil {
			return 0, fmt.Errorf("row.ToStruct: %w", err)
		}
		batch = append(batch, spanner.Key{r.Session, r.Filepath})
		// Flush at ">=" (not ">") so a batch never exceeds the documented
		// 10,000-row limit.
		if len(batch) >= batchSize {
			goSpannerDelete(egCtx, batch, eg, client, &totalDeleted)
			batch = nil
		}
	}
	// Flush the final, possibly partial batch; skip the no-op Apply when empty.
	if len(batch) > 0 {
		goSpannerDelete(egCtx, batch, eg, client, &totalDeleted)
	}
	if err = eg.Wait(); err != nil {
		return 0, fmt.Errorf("spanner.Delete: %w", err)
	}
	return totalDeleted.Load(), nil
}

// goSpannerDelete schedules an asynchronous deletion of the given keys from
// the "files" table on the provided errgroup. On success it adds the number
// of submitted keys to totalDeleted; any Apply error is propagated through
// the errgroup.
func goSpannerDelete(ctx context.Context, batch []spanner.Key, eg *errgroup.Group, client *spanner.Client,
	totalDeleted *atomic.Int64) {
	keys := spanner.KeySetFromKeys(batch...)
	count := int64(len(batch))
	eg.Go(func() error {
		del := spanner.Delete("files", keys)
		if _, err := client.Apply(ctx, []*spanner.Mutation{del}); err != nil {
			return err
		}
		totalDeleted.Add(count)
		return nil
	})
}
6 changes: 6 additions & 0 deletions tools/syz-covermerger/init_db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ CREATE TABLE
(session, filepath) );')
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
--ddl="$create_table"
echo "creating 'files' index"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
--ddl="CREATE INDEX files_session ON files (session);"

echo "drop table 'merge_history' if exists"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
Expand All @@ -43,6 +46,9 @@ CREATE TABLE
(namespace, repo, duration, dateto) );')
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
--ddl="$create_table"
echo "creating 'merge_history' index"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
--ddl="CREATE INDEX merge_history_session ON merge_history (session);"

echo "drop table 'file_subsystems' if exists"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
Expand Down
Loading