Skip to content

Commit 3450269

Browse files
committed
dashboard/app: periodically remove coverage garbage from spanner
1 parent 52b38cc commit 3450269

File tree

4 files changed

+84
-1
lines changed

4 files changed

+84
-1
lines changed

dashboard/app/batch_coverage.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@ import (
77
"context"
88
"fmt"
99
"net/http"
10+
"os"
1011
"strconv"
1112

1213
"cloud.google.com/go/batch/apiv1/batchpb"
1314
"cloud.google.com/go/bigquery"
1415
"cloud.google.com/go/civil"
16+
"cloud.google.com/go/spanner"
1517
"github.com/google/syzkaller/pkg/coveragedb"
1618
"google.golang.org/api/iterator"
1719
"google.golang.org/appengine/v2"
@@ -151,3 +153,71 @@ func nsDataAvailable(ctx context.Context, ns string) ([]coveragedb.TimePeriod, [
151153
}
152154
return periods, recordsCount, nil
153155
}
156+
157+
// Abandoned DB cleanup is expensive.
158+
// Every namespace creates min 3 sessions every day (yesterday, today, this month).
159+
// It means every day we deprecate 3 sessions records.
160+
// Every session is ~10k file records. Plus some mutations in the index.
161+
// One transaction allows to delete up to 80k rows.
162+
// To clean up everything once/week we have to garbage 7 * 3 * count(namespaces) at least.
163+
func handleBatchCoverageClean(w http.ResponseWriter, r *http.Request) {
164+
paramSessions := r.FormValue("sessions")
165+
if paramSessions == "" {
166+
paramSessions = "3" // one day, one namespace means 3 records
167+
}
168+
maxSessionsToDel, err := strconv.Atoi(r.FormValue("sessions"))
169+
if err != nil {
170+
w.WriteHeader(http.StatusBadRequest)
171+
w.Write([]byte("failed to parse 'sessions', integer expected"))
172+
return
173+
}
174+
ctx := context.Background()
175+
for i := 0; i < maxSessionsToDel; i++ {
176+
deletedRows, err := deleteGarbageSession(ctx)
177+
if err != nil {
178+
errMsg := fmt.Sprintf("failed to cleanCoverageDB: %s", err.Error())
179+
log.Errorf(ctx, errMsg)
180+
w.Write([]byte(errMsg))
181+
return
182+
}
183+
w.Write([]byte(fmt.Sprintf("deleteGarbageSession -> -%d rows\n", deletedRows)))
184+
}
185+
}
186+
187+
// deleteGarbageSession generates approximately 20k mutations (10k in files and 10k in index)
188+
// Spanner limit for every transaction is 80k mutations.
189+
// It means we can delete up to 4 sessions data at once.
190+
// Let's keep it simple and delete only 1 session at once.
191+
//
192+
// deleteGarbageSession returns the deleted rows count.
193+
// (0, nil) means there is no data to delete.
194+
func deleteGarbageSession(ctx context.Context) (int64, error) {
195+
client, err := coveragedb.NewClient(ctx, os.Getenv("GOOGLE_CLOUD_PROJECT"))
196+
if err != nil {
197+
return 0, fmt.Errorf("failed to coveragedb.NewClient: %w", err)
198+
}
199+
defer client.Close()
200+
var rowCount int64
201+
202+
_, err = client.ReadWriteTransaction(ctx, func(ctx context.Context, txn *spanner.ReadWriteTransaction) error {
203+
stmt := spanner.Statement{
204+
SQL: ` delete from files
205+
where files.session in (
206+
select
207+
distinct(files.session)
208+
from files
209+
left join merge_history
210+
on files.session = merge_history.session
211+
where merge_history.session is NULL
212+
limit 1
213+
);`,
214+
}
215+
var err error
216+
rowCount, err = txn.Update(ctx, stmt)
217+
if err != nil {
218+
return fmt.Errorf("txn.Update: %w", err)
219+
}
220+
return nil
221+
})
222+
return rowCount, err
223+
}

dashboard/app/batch_main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
func initBatchProcessors() {
1919
http.HandleFunc("/cron/batch_coverage", handleBatchCoverage)
2020
http.HandleFunc("/cron/batch_db_export", handleBatchDBExport)
21+
http.HandleFunc("/cron/batch_coverage_clean", handleBatchCoverageClean)
2122
}
2223

2324
// from https://cloud.google.com/batch/docs/samples/batch-create-script-job

dashboard/app/cron.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,13 @@ cron:
2424
schedule: every sunday 00:00
2525
# Update other coverage numbers every day.
2626
- url: /cron/batch_coverage?days=true&months=true&steps=10
27-
schedule: every 24 hours
27+
schedule: every day 00:00
28+
# Clean up coverage db every week.
29+
# We're adding data w/o transactions.
30+
# It is important to run the clean operation only while no batch_coverage run is in progress.
31+
# Delete at most 7 days * 3 namespaces * 3 new sessions/day * 2 (to also drain older leftovers) = 126 sessions.
32+
- url: /cron/batch_coverage_clean?sessions=126
33+
schedule: every saturday 12:00
2834
# Export reproducers every week.
2935
- url: /cron/batch_db_export
3036
schedule: every saturday 00:00

tools/syz-covermerger/init_db.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ CREATE TABLE
2323
(session, filepath) );')
2424
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
2525
--ddl="$create_table"
26+
echo "creating 'files' index"
27+
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
28+
--ddl="CREATE INDEX files_session ON files (session);"
2629

2730
echo "drop table 'merge_history' if exists"
2831
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
@@ -43,6 +46,9 @@ CREATE TABLE
4346
(namespace, repo, duration, dateto) );')
4447
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
4548
--ddl="$create_table"
49+
echo "creating 'merge_history' index"
50+
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
51+
--ddl="CREATE INDEX merge_history_session ON merge_history (session);"
4652

4753
echo "drop table 'file_subsystems' if exists"
4854
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \

0 commit comments

Comments
 (0)