Skip to content

Commit 1077fbb

Browse files
Add purge-datatype utility (#23)
* Add purge-datatype utility * Add dryrun warning
1 parent 02f9a78 commit 1077fbb

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

cmd/purge-datatype/main.go

Lines changed: 100 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,100 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"flag"
6+
"fmt"
7+
"log"
8+
"strings"
9+
10+
"github.com/m-lab/go/flagx"
11+
"github.com/m-lab/go/storagex"
12+
13+
"cloud.google.com/go/bigquery"
14+
"cloud.google.com/go/storage"
15+
)
16+
17+
var (
18+
datatypes = flagx.StringArray{}
19+
project string
20+
dryrun bool
21+
)
22+
23+
func init() {
24+
flag.StringVar(&project, "project", "mlab-sandbox", "Operate on the given project.")
25+
flag.BoolVar(&dryrun, "dryrun", true, "Take no action.")
26+
flag.Var(&datatypes, "datatype", "The experiment/datatype to delete from GCS and BQ.")
27+
}
28+
29+
func main() {
30+
flag.Parse()
31+
flagx.ArgsFromEnv(flag.CommandLine)
32+
33+
ctx := context.Background()
34+
sclient, err := storage.NewClient(ctx)
35+
if err != nil {
36+
panic(err)
37+
}
38+
defer sclient.Close()
39+
40+
bqclient, err := bigquery.NewClient(ctx, project)
41+
if err != nil {
42+
panic(err)
43+
}
44+
defer bqclient.Close()
45+
46+
if dryrun {
47+
log.Println("NOTE:")
48+
log.Println("NOTE: dryrun mode! Use -dryrun=false to delete data.")
49+
log.Println("NOTE:")
50+
}
51+
52+
for _, dt := range datatypes {
53+
fields := strings.Split(dt, "/")
54+
if len(fields) != 2 {
55+
log.Printf("wrong datatype format; skipping %q", dt)
56+
continue
57+
}
58+
log.Printf("Removing: %s", dt)
59+
exp, datatype := fields[0], fields[1]
60+
b := storagex.NewBucket(sclient.Bucket("pusher-" + project))
61+
deleteObjects(ctx, b, fmt.Sprintf("autoload/v1/tables/%s/%s.table.json", exp, datatype))
62+
deleteObjects(ctx, b, fmt.Sprintf("autoload/v1/%s/%s", exp, datatype))
63+
64+
b = storagex.NewBucket(sclient.Bucket("archive-" + project))
65+
deleteObjects(ctx, b, fmt.Sprintf("autoload/v1/tables/%s/%s.table.json", exp, datatype))
66+
deleteObjects(ctx, b, fmt.Sprintf("autoload/v1/%s/%s", exp, datatype))
67+
68+
deleteTable(ctx, bqclient, "raw_"+exp, datatype)
69+
}
70+
71+
if !dryrun {
72+
log.Println("WARNING:")
73+
log.Println("WARNING: active storage transfer jobs may recreate files just removed from the archive bucket")
74+
log.Println("WARNING:")
75+
}
76+
}
77+
78+
func deleteObjects(ctx context.Context, bucket *storagex.Bucket, path string) error {
79+
attrs, err := bucket.Attrs(ctx)
80+
if err != nil {
81+
return err
82+
}
83+
log.Println("GCS", attrs.Name)
84+
return bucket.Walk(ctx, path, func(o *storagex.Object) error {
85+
log.Println("\tdelete:", o.ObjectName())
86+
if dryrun {
87+
return nil
88+
}
89+
return o.Delete(ctx)
90+
})
91+
}
92+
func deleteTable(ctx context.Context, client *bigquery.Client, dataset, table string) error {
93+
t := client.Dataset(dataset).Table(table)
94+
log.Println("BigQuery", client.Project())
95+
log.Println("\tdelete:", t.DatasetID, t.TableID)
96+
if dryrun {
97+
return nil
98+
}
99+
return t.Delete(ctx)
100+
}

0 commit comments

Comments
 (0)