Skip to content

feat(cel-shed): datastore: erase samples #4146

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions cmd/cel-shed/datastore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package main

import (
"context"
"errors"
"fmt"

"github.com/ipfs/boxo/blockstore"
ds "github.com/ipfs/go-datastore"
"github.com/ipfs/go-datastore/namespace"
dsq "github.com/ipfs/go-datastore/query"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"

"github.com/celestiaorg/celestia-node/nodebuilder"
)

func init() {
datastoreCmd.AddCommand(eraseCmd, eraseSamplesCmd)
}

// datastoreCmd is the parent command that groups the datastore utilities.
// It declares no run function in this literal; subcommands are attached to it
// in init.
var datastoreCmd = &cobra.Command{
Use: "datastore [subcommand]",
Short: "Collection of datastore related utilities",
}

var eraseCmd = &cobra.Command{
Use: "erase <ds_key>",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to discover a stopped node? Why can't users pass either --node.store or --p2p.network, to make the command more explicit and versatile?

Copy link
Member Author

@Wondertan Wondertan Mar 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should have both, so you can be lazy and not specify those flags, but can also override them if you use a non-default path.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I personally have multiple types of nodes for multiple networks. Would appreciate an option to specify network name + type of the node in this command. And only network for command of erasing samples

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's TBD

Short: "Erase datastore namespace",
// RunE erases the datastore namespace given as the single positional argument
// from the store of the first stopped node found by nodebuilder.DiscoverStopped.
//
// The result is named so that the deferred nodestore.Close can fold its error
// into the value actually returned to cobra; with an unnamed result the
// assignment inside the defer would be lost.
RunE: func(cmd *cobra.Command, args []string) (err error) {
	path, err := nodebuilder.DiscoverStopped()
	if err != nil {
		return fmt.Errorf("discovering stopped node: %w", err)
	}
	fmt.Printf("Discovered stopped node at %s\n", path)

	nodestore, err := nodebuilder.OpenStore(path, nil)
	if err != nil {
		return err
	}
	defer func() {
		// Runs after the return value has been set, so a close failure is
		// reported alongside (or instead of) the erase error.
		err = errors.Join(err, nodestore.Close())
	}()

	store, err := nodestore.Datastore()
	if err != nil {
		return fmt.Errorf("getting datastore: %w", err)
	}

	key := ds.NewKey(args[0])
	if err = eraseDatastoreNamespace(cmd.Context(), store, key); err != nil {
		return fmt.Errorf("erasing datastore namespace: %w", err)
	}

	fmt.Printf("Erased %s\n", key)
	return nil
},
Args: cobra.ExactArgs(1),
}

var sampleDataKeys = []ds.Key{
ds.NewKey("sampling_result"),
ds.NewKey("das"),
ds.NewKey("pruner"),
Comment on lines +63 to +65
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Make those importable

blockstore.BlockPrefix,
}

// eraseSamplesCmd erases every namespace listed in sampleDataKeys from the
// datastore of the first stopped node found by nodebuilder.DiscoverStopped.
// Namespaces are erased concurrently, one goroutine per key; the first failure
// cancels the shared context and is returned.
//
// RunE uses a named result so that the deferred nodestore.Close can fold its
// error into the value actually returned; with an unnamed result the
// assignment inside the defer would be lost.
var eraseSamplesCmd = &cobra.Command{
	Use:   "erase-samples [subcommand]",
	Short: "Erase samples data and state. Useful to resample, avoiding resyncing headers",
	RunE: func(cmd *cobra.Command, _ []string) (err error) {
		path, err := nodebuilder.DiscoverStopped()
		if err != nil {
			return fmt.Errorf("discovering stopped node: %w", err)
		}
		fmt.Printf("Discovered stopped node at %s\n", path)

		nodestore, err := nodebuilder.OpenStore(path, nil)
		if err != nil {
			return err
		}
		defer func() {
			// Runs after the return value has been set, so a close failure is
			// reported alongside (or instead of) the erase error.
			err = errors.Join(err, nodestore.Close())
		}()

		// Named store rather than ds to avoid shadowing the imported ds package.
		store, err := nodestore.Datastore()
		if err != nil {
			return fmt.Errorf("getting datastore: %w", err)
		}

		group, ctx := errgroup.WithContext(cmd.Context())
		for _, key := range sampleDataKeys {
			// The closure captures key directly; this relies on per-iteration
			// loop variables (Go 1.22+), as the original code did.
			group.Go(func() error {
				if eraseErr := eraseDatastoreNamespace(ctx, store, key); eraseErr != nil {
					return fmt.Errorf("erasing datastore namespace: %w", eraseErr)
				}
				fmt.Printf("Erased %s\n", key)
				return nil
			})
		}

		return group.Wait()
	},
}

// eraseDatastoreNamespace deletes every key found under the given namespace key.
//
// It wraps store with namespace.Wrap, runs a keys-only query over the wrapped
// datastore and deletes the returned keys one at a time. The first query,
// iteration or deletion error aborts the erase and is returned wrapped; keys
// deleted before the failure stay deleted. The query results are always
// closed, and a close failure is joined into the returned error.
func eraseDatastoreNamespace(ctx context.Context, store ds.Datastore, key ds.Key) (err error) {
	store = namespace.Wrap(store, key)

	res, err := store.Query(ctx, dsq.Query{KeysOnly: true})
	if err != nil {
		return fmt.Errorf("querying datastore: %w", err)
	}
	// Release the query on every exit path, including the early error returns
	// below, so that whatever the query holds is not leaked.
	defer func() {
		if closeErr := res.Close(); closeErr != nil {
			err = errors.Join(err, fmt.Errorf("closing query results: %w", closeErr))
		}
	}()

	for {
		e, ok := res.NextSync()
		if !ok {
			break
		}
		if e.Error != nil {
			return fmt.Errorf("getting next key: %w", e.Error)
		}

		// Delete goes through the same wrapped store the query ran against,
		// so the key is passed back exactly as the query reported it.
		if delErr := store.Delete(ctx, ds.RawKey(e.Key)); delErr != nil {
			return fmt.Errorf("deleting key: %w", delErr)
		}
	}

	return nil
}
2 changes: 1 addition & 1 deletion cmd/cel-shed/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
)

func init() {
rootCmd.AddCommand(p2pCmd, headerCmd, edsStoreCmd, shwapCmd)
rootCmd.AddCommand(p2pCmd, headerCmd, edsStoreCmd, shwapCmd, datastoreCmd)
}

var rootCmd = &cobra.Command{
Expand Down
27 changes: 27 additions & 0 deletions nodebuilder/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,33 @@ type fsStore struct {
dirLock *flock.Flock // protects directory
}

// DiscoverStopped returns the path of an initialized store that belongs to a
// stopped Node. If multiple such stores exist, only the first one found is
// returned. Network is favored over node type.
//
// Network preference order: Mainnet, Mocha, Arabica, Private, Custom
// Type preference order: Bridge, Full, Light
//
// ErrNotInited is returned when no candidate path qualifies.
func DiscoverStopped() (string, error) {
	networks, types := p2p.GetNetworks(), nodemod.GetTypes()

	for _, network := range networks {
		for _, nodeType := range types {
			storePath, err := DefaultNodeStorePath(nodeType, network)
			if err != nil {
				return "", err
			}

			// The error from IsOpened is discarded; only its boolean result
			// decides whether the store is considered opened.
			opened, _ := IsOpened(storePath)
			if opened || !IsInit(storePath) {
				continue
			}
			return storePath, nil
		}
	}

	return "", ErrNotInited
}

// DiscoverOpened finds a path of an opened Node Store and returns its path.
// If multiple nodes are running, it only returns the path of the first found node.
// Network is favored over node type.
Expand Down
Loading