Skip to content

Commit 95ad07c

Browse files
author
Saied Kazemi
authored
Prepare traceroute-caller for supporting hop annotations (#119)
* Prepare traceroute-caller for supporting hop annotations Although this commit seems big, it does not change the behavior of traceroute-caller (hence this work was done in feature branch sandbox-saied-noop). Here's a summary of the changes: - Remove poll mode because it's not used and its package testing was intermittently failing due to race conditions. - Remove legacy debug code for scamper-daemon-with-scamper-backup. - Make changes to comply with Go style guidelines with respect to comments, error messages, etc. - Improve package testing code and coverage. - Introduce a no-op hopannotation code to be followed with an actual implementation in the next commit. The changes were tested with "go test ./... -race" and "docker-compose up" on my local workstation. * Make changes based on code review comments
1 parent 8c45dc9 commit 95ad07c

22 files changed

+450
-901
lines changed

.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
**/local

caller.go

Lines changed: 52 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,52 @@
1-
// traceroute-caller is a wrapper around the
2-
// `scamper` commands and can be invoked in two different poll and
3-
// listen modes:
1+
// traceroute-caller is a wrapper around scamper, a tool that actively
2+
// probes the Internet in order to analyze topology and performance.
3+
// For details, visit https://www.caida.org/catalog/software/scamper.
44
//
5-
// - Poll mode uses the `connectionpoller` package to get a complete list
6-
// of all connections by executing `/bin/ss -e -n` every 5 seconds
7-
// and running a traceroute on all closed connections. This mode is
8-
// mostly for local test and debugging purposes as it doesn't require
9-
// any services such as `tcp-info` or `uuid-annotator`.
10-
//
11-
// - Listen mode uses the `tcp-info/eventsocket` package to listen for
12-
// open and close connection events, and runs a traceroute measurement
13-
// on closed connections.
14-
//
15-
// traceroute-caller on M-Lab servers always runs in the listen mode.
16-
// To see all available flags:
17-
//
18-
// $ go build
19-
// $ ./traceroute-caller --help
5+
// traceroute-caller uses the tcp-info/eventsocket package to listen for
6+
// open and close connection events, and runs a traceroute measurement
7+
// on closed connections.
208
package main
219

2210
import (
2311
"context"
12+
"errors"
2413
"flag"
2514
"log"
2615
"os"
2716
"sync"
2817
"time"
2918

3019
"github.com/m-lab/traceroute-caller/connection"
20+
"github.com/m-lab/traceroute-caller/connectionlistener"
21+
"github.com/m-lab/traceroute-caller/hopannotation"
22+
"github.com/m-lab/traceroute-caller/ipcache"
3123
"github.com/m-lab/traceroute-caller/tracer"
3224

3325
"github.com/m-lab/go/flagx"
26+
"github.com/m-lab/go/prometheusx"
3427
"github.com/m-lab/go/rtx"
3528
"github.com/m-lab/tcp-info/eventsocket"
36-
37-
"github.com/m-lab/go/prometheusx"
38-
"github.com/m-lab/traceroute-caller/connectionlistener"
39-
"github.com/m-lab/traceroute-caller/connectionpoller"
40-
"github.com/m-lab/traceroute-caller/ipcache"
29+
"github.com/m-lab/uuid-annotator/ipservice"
4130
)
4231

4332
var (
44-
// TODO: scamper and its commands (e.g., tracelb) support a
45-
// relatively large number of flags. Instead of adding these
46-
// flags one by one to traceroute-caller flags, going forward
47-
// it's much better to have traceroute-caller read a configuration
48-
// file in textproto format that would support all scamper and
49-
// its command flags.
50-
scamperBin = flag.String("scamper.bin", "scamper", "The path to the scamper binary.")
51-
scattachBin = flag.String("scamper.sc_attach", "sc_attach", "The path to the sc_attach binary.")
52-
scwarts2jsonBin = flag.String("scamper.sc_warts2json", "sc_warts2json", "The path to the sc_warts2json binary.")
53-
scamperCtrlSocket = flag.String("scamper.unixsocket", "/tmp/scamperctrl", "The name of the UNIX-domain socket that the scamper daemon should listen on.")
54-
scamperTimeout = flag.Duration("scamper.timeout", 900*time.Second, "How long to wait to complete a scamper trace.")
55-
scamperPTR = flag.Bool("scamper.tracelb-ptr", true, "Look up DNS pointer records for IP addresses.")
56-
scamperWaitProbe = flag.Int("scamper.tracelb-W", 25, "How long to wait between probes in 1/100ths of seconds (min 15, max 200).")
57-
outputPath = flag.String("outputPath", "/var/spool/scamper", "The path of output.")
58-
waitTime = flag.Duration("waitTime", 5*time.Second, "How long to wait between subsequent listings of open connections.")
59-
poll = flag.Bool("poll", true, "Whether the polling method should be used to see new connections.")
60-
tracerType = flagx.Enum{
61-
Options: []string{"scamper", "scamper-daemon", "scamper-daemon-with-scamper-backup"},
33+
// TODO(SaiedKazemi): scamper and its commands (e.g., tracelb)
34+
// support a large number of flags. Instead of adding
35+
// these flags one by one to traceroute-caller flags, it
36+
// will be much better to have traceroute-caller read a
37+
// configuration file in textproto format that would support
38+
// all scamper and its command flags.
39+
scamperBin = flag.String("scamper.bin", "scamper", "The path to the scamper binary.")
40+
scattachBin = flag.String("scamper.sc_attach", "sc_attach", "The path to the sc_attach binary.")
41+
scwarts2jsonBin = flag.String("scamper.sc_warts2json", "sc_warts2json", "The path to the sc_warts2json binary.")
42+
scamperCtrlSocket = flag.String("scamper.unixsocket", "/tmp/scamperctrl", "The name of the UNIX-domain socket that the scamper daemon should listen on.")
43+
scamperTimeout = flag.Duration("scamper.timeout", 900*time.Second, "How long to wait to complete a scamper trace.")
44+
scamperPTR = flag.Bool("scamper.tracelb-ptr", true, "Look up DNS pointer records for IP addresses.")
45+
scamperWaitProbe = flag.Int("scamper.tracelb-W", 25, "How long to wait between probes in 1/100ths of seconds (min 15, max 200).")
46+
tracerouteOutput = flag.String("traceroute-output", "/var/spool/scamper", "The path to store traceroute output.")
47+
hopAnnotationOutput = flag.String("hopannotation-output", "/var/spool/hopannotation1", "The path to store hop annotations.")
48+
tracerType = flagx.Enum{
49+
Options: []string{"scamper", "scamper-daemon"},
6250
Value: "scamper",
6351
}
6452

@@ -75,18 +63,26 @@ func main() {
7563
log.SetFlags(log.LstdFlags | log.Lshortfile)
7664

7765
flag.Parse()
78-
rtx.Must(flagx.ArgsFromEnv(flag.CommandLine), "Could not get args from environment")
79-
rtx.Must(os.MkdirAll(*outputPath, 0777), "Could not create data directory")
66+
rtx.Must(flagx.ArgsFromEnv(flag.CommandLine), "failed to get args from environment")
67+
if *eventsocket.Filename == "" {
68+
logFatal("tcpinfo.eventsocket was set to \"\"")
69+
}
70+
rtx.Must(os.MkdirAll(*tracerouteOutput, 0777), "failed to create directory for traceroute results")
71+
rtx.Must(os.MkdirAll(*hopAnnotationOutput, 0777), "failed to create directory for hop annotation results")
8072

8173
defer cancel()
8274
wg := sync.WaitGroup{}
8375

8476
promSrv := prometheusx.MustServeMetrics()
85-
defer promSrv.Shutdown(ctx)
77+
defer func() {
78+
if err := promSrv.Shutdown(ctx); err != nil && !errors.Is(err, context.Canceled) {
79+
log.Printf("failed to shut down Prometheus server (error: %v)", err)
80+
}
81+
}()
8682

8783
scamper := &tracer.Scamper{
8884
Binary: *scamperBin,
89-
OutputPath: *outputPath,
85+
OutputPath: *tracerouteOutput,
9086
ScamperTimeout: *scamperTimeout,
9187
TracelbPTR: *scamperPTR,
9288
TracelbWaitProbe: *scamperWaitProbe,
@@ -98,58 +94,28 @@ func main() {
9894
ControlSocket: *scamperCtrlSocket,
9995
}
10096

101-
var cache *ipcache.RecentIPCache
102-
103-
// Set up the cache three different ways, depending on the trace method requested.
97+
// Set up the ipCache depending on the trace method requested.
98+
var ipCache *ipcache.RecentIPCache
10499
switch tracerType.Value {
105100
case "scamper":
106-
cache = ipcache.New(ctx, scamper, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
101+
ipCache = ipcache.New(ctx, scamper, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
107102
case "scamper-daemon":
108-
cache = ipcache.New(ctx, scamperDaemon, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
103+
ipCache = ipcache.New(ctx, scamperDaemon, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
109104
wg.Add(1)
110105
go func() {
111106
scamperDaemon.MustStart(ctx)
112107
// When the scamper daemon dies, cancel main() and exit.
113108
cancel()
114109
wg.Done()
115110
}()
116-
// These are hacks - the scamper daemon should not fail at all.
117-
case "scamper-daemon-with-scamper-backup":
118-
cache = ipcache.New(ctx, scamperDaemon, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
119-
wg.Add(1)
120-
go func() {
121-
scamperDaemon.MustStart(ctx)
122-
// When the scamper daemon dies, switch to scamper
123-
cache.UpdateTracer(scamper)
124-
wg.Done()
125-
}()
126111
}
127112

128-
if *poll {
129-
wg.Add(1)
130-
go func(c *ipcache.RecentIPCache) {
131-
connPoller := connectionpoller.New(c)
132-
for ctx.Err() == nil {
133-
connPoller.TraceClosedConnections()
134-
135-
select {
136-
case <-time.After(*waitTime):
137-
case <-ctx.Done():
138-
}
139-
}
140-
wg.Done()
141-
}(cache)
142-
} else if *eventsocket.Filename != "" {
143-
wg.Add(1)
144-
go func() {
145-
connCreator, err := connection.NewCreator()
146-
rtx.Must(err, "Could not discover local IPs")
147-
connListener := connectionlistener.New(connCreator, cache)
148-
eventsocket.MustRun(ctx, *eventsocket.Filename, connListener)
149-
wg.Done()
150-
}()
151-
} else {
152-
logFatal("--poll was false but --tcpinfo.eventsocket was set to \"\". This is a nonsensical configuration.")
153-
}
113+
localIPs, err := connection.NewLocalRemoteIPs()
114+
rtx.Must(err, "failed to discover local IPs")
115+
ipserviceClient := ipservice.NewClient(*ipservice.SocketFilename)
116+
hopAnnotator := hopannotation.New(ctx, ipserviceClient, *hopAnnotationOutput)
117+
connListener := connectionlistener.New(localIPs, ipCache, hopAnnotator)
118+
eventsocket.MustRun(ctx, *eventsocket.Filename, connListener)
119+
cancel()
154120
wg.Wait()
155121
}

caller_test.go

Lines changed: 11 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"io/ioutil"
66
"log"
77
"os"
8-
"runtime"
98
"testing"
109
"time"
1110

@@ -23,95 +22,25 @@ func TestMetrics(t *testing.T) {
2322
promtest.LintMetrics(t)
2423
}
2524

26-
func TestMain(t *testing.T) {
27-
if runtime.GOOS != "linux" {
28-
t.Skip("Skipping for non-linux environment", runtime.GOOS)
29-
}
30-
dir, err := ioutil.TempDir("", "TestMain")
31-
rtx.Must(err, "Could not create temp dir")
32-
defer os.RemoveAll(dir)
33-
34-
// Verify that main doesn't crash, and that it does exit when the context is canceled.
35-
// TODO: verify more in this test.
36-
*prometheusx.ListenAddress = ":0"
37-
*scamperCtrlSocket = dir + "/scamper.sock"
38-
*waitTime = time.Nanosecond // Run through the loop a few times.
39-
*outputPath = dir
40-
*poll = true
41-
*scamperBin = "scamper"
42-
tracerType.Value = "scamper-daemon"
43-
ctx, cancel = context.WithCancel(context.Background())
44-
go func() {
45-
time.Sleep(1 * time.Second)
46-
cancel()
47-
}()
48-
main()
49-
}
50-
51-
func TestScamper(t *testing.T) {
52-
if runtime.GOOS != "linux" {
53-
t.Skip("Skipping for non-linux environment", runtime.GOOS)
54-
}
55-
dir, err := ioutil.TempDir("", "TestMain")
56-
rtx.Must(err, "Could not create temp dir")
57-
defer os.RemoveAll(dir)
58-
59-
// Verify that main doesn't crash, and that it does exit when the context is canceled.
60-
// TODO: verify more in this test.
61-
*prometheusx.ListenAddress = ":0"
62-
*scamperCtrlSocket = ""
63-
*waitTime = time.Nanosecond // Run through the loop a few times.
64-
*outputPath = dir
65-
*poll = true
66-
*scamperBin = "scamper"
67-
tracerType.Value = "scamper"
68-
ctx, cancel = context.WithCancel(context.Background())
69-
go func() {
70-
time.Sleep(1 * time.Second)
71-
cancel()
72-
}()
73-
main()
74-
}
75-
7625
func TestMainWithConnectionListener(t *testing.T) {
7726
dir, err := ioutil.TempDir("", "TestMainWithConnectionListener")
78-
rtx.Must(err, "Could not create temp dir")
27+
rtx.Must(err, "failed to create temp dir")
7928
defer os.RemoveAll(dir)
8029
srv := eventsocket.New(dir + "/events.sock")
81-
rtx.Must(srv.Listen(), "Could not start the empty server")
30+
rtx.Must(srv.Listen(), "failed to start the empty server")
8231

8332
*prometheusx.ListenAddress = ":0"
8433
*eventsocket.Filename = dir + "/events.sock"
85-
*outputPath = dir
86-
*poll = false
34+
*tracerouteOutput = dir
35+
*hopAnnotationOutput = dir
8736
tracerType.Value = "scamper"
8837

8938
ctx, cancel = context.WithCancel(context.Background())
90-
go srv.Serve(ctx)
91-
go func() {
92-
time.Sleep(1 * time.Second)
93-
cancel()
94-
}()
95-
main()
96-
}
97-
98-
func TestMainWithBackupScamper(t *testing.T) {
99-
dir, err := ioutil.TempDir("", "TestMainWithBackupScamper")
100-
rtx.Must(err, "Could not create temp dir")
101-
defer os.RemoveAll(dir)
102-
srv := eventsocket.New(dir + "/events.sock")
103-
rtx.Must(srv.Listen(), "Could not start the empty server")
104-
105-
*prometheusx.ListenAddress = ":0"
106-
*eventsocket.Filename = dir + "/events.sock"
107-
*outputPath = dir
108-
*poll = false
109-
*scamperCtrlSocket = dir + "/scamper.sock"
110-
*scamperBin = "false"
111-
tracerType.Value = "scamper-daemon-with-scamper-backup"
112-
113-
ctx, cancel = context.WithCancel(context.Background())
114-
go srv.Serve(ctx)
39+
go func(t *testing.T) {
40+
if err := srv.Serve(ctx); err != nil {
41+
t.Logf("failed to start eventsocket server (error: %v)", err)
42+
}
43+
}(t)
11544
go func() {
11645
time.Sleep(1 * time.Second)
11746
cancel()
@@ -122,8 +51,8 @@ func TestMainWithBackupScamper(t *testing.T) {
12251
func TestMainWithBadArgs(t *testing.T) {
12352
tracerType.Value = "scamper"
12453
*eventsocket.Filename = ""
125-
*outputPath = "/tmp/"
126-
*poll = false
54+
*tracerouteOutput = "/tmp/"
55+
*hopAnnotationOutput = "/tmp/"
12756

12857
logFatal = func(_ ...interface{}) {
12958
panic("testpanic")

connection/connection.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ type Connection struct {
2929

3030
// UUID returns uuid from cookie parsed from "ss -e" output.
3131
func (c *Connection) UUID() (string, error) {
32-
// cookie is a hexdecimal string
32+
// cookie is a hexadecimal string
3333
result, err := strconv.ParseUint(c.Cookie, 16, 64)
3434
return uuid.FromCookie(result), err
3535
}
@@ -79,9 +79,9 @@ type Creator interface {
7979
FromSockID(sockid inetdiag.SockID) (Connection, error)
8080
}
8181

82-
// NewCreator makes an object that can convert src and dst into local and remote
83-
// IPs.
84-
func NewCreator() (Creator, error) {
82+
// NewLocalRemoteIPs makes an object that can convert src and dst into local
83+
// and remote IPs.
84+
func NewLocalRemoteIPs() (Creator, error) {
8585
c := &creator{
8686
localIPs: make([]*net.IP, 0),
8787
}
@@ -108,9 +108,9 @@ func NewCreator() (Creator, error) {
108108
return c, err
109109
}
110110

111-
// NewFakeCreator makes a fake creator with hardcoded local IPs to enable
112-
// testing in diverse network environments.
113-
func NewFakeCreator(localIPs []*net.IP) Creator {
111+
// NewFakeLocalIPs makes a fake creator with hardcoded local IPs to
112+
// enable testing in diverse network environments.
113+
func NewFakeLocalIPs(localIPs []*net.IP) Creator {
114114
return &creator{
115115
localIPs: localIPs,
116116
}

0 commit comments

Comments
 (0)