Skip to content

Commit 4c368d4

Browse files
authored
Remove paris traceroute and add latency metric (#103)
* remove paris options
* add latency histogram
* add comment about cache lifetime
* add comment about log spaced bins
1 parent 1074d92 commit 4c368d4

File tree

7 files changed

+51
-235
lines changed

7 files changed

+51
-235
lines changed

caller.go

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// traceroute-caller is a wrapper around the `paris-traceroute` and
1+
// traceroute-caller is a wrapper around the
22
// `scamper` commands and can be invoked in two different poll and
33
// listen modes:
44
//
@@ -45,14 +45,12 @@ var (
4545
scattachBin = flag.String("scamper.sc_attach", "sc_attach", "The path to the sc_attach binary.")
4646
scwarts2jsonBin = flag.String("scamper.sc_warts2json", "sc_warts2json", "The path to the sc_warts2json binary.")
4747
scamperCtrlSocket = flag.String("scamper.unixsocket", "/tmp/scamperctrl", "The name of the UNIX-domain socket that the scamper daemon should listen on")
48-
scamperTimeout = flag.Duration("scamper.timeout", 300*time.Second, "how long to wait to complete a scamper trace.")
49-
parisBin = flag.String("paris.bin", "paris-traceroute", "The path to the paris-traceroute binary.")
50-
parisTimeout = flag.Duration("paris.timeout", 60*time.Second, "how long to wait to complete a paris-traceroute trace.")
48+
scamperTimeout = flag.Duration("scamper.timeout", 900*time.Second, "how long to wait to complete a scamper trace.")
5149
outputPath = flag.String("outputPath", "/var/spool/scamper", "path of output")
5250
waitTime = flag.Duration("waitTime", 5*time.Second, "how long to wait between subsequent listings of open connections")
5351
poll = flag.Bool("poll", true, "Whether the polling method should be used to see new connections.")
5452
tracerType = flagx.Enum{
55-
Options: []string{"paris-traceroute", "scamper", "scamper-daemon", "scamper-daemon-with-paris-backup", "scamper-daemon-with-scamper-backup"},
53+
Options: []string{"scamper", "scamper-daemon", "scamper-daemon-with-scamper-backup"},
5654
Value: "scamper",
5755
}
5856

@@ -62,7 +60,7 @@ var (
6260
)
6361

6462
func init() {
65-
flag.Var(&tracerType, "tracetool", "Choose whether paris-traceroute or scamper should be used.")
63+
flag.Var(&tracerType, "tracetool", "Choose whether scamper or scamper-daemon should be used.")
6664
}
6765

6866
func main() {
@@ -89,18 +87,11 @@ func main() {
8987
Warts2JSONBinary: *scwarts2jsonBin,
9088
ControlSocket: *scamperCtrlSocket,
9189
}
92-
parisTracer := &tracer.Paris{
93-
Binary: *parisBin,
94-
OutputPath: *outputPath,
95-
Timeout: *parisTimeout,
96-
}
9790

9891
var cache *ipcache.RecentIPCache
9992

10093
// Set up the cache three different ways, depending on the trace method requested.
10194
switch tracerType.Value {
102-
case "paris-traceroute":
103-
cache = ipcache.New(ctx, parisTracer, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
10495
case "scamper":
10596
cache = ipcache.New(ctx, scamper, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
10697
case "scamper-daemon":
@@ -122,15 +113,6 @@ func main() {
122113
cache.UpdateTracer(scamper)
123114
wg.Done()
124115
}()
125-
case "scamper-daemon-with-paris-backup":
126-
cache = ipcache.New(ctx, scamperDaemon, *ipcache.IPCacheTimeout, *ipcache.IPCacheUpdatePeriod)
127-
wg.Add(1)
128-
go func() {
129-
scamperDaemon.MustStart(ctx)
130-
// When the scamper daemon dies, switch to paris-traceroute.
131-
cache.UpdateTracer(parisTracer)
132-
wg.Done()
133-
}()
134116
}
135117

136118
if *poll {

caller_test.go

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func TestMainWithConnectionListener(t *testing.T) {
7373
*eventsocket.Filename = dir + "/events.sock"
7474
*outputPath = dir
7575
*poll = false
76-
tracerType.Value = "paris-traceroute"
76+
tracerType.Value = "scamper"
7777

7878
ctx, cancel = context.WithCancel(context.Background())
7979
go srv.Serve(ctx)
@@ -108,32 +108,8 @@ func TestMainWithBackupScamper(t *testing.T) {
108108
main()
109109
}
110110

111-
func TestMainWithBackupPT(t *testing.T) {
112-
dir, err := ioutil.TempDir("", "TestMainWithBackupPT")
113-
rtx.Must(err, "Could not create temp dir")
114-
defer os.RemoveAll(dir)
115-
srv := eventsocket.New(dir + "/events.sock")
116-
rtx.Must(srv.Listen(), "Could not start the empty server")
117-
118-
*prometheusx.ListenAddress = ":0"
119-
*eventsocket.Filename = dir + "/events.sock"
120-
*outputPath = dir
121-
*poll = false
122-
*scamperCtrlSocket = dir + "/scamper.sock"
123-
*scamperBin = "false"
124-
tracerType.Value = "scamper-daemon-with-paris-backup"
125-
126-
ctx, cancel = context.WithCancel(context.Background())
127-
go srv.Serve(ctx)
128-
go func() {
129-
time.Sleep(1 * time.Second)
130-
cancel()
131-
}()
132-
main()
133-
}
134-
135111
func TestMainWithBadArgs(t *testing.T) {
136-
tracerType.Value = "paris-traceroute"
112+
tracerType.Value = "scamper"
137113
*eventsocket.Filename = ""
138114
*outputPath = "/tmp/"
139115
*poll = false

ipcache/ipcache.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212

1313
var (
1414
// IPCacheTimeout sets a lower bound on the amount of time between subsequent traceroutes to a single IP address.
15+
// Since traces typically take 5 to 10 minutes, this cache timeout should probably be adjusted to something
16+
// more like 15 minutes, or perhaps even an hour.
1517
IPCacheTimeout = flag.Duration("IPCacheTimeout", 120*time.Second, "Timeout duration in seconds for IPCache")
1618

1719
// IPCacheUpdatePeriod determines how long to wait between cache-scrubbing attempts.
@@ -91,8 +93,7 @@ func (rc *RecentIPCache) GetCacheLength() int {
9193
return len(rc.cache)
9294
}
9395

94-
// UpdateTracer switches the Tracer being used. This allows us to dynamically
95-
// switch between scamper and paris-traceroute.
96+
// UpdateTracer switches the Tracer being used.
9697
func (rc *RecentIPCache) UpdateTracer(t Tracer) {
9798
rc.mu.Lock()
9899
defer rc.mu.Unlock()
@@ -117,6 +118,10 @@ func New(ctx context.Context, trace Tracer, ipCacheTimeout, ipCacheUpdatePeriod
117118
m.mu.Lock()
118119
for k, v := range m.cache {
119120
if now.Sub(v.timeStamp) > ipCacheTimeout {
121+
// Note that if there is a trace in progress, the events
122+
// waiting for it to complete will still get the result
123+
// and save it. But this allows a new trace to be started
124+
// on the same IP address.
120125
delete(m.cache, k)
121126
}
122127
}

tracer/paris.go

Lines changed: 0 additions & 89 deletions
This file was deleted.

tracer/paris_test.go

Lines changed: 0 additions & 93 deletions
This file was deleted.

tracer/scamper.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,21 +91,31 @@ func (s *Scamper) trace(conn connection.Connection, t time.Time) (string, error)
9191
log.Println("Starting a trace to be put in", filename)
9292
buff := bytes.Buffer{}
9393

94-
_, err = buff.WriteString(GetMetaline(conn, false, ""))
95-
// XXX Should not use panic recovery. Convert these to errors.
94+
// WriteString never errors, but may panic on OOM
95+
_, _ = buff.WriteString(GetMetaline(conn, false, ""))
96+
// TODO Should not use panic recovery. Convert these to errors.
9697
rtx.PanicOnError(err, "Could not write to buffer")
9798

9899
cmd := pipe.Line(
99100
pipe.Exec(s.Binary, "-I", "tracelb -P icmp-echo -q 3 -O ptr "+conn.RemoteIP, "-o-", "-O", "json"),
100101
pipe.Write(&buff),
101102
)
103+
start := time.Now()
102104
err = pipe.RunTimeout(cmd, s.ScamperTimeout)
105+
latency := time.Since(start).Seconds()
106+
if err != nil {
107+
traceTimeHistogram.WithLabelValues("error").Observe(latency)
108+
} else {
109+
traceTimeHistogram.WithLabelValues("success").Observe(latency)
110+
}
111+
103112
tracesPerformed.WithLabelValues("scamper").Inc()
104113
if err != nil && err.Error() == pipe.ErrTimeout.Error() {
105114
log.Println("Trace timed out: ", cmd)
106115
return "", err
107116
}
108117

118+
// TODO - should we really panic here?
109119
rtx.PanicOnError(err, "Command %v failed", cmd)
110120
rtx.PanicOnError(ioutil.WriteFile(filename, buff.Bytes(), 0666), "Could not save output to file")
111121
return buff.String(), nil
@@ -213,10 +223,18 @@ func (d *ScamperDaemon) trace(conn connection.Connection, t time.Time) (string,
213223
pipe.Exec(d.Warts2JSONBinary),
214224
pipe.Write(&buff),
215225
)
226+
start := time.Now()
216227
err = pipe.RunTimeout(cmd, d.ScamperTimeout)
228+
latency := time.Since(start).Seconds()
217229
tracesPerformed.WithLabelValues("scamper-daemon").Inc()
230+
if err != nil {
231+
traceTimeHistogram.WithLabelValues("error").Observe(latency)
232+
} else {
233+
traceTimeHistogram.WithLabelValues("success").Observe(latency)
234+
}
235+
218236
if err != nil && err.Error() == pipe.ErrTimeout.Error() {
219-
log.Println("TimeOut for Trace: ", cmd)
237+
log.Println("Trace timed out: ", cmd)
220238
return "", err
221239
}
222240

0 commit comments

Comments
 (0)