Skip to content

Commit 54246ad

Browse files
NormalizeIP for traceroute parser (#1006)
* Rename NormalizeIPv6 to FixIPv6 for web100 ::: bug
* Move NormalizeIP to parser package
* Use NormalizeIP for sidestream parser
* NormalizeIP for traceroute source and destination
1 parent 02f73ef commit 54246ad

File tree

7 files changed

+72
-56
lines changed

7 files changed

+72
-56
lines changed

parser/parser.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"encoding/base64"
88
"fmt"
99
"log"
10+
"net"
1011
"reflect"
1112
"time"
1213

@@ -17,6 +18,7 @@ import (
1718
"github.com/m-lab/etl/etl"
1819
"github.com/m-lab/etl/metrics"
1920
"github.com/m-lab/etl/row"
21+
"github.com/m-lab/etl/web100"
2022
)
2123

2224
func init() {
@@ -61,6 +63,21 @@ func GitCommit() string {
6163
return gParserGitCommit
6264
}
6365

66+
// NormalizeIP accepts an IPv4 or IPv6 address and returns a normalized version
67+
// of that string. This should be used to fix malformed IPv6 addresses in web100
68+
// datasets (e.g. 2001:::abcd:2) as well as IPv4-mapped IPv6 addresses (e.g. ::ffff:1.2.3.4).
//
// NormalizeIP never fails: input that cannot be repaired or parsed is handed
// back as a string instead of being rejected, so callers always get a value.
69+
func NormalizeIP(ip string) string {
// Repair the web100 triple-colon form (":::" becomes "::") before parsing.
70+
r, err := web100.FixIPv6(ip)
71+
if err != nil {
// The address could not be repaired (e.g. it contains "::::");
// preserve the caller's original string unchanged.
72+
return ip
73+
}
74+
n := net.ParseIP(r)
75+
if n == nil {
// Still not a parseable IP (e.g. "1-2-3-4"); return the string as
// produced by FixIPv6 rather than dropping it.
76+
return r
77+
}
// The parsed form's String() yields the canonical text; an IPv4-mapped
// IPv6 address such as "::ffff:1.2.3.4" comes out as plain "1.2.3.4".
78+
return n.String()
79+
}
80+
6481
// NewSinkParser creates an appropriate parser for a given data type.
6582
// Eventually all datatypes will use this instead of NewParser.
6683
func NewSinkParser(dt etl.DataType, sink row.Sink, table string, ann api.Annotator) etl.Parser {

parser/parser_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,52 @@ func (ti *countingInserter) Flush() error {
4848
return nil
4949
}
5050

51+
// TestNormalizeIP is a table-driven test of parser.NormalizeIP. It covers
// addresses that are already normal, the web100 triple-colon form that gets
// repaired, malformed inputs that are preserved as given, and an IPv4-mapped
// IPv6 address that is reduced to plain IPv4.
func TestNormalizeIP(t *testing.T) {
52+
tests := []struct {
53+
name string
54+
ip string
55+
want string
56+
}{
57+
{
58+
name: "success-noop-ipv4",
59+
ip: "1.2.3.4",
60+
want: "1.2.3.4",
61+
},
62+
{
63+
name: "success-noop-ipv6",
64+
ip: "1:2:3::4",
65+
want: "1:2:3::4",
66+
},
67+
{
68+
name: "success-:::-ipv6",
69+
ip: "1:2:3:::4", // triple-colon format from web100.
70+
want: "1:2:3::4",
71+
},
72+
{
73+
name: "badformat-preserved-::::-ipv6",
74+
ip: "1:2:3::::4", // quad-colon format error, not normalized.
75+
want: "1:2:3::::4",
76+
},
77+
{
78+
name: "badformat-preserved-corrupt",
79+
ip: "1-2-3-4", // this is not an IP, but b/c it can't be fixed, it's preserved.
80+
want: "1-2-3-4",
81+
},
82+
{
83+
name: "success-ipv6-mapped-ipv4",
84+
ip: "::ffff:1.2.3.4", // IPv4-mapped IPv6 address, normalized to plain IPv4.
85+
want: "1.2.3.4",
86+
},
87+
}
88+
for _, tt := range tests {
89+
t.Run(tt.name, func(t *testing.T) {
90+
if got := parser.NormalizeIP(tt.ip); got != tt.want {
91+
t.Errorf("NormalizeIP() = %v, want %v", got, tt.want)
92+
}
93+
})
94+
}
95+
}
96+
5197
//------------------------------------------------------------------------------------
5298
// TestParser ignores the content, returns a MapSaver containing meta data and
5399
// "testname":"..."

parser/pt.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,8 +304,8 @@ func ParseJSONL(testName string, rawContent []byte, tableName string, taskFilena
304304
StartTime: int64(cycleStart.Start_time),
305305
StopTime: int64(cycleStop.Stop_time),
306306
ScamperVersion: tracelb.Version,
307-
Source: schema.ServerInfo{IP: tracelb.Src},
308-
Destination: schema.ClientInfo{IP: tracelb.Dst},
307+
Source: schema.ServerInfo{IP: NormalizeIP(tracelb.Src)},
308+
Destination: schema.ClientInfo{IP: NormalizeIP(tracelb.Dst)},
309309
ProbeSize: int64(tracelb.Probe_size),
310310
ProbeC: int64(tracelb.Probec),
311311
Hop: hops,

parser/pt_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,11 @@ func TestParseJSONLSimple(t *testing.T) {
100100
if got.UUID != wantUUID {
101101
t.Fatalf("failed to parse UUID, wanted %q, got %q", wantUUID, got.UUID)
102102
}
103-
wantSourceIP := "::ffff:180.87.97.101"
103+
wantSourceIP := "180.87.97.101"
104104
if got.Source.IP != wantSourceIP {
105105
t.Fatalf("failed to parse source IP, wanted %q, got %q", wantSourceIP, got.Source.IP)
106106
}
107-
wantDestinationIP := "::ffff:1.47.236.62"
107+
wantDestinationIP := "1.47.236.62"
108108
if got.Destination.IP != wantDestinationIP {
109109
t.Fatalf("failed to parse destination IP, wanted %q, got %q", wantDestinationIP, got.Destination.IP)
110110
}

parser/ss.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ func PackDataIntoSchema(ssValue map[string]string, logTime time.Time, testName s
109109
return schema.SS{}, err
110110
}
111111

112-
ssValue["LocalAddress"] = web100.NormalizeIP(ssValue["LocalAddress"])
113-
ssValue["RemAddress"] = web100.NormalizeIP(ssValue["RemAddress"])
112+
ssValue["LocalAddress"] = NormalizeIP(ssValue["LocalAddress"])
113+
ssValue["RemAddress"] = NormalizeIP(ssValue["RemAddress"])
114114
connSpec := &schema.Web100ConnectionSpecification{
115115
Local_ip: ssValue["LocalAddress"],
116116
Local_af: web100.ParseIPFamily(ssValue["LocalAddress"]),

web100/parse.go

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ var (
6969
ErrIPv6QuadColon = errors.New("IP address contains :::: ")
7070
)
7171

72-
// NormalizeIPv6 fixes triple colon ::: which is produced by sidestream.
72+
// FixIPv6 fixes triple colon ::: which is produced by sidestream.
7373
// This error is produced by older versions of the c-web100 library, which is still
7474
// used by sidestream.
75-
func NormalizeIPv6(ipStr string) (string, error) {
75+
func FixIPv6(ipStr string) (string, error) {
7676
split := strings.Split(ipStr, ":::")
7777
switch len(split) {
7878
case 1:
@@ -87,21 +87,10 @@ func NormalizeIPv6(ipStr string) (string, error) {
8787
}
8888
}
8989

90-
// NormalizeIP accepts an IPv4 or IPv6 address and returns a normalized version
91-
// of that string. This should be used to fix malformed IPv6 addresses in
92-
// web100 datasets.
93-
func NormalizeIP(ip string) string {
94-
r, err := NormalizeIPv6(ip)
95-
if err != nil {
96-
return ip
97-
}
98-
return r
99-
}
100-
10190
// ValidateIP validates (and possibly repairs) IP addresses.
10291
// Return nil if it is a valid IPv4 or IPv6 address (or can be repaired), non-nil otherwise.
10392
func ValidateIP(ipStr string) error {
104-
ipStr, err := NormalizeIPv6(ipStr)
93+
ipStr, err := FixIPv6(ipStr)
10594
if err != nil {
10695
return err
10796
}

web100/parse_test.go

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -118,39 +118,3 @@ func BenchmarkValidateIPv4(b *testing.B) {
118118
_ = web100.ValidateIP("1.2.3.4")
119119
}
120120
}
121-
122-
func TestNormalizeIP(t *testing.T) {
123-
tests := []struct {
124-
name string
125-
ip string
126-
want string
127-
}{
128-
{
129-
name: "success-noop-ipv4",
130-
ip: "1.2.3.4",
131-
want: "1.2.3.4",
132-
},
133-
{
134-
name: "success-noop-ipv6",
135-
ip: "1:2:3::4",
136-
want: "1:2:3::4",
137-
},
138-
{
139-
name: "success-:::-ipv6",
140-
ip: "1:2:3:::4", // triple-colon format from web100.
141-
want: "1:2:3::4",
142-
},
143-
{
144-
name: "badformat-preserved-::::-ipv6",
145-
ip: "1:2:3::::4", // quad-colon format error, not normalized.
146-
want: "1:2:3::::4",
147-
},
148-
}
149-
for _, tt := range tests {
150-
t.Run(tt.name, func(t *testing.T) {
151-
if got := web100.NormalizeIP(tt.ip); got != tt.want {
152-
t.Errorf("NormalizeIP() = %v, want %v", got, tt.want)
153-
}
154-
})
155-
}
156-
}

0 commit comments

Comments
 (0)