Skip to content

Commit 89c25ed

Browse files
Fix routeview prefix search (#49)
* Uncompress RouteViewIPv4.corrupt
* Fix routeview.Search
* Add three new metrics for parsing routeview input and ASN search results
1 parent fbd5c92 commit 89c25ed

File tree

6 files changed

+179
-81
lines changed

6 files changed

+179
-81
lines changed

asnannotator/asn.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/m-lab/tcp-info/inetdiag"
1313
"github.com/m-lab/uuid-annotator/annotator"
1414
"github.com/m-lab/uuid-annotator/ipinfo"
15+
"github.com/m-lab/uuid-annotator/metrics"
1516
"github.com/m-lab/uuid-annotator/routeview"
1617
"github.com/m-lab/uuid-annotator/tarreader"
1718
)
@@ -93,13 +94,15 @@ func (a *asnAnnotator) annotateIPHoldingLock(src string) *annotator.Network {
9394
ann.ASName = a.asnames[ann.ASNumber]
9495
}
9596
// The annotation succeeded with IPv4.
97+
metrics.ASNSearches.WithLabelValues("ipv4-success").Inc()
9698
return ann
9799
}
98100

99101
ipnet, err = a.asn6.Search(src)
100102
if err != nil {
101103
// In this case, the search has failed twice.
102104
ann.Missing = true
105+
metrics.ASNSearches.WithLabelValues("missing").Inc()
103106
return ann
104107
}
105108

@@ -110,6 +113,7 @@ func (a *asnAnnotator) annotateIPHoldingLock(src string) *annotator.Network {
110113
}
111114
ann.CIDR = ipnet.String()
112115
// The annotation succeeded with IPv6.
116+
metrics.ASNSearches.WithLabelValues("ipv6-success").Inc()
113117
return ann
114118
}
115119

@@ -194,15 +198,15 @@ func NewFake() ASNAnnotator {
194198
rtx.Must(err, "Could not parse fixed string")
195199
asn4Entry.IPNet = *v4net
196200
asn4Entry.Systems = "5"
197-
f.asn4 = routeview.Index{asn4Entry}
201+
f.asn4 = routeview.Index{routeview.NetIndex{asn4Entry}}
198202

199203
// Set up v6 data for 1111:2222:3333:4444:5555:6666:7777:8888.
200204
asn6Entry := routeview.IPNet{}
201205
_, v6net, err := net.ParseCIDR("1111:2222:3333:4444:5555:6666:7777:8888/128")
202206
rtx.Must(err, "Could not parse fixed string")
203207
asn6Entry.IPNet = *v6net
204208
asn6Entry.Systems = "9"
205-
f.asn6 = routeview.Index{asn6Entry}
209+
f.asn6 = routeview.Index{routeview.NetIndex{asn6Entry}}
206210

207211
// Set up AS name entries for AS5 and AS9
208212
f.asnames = ipinfo.ASNames{

metrics/metrics.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,24 @@ var (
4545
},
4646
[]string{"status"},
4747
)
48+
RouteViewRows = promauto.NewCounterVec(
49+
prometheus.CounterOpts{
50+
Name: "uuid_annotator_routeview_rows_total",
51+
Help: "The number of routeview rows parsed or skipped",
52+
},
53+
[]string{"status"},
54+
)
55+
RouteViewParsed = promauto.NewCounter(
56+
prometheus.CounterOpts{
57+
Name: "uuid_annotator_routeview_parsed_total",
58+
Help: "The number of times a routeview file has been parsed",
59+
},
60+
)
61+
ASNSearches = promauto.NewCounterVec(
62+
prometheus.CounterOpts{
63+
Name: "uuid_annotator_asn_search_total",
64+
Help: "The number of ASN annotator searches",
65+
},
66+
[]string{"status"},
67+
)
4868
)

routeview/parse.go

Lines changed: 64 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ import (
1111
"strconv"
1212
"strings"
1313

14+
"github.com/m-lab/go/logx"
1415
"github.com/m-lab/uuid-annotator/annotator"
16+
"github.com/m-lab/uuid-annotator/metrics"
1517
)
1618

1719
// IPNet represents a parsed row in a RouteView file.
@@ -20,17 +22,20 @@ type IPNet struct {
2022
Systems string
2123
}
2224

23-
// Index is a sortable (and searchable) array of IPNets.
24-
type Index []IPNet
25+
// NetIndex is a sortable and searchable array of IPNets.
26+
type NetIndex []IPNet
27+
28+
// Index is searchable array of NetIndexes.
29+
type Index []NetIndex
2530

2631
// Len, Less, and Swap make NetIndex sortable.
27-
func (ns Index) Len() int {
32+
func (ns NetIndex) Len() int {
2833
return len(ns)
2934
}
30-
func (ns Index) Less(i, j int) bool {
35+
func (ns NetIndex) Less(i, j int) bool {
3136
return bytes.Compare(ns[i].IP, ns[j].IP) < 0
3237
}
33-
func (ns Index) Swap(i, j int) {
38+
func (ns NetIndex) Swap(i, j int) {
3439
n := ns[j]
3540
ns[j] = ns[i]
3641
ns[i] = n
@@ -66,62 +71,94 @@ func ParseSystems(s string) []annotator.System {
6671

6772
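// ParseRouteView reads the given tab-separated routeview file and returns an Index of networks grouped by prefix length, ordered from longest to shortest prefix, with each group sorted by IP.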
6873
func ParseRouteView(file []byte) Index {
69-
result := Index{}
7074
sm := map[string]string{}
7175

7276
skip := 0
77+
parsed := 0
7378
b := bytes.NewBuffer(file)
7479
r := csv.NewReader(b)
7580
r.Comma = '\t'
7681
r.ReuseRecord = true
7782

83+
nim := map[int64]NetIndex{}
84+
7885
for {
7986
record, err := r.Read()
8087
if err == io.EOF {
88+
metrics.RouteViewParsed.Inc()
8189
break
8290
}
8391
if len(record) < 3 {
92+
metrics.RouteViewRows.WithLabelValues("missing-fields").Inc()
93+
continue
94+
}
95+
nb, err := strconv.ParseInt(record[1], 10, 32)
96+
if err != nil {
97+
// Skip malformed line.
98+
skip++
99+
log.Println("failed to convert netblock size:", record[1])
100+
metrics.RouteViewRows.WithLabelValues("corrupt-netblock").Inc()
84101
continue
85102
}
86103
_, n, err := net.ParseCIDR(record[0] + "/" + record[1])
104+
if err != nil {
105+
// Skip malformed line.
106+
skip++
107+
log.Println("failed to parse CIDR prefix:", record[0], "with netblock:", record[1])
108+
metrics.RouteViewRows.WithLabelValues("corrupt-prefix").Inc()
109+
continue
110+
}
87111
if _, ok := sm[record[2]]; !ok {
88112
// Break string connection to underlying RAM allocated by the CSV reader.
89113
sm[record[2]] = strings.Repeat(record[2], 1)
90114
}
91-
if len(result) > 1 && result[len(result)-1].Contains(n.IP) && result[len(result)-1].Systems == record[2] {
92-
// If the last network contains the current one with the same systems, skip it.
93-
skip++
94-
continue
95-
}
96-
result = append(result, IPNet{IPNet: *n, Systems: sm[record[2]]})
115+
parsed++
116+
metrics.RouteViewRows.WithLabelValues("parsed").Inc()
117+
nim[nb] = append(nim[nb], IPNet{IPNet: *n, Systems: sm[record[2]]})
97118
}
98-
log.Println("Skipped:", skip, "routeview netblocks of", len(result)+skip)
119+
logx.Debug.Println("Skipped:", skip, "routeview netblocks of", parsed+skip)
99120

100-
// Sort list so that it can be searched.
101-
sort.Sort(result)
102-
return result
121+
// For each netblock, sort each NetIndex array.
122+
netblocks := []int64{}
123+
for k := range nim {
124+
netblocks = append(netblocks, k)
125+
sort.Sort(nim[k])
126+
}
127+
// Sort the prefix lengths in descending order.
128+
sort.Slice(netblocks, func(i, j int) bool { return netblocks[i] > netblocks[j] })
129+
130+
// Construct the final index, from longest to shortest prefix length.
131+
ix := Index{}
132+
for _, k := range netblocks {
133+
ix = append(ix, nim[k])
134+
}
135+
return ix
103136
}
104137

105138
// ErrNoASNFound is returned when search fails to identify a network for the given src IP.
106-
var ErrNoASNFound = errors.New("No ASN found for address")
139+
var ErrNoASNFound = errors.New("no ASN found for address")
107140

108141
// Search attempts to find the given IP in the Index.
109-
func (ns Index) Search(s string) (IPNet, error) {
142+
func (ix Index) Search(s string) (IPNet, error) {
110143
// bytes.Compare will only work correctly when both net.IPs have the same byte count.
111144
ip := net.ParseIP(s)
112145
if ip.To4() != nil {
113146
ip = ip.To4()
114147
}
115-
node := sort.Search(len(ns), func(i int) bool {
116-
if ns[i].Contains(ip) {
117-
// Becaue sort.Search finds the lowest index where f(i) is true, we must return
118-
// true when the IPNet contains the given IP to prevent off by 1 errors.
119-
return true
148+
// Search each set of NetIndexes from longest to shortest, returning the first (longest) match.
149+
for i := range ix {
150+
ns := ix[i]
151+
node := sort.Search(len(ns), func(i int) bool {
152+
if ns[i].Contains(ip) {
153+
// Because sort.Search finds the lowest index where f(i) is true, we must return
154+
// true when the IPNet contains the given IP to prevent off by 1 errors.
155+
return true
156+
}
157+
return bytes.Compare(ns[i].IP, ip) >= 0
158+
})
159+
if node < len(ns) && ns[node].Contains(ip) {
160+
return ns[node], nil
120161
}
121-
return bytes.Compare(ns[i].IP, ip) >= 0
122-
})
123-
if node < len(ns) && ns[node].Contains(ip) {
124-
return ns[node], nil
125162
}
126163
return IPNet{}, ErrNoASNFound
127164
}

routeview/parse_test.go

Lines changed: 36 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,19 @@
11
package routeview
22

33
import (
4-
"bytes"
54
"fmt"
65
"io/ioutil"
76
"log"
87
"net"
98
"reflect"
109
"testing"
1110

12-
"github.com/m-lab/annotation-service/api"
13-
"github.com/m-lab/annotation-service/asn"
1411
"github.com/m-lab/go/rtx"
1512
"github.com/m-lab/uuid-annotator/annotator"
1613
"github.com/m-lab/uuid-annotator/tarreader"
1714
)
1815

19-
var ns Index
20-
var an api.Annotator
21-
2216
func init() {
23-
var err error
24-
// Load file in setup for Benchmark.
25-
b, err := ioutil.ReadFile("../testdata/RouteViewIPv4.pfx2as.gz")
26-
rtx.Must(err, "Failed to read routeview data")
27-
b2, err := tarreader.FromGZ(b)
28-
rtx.Must(err, "Failed to decompress routeview")
29-
ns = ParseRouteView(b2)
30-
31-
// Only used for Benchmark.
32-
an, err = asn.LoadASNDatasetFromReader(bytes.NewBuffer(b2))
33-
rtx.Must(err, "Failed to load api.Annotator")
34-
3517
log.SetFlags(0)
3618
}
3719

@@ -44,34 +26,46 @@ func TestParseRouteView(t *testing.T) {
4426
{
4527
name: "success-ipv4",
4628
filename: "../testdata/RouteViewIPv4.pfx2as.gz",
47-
wantCount: 545957,
29+
wantCount: 845161,
4830
},
4931
{
5032
name: "success-ipv6",
5133
filename: "../testdata/RouteViewIPv6.pfx2as.gz",
52-
wantCount: 54317,
34+
wantCount: 83125,
5335
},
5436
{
5537
name: "corrupt-ipv4",
56-
filename: "../testdata/RouteViewIPv4.corrupt.gz",
57-
wantCount: 6,
38+
filename: "../testdata/RouteViewIPv4.corrupt",
39+
wantCount: 50,
5840
},
5941
}
6042
for _, tt := range tests {
6143
t.Run(tt.name, func(t *testing.T) {
62-
gz, err := ioutil.ReadFile(tt.filename)
44+
b, err := ioutil.ReadFile(tt.filename)
6345
rtx.Must(err, "Failed to read routeview data")
64-
b, err := tarreader.FromGZ(gz)
65-
rtx.Must(err, "Failed to decompress routeview")
46+
if tt.filename[len(tt.filename)-3:] == ".gz" {
47+
b, err = tarreader.FromGZ(b)
48+
rtx.Must(err, "Failed to decompress routeview")
49+
}
6650

6751
ns := ParseRouteView(b)
68-
if len(ns) != tt.wantCount {
69-
t.Errorf("Parse() = %v, want %v", len(ns), tt.wantCount)
52+
c := countIndex(ns)
53+
if c != tt.wantCount {
54+
t.Errorf("Parse() = %v, want %v", c, tt.wantCount)
7055
}
7156
})
7257
}
7358
}
7459

60+
// countIndex returns the total number of networks in the index.
61+
func countIndex(ix Index) int {
62+
total := 0
63+
for i := range ix {
64+
total += len(ix[i])
65+
}
66+
return total
67+
}
68+
7569
func TestParseSystems(t *testing.T) {
7670
tests := []struct {
7771
name string
@@ -138,7 +132,7 @@ func TestIndex_Search(t *testing.T) {
138132
filename: "../testdata/RouteViewIPv4.pfx2as.gz",
139133
src: "1.0.192.1",
140134
want: IPNet{
141-
IPNet: net.IPNet{IP: net.ParseIP("1.0.128.0").To4(), Mask: net.CIDRMask(17, 32)},
135+
IPNet: net.IPNet{IP: net.ParseIP("1.0.192.0").To4(), Mask: net.CIDRMask(21, 32)},
142136
Systems: "23969",
143137
},
144138
},
@@ -192,36 +186,26 @@ func TestIndex_Search(t *testing.T) {
192186
}
193187

194188
func BenchmarkSearch(b *testing.B) {
195-
found := 0
196-
missing := 0
197-
src := "1.0.192.1"
198-
b.ResetTimer()
199-
for i := 0; i < b.N; i++ {
200-
r, err := ns.Search(src)
201-
if err != nil {
202-
missing++
203-
} else {
204-
found++
205-
}
206-
_ = ParseSystems(r.Systems)
207-
}
208-
fmt.Println("f:", found, "m:", missing)
209-
}
189+
gz, err := ioutil.ReadFile("../testdata/RouteViewIPv4.pfx2as.gz")
190+
rtx.Must(err, "Failed to read routeview data")
191+
raw, err := tarreader.FromGZ(gz)
192+
rtx.Must(err, "Failed to decompress routeview")
193+
ns := ParseRouteView(raw)
210194

211-
func BenchmarkAnnotate(b *testing.B) {
212195
found := 0
213196
missing := 0
214-
src := "1.0.192.1"
215-
ann := api.Annotations{}
197+
src := []string{"1.0.192.1", "12.189.157.193"}
216198
b.ResetTimer()
217199
for i := 0; i < b.N; i++ {
218-
err := an.Annotate(src, &ann)
219-
if err != nil {
220-
missing++
221-
} else {
222-
found++
200+
for _, s := range src {
201+
r, err := ns.Search(s)
202+
if err != nil {
203+
missing++
204+
} else {
205+
found++
206+
}
207+
_ = ParseSystems(r.Systems)
223208
}
224-
ann.Network = nil
225209
}
226210
fmt.Println("f:", found, "m:", missing)
227211
}

0 commit comments

Comments
 (0)