Skip to content

Commit f8eb873

Browse files
authored
Merge pull request #134 from m-lab/schema
Add anomolies fields and fix bug.
2 parents f517d7b + 1f23418 commit f8eb873

File tree

2 files changed

+25
-2
lines changed

2 files changed

+25
-2
lines changed

parser/ndt.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ const (
2828
// TODO - in future, we should probably detect when the connection state changes
2929
// from established, as there is little reason to parse snapshots beyond that
3030
// point.
31-
MAX_NUM_SNAPSHOTS = 2800
31+
MIN_NUM_SNAPSHOTS = 1600 // If fewer than this, then set anomalies.num_snaps
32+
MAX_NUM_SNAPSHOTS = 2800 // If more than this, truncate, and set anomolies.num_snaps
3233
)
3334

3435
//=========================================================================
@@ -327,12 +328,17 @@ func (n *NDTParser) getAndInsertValues(test *fileInfoAndData, testType string) {
327328
return
328329
}
329330

331+
valid := true
330332
err = snaplog.ValidateSnapshots()
331333
if err != nil {
332334
log.Printf("ValidateSnapshots failed for %s, when processing: %s\n%s\n",
333335
test.fn, n.taskFileName, err)
334336
metrics.WarningCount.WithLabelValues(
335337
n.TableName(), testType, "validate failed").Inc()
338+
// If ValidateSnapshots returns error, it generally means that there
339+
// is a problem with the last snapshot, typically a truncated file.
340+
// In most cases, there are still many valid snapshots.
341+
valid = false
336342
}
337343

338344
// HACK - just to see how expensive the Values() call is...
@@ -358,6 +364,7 @@ func (n *NDTParser) getAndInsertValues(test *fileInfoAndData, testType string) {
358364
return
359365
}
360366

367+
// Delete the constant fields.
361368
delete(delta, "TimeStamps")
362369
delete(delta, "StartTimeStamp")
363370
delete(delta, "StartTimeUsec")
@@ -385,6 +392,12 @@ func (n *NDTParser) getAndInsertValues(test *fileInfoAndData, testType string) {
385392
last = &snap
386393
}
387394

395+
if len(deltas) > 0 {
396+
// We tag some of the deltas with specific tags, to make them easy
397+
// to find. is_last is the first, but more will be added as we work
398+
// out the most useful tags.
399+
deltas[len(deltas)-1]["is_last"] = true
400+
}
388401
final := snaplog.SnapCount() - 1
389402
if final > MAX_NUM_SNAPSHOTS {
390403
final = MAX_NUM_SNAPSHOTS
@@ -419,6 +432,13 @@ func (n *NDTParser) getAndInsertValues(test *fileInfoAndData, testType string) {
419432

420433
results["test_id"] = test.fn
421434
results["task_filename"] = n.taskFileName
435+
if snaplog.SnapCount() > MAX_NUM_SNAPSHOTS || snaplog.SnapCount() < MIN_NUM_SNAPSHOTS {
436+
results["anomalies"].(schema.Web100ValueMap)["num_snaps"] = snaplog.SnapCount()
437+
}
438+
if !valid {
439+
results["anomalies"].(schema.Web100ValueMap)["snaplog_error"] = true
440+
}
441+
422442
// This is the timestamp parsed from the filename.
423443
lt, err := test.info.Timestamp.MarshalText()
424444
if err != nil {
@@ -442,10 +462,11 @@ func (n *NDTParser) getAndInsertValues(test *fileInfoAndData, testType string) {
442462
// TODO - metaFile is currently used only to populate the connection spec.
443463
// Should we be using it for anything else?
444464
n.metaFile.PopulateConnSpec(connSpec)
465+
} else {
445466
// TODO Add a log once noise is reduced.
446467
metrics.WarningCount.WithLabelValues(
447468
n.TableName(), testType, "no meta").Inc()
448-
} else {
469+
results["anomalies"].(schema.Web100ValueMap)["no_meta"] = true
449470
// TODO(dev) - use other information to partially populate
450471
// the connection spec.
451472
}

schema/web100.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ func NewWeb100FullRecord(version string, logTime int64, connSpec, snapValues map
131131
"log_time": 0,
132132
// Can this be part of the metadata service?
133133
"connection_spec": FullConnectionSpec(),
134+
"anomalies": Web100ValueMap{},
134135
"web100_log_entry": map[string]bigquery.Value{
135136
"version": version,
136137
"log_time": logTime,
@@ -210,6 +211,7 @@ func EmptyGeolocation() Web100ValueMap {
210211
// All undefined fields will be set to null after a BQ insert.
211212
func NewWeb100MinimalRecord(version string, logTime int64, connSpec, snapValues Web100ValueMap, deltas []Web100ValueMap) Web100ValueMap {
212213
return Web100ValueMap{
214+
"anomalies": Web100ValueMap{},
213215
"web100_log_entry": Web100ValueMap{
214216
"version": version,
215217
"log_time": logTime,

0 commit comments

Comments
 (0)