Skip to content

Commit 5263b7c

Browse files
NUTCH-3096 HostDB ResolverThread can create too many job counters
(patch contributed by Markus Jelsma)
1 parent e2a29d0 commit 5263b7c

File tree

1 file changed

+20
-3
lines changed

1 file changed

+20
-3
lines changed

src/java/org/apache/nutch/hostdb/ResolverThread.java

+20-3
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,32 @@ public void run() {
114114
}
115115
}
116116

117-
context.getCounter("UpdateHostDb",
118-
Long.toString(datum.numFailures()) + "_times_failed").increment(1);
117+
context.getCounter("UpdateHostDb", createFailureCounterLabel(datum)).increment(1);
119118
} catch (Exception ioe) {
120119
LOG.warn(StringUtils.stringifyException(ioe));
121120
}
122121
} catch (Exception e) {
123122
LOG.warn(StringUtils.stringifyException(e));
124123
}
125-
124+
126125
context.getCounter("UpdateHostDb", "checked_hosts").increment(1);
127126
}
127+
128+
private String createFailureCounterLabel(HostDatum datum) {
129+
// Hadoop will allow no more than 120 distinct counters. If we have a large
130+
// number of distinct failures, we'll exceed the limit, Hadoop will complain,
131+
// the job will fail. Let's limit the amount of possibilities by grouping
132+
// the numFailures in buckets. NUTCH-3096
133+
String label = null;
134+
long n = datum.numFailures();
135+
if (n < 4) {
136+
label = Long.toString(n);
137+
} else if (n > 3 && n < 11) {
138+
label = "4-10";
139+
} else {
140+
label = ">10";
141+
}
142+
143+
return label + "_times_failed";
144+
}
128145
}

0 commit comments

Comments
 (0)