
Commit 4e43c0b

Improved Documentation via Additional Comments
1 parent 78c2c23 commit 4e43c0b

7 files changed: +88 -64 lines changed


hadoop/webFinder/src/main/java/webFinder/WebFinderDriver.java (+10 -11)

@@ -38,9 +38,9 @@ public int run(final String[] args) throws Exception {
     final Job job;
 
     conf = new Configuration();
-    job = Job.getInstance(conf, "Your job name");
+    job = Job.getInstance(conf, "WebFinder MapReduce");
 
-    job.setJarByClass(WebFinderDriver.class);
+    job.setJarByClass(WebFinderDriver.class);// use current jar
 
     if (args.length < 2) {
       return 1;
@@ -49,17 +49,16 @@ public int run(final String[] args) throws Exception {
       conf.setInt("maxDepth", Integer.parseInt(args[2]));
     }
 
-    job.setMapperClass(WebFinderMapper.class);
-    job.setMapOutputKeyClass(Text.class);
-    job.setMapOutputValueClass(Text.class);
+    job.setMapperClass(WebFinderMapper.class);// set mapper
+    job.setMapOutputKeyClass(Text.class);// set mapper output key type
+    job.setMapOutputValueClass(Text.class); // set mapper output value type
 
-    job.setReducerClass(WebFinderReducer.class);
+    job.setReducerClass(WebFinderReducer.class);// set reducer
+    job.setOutputKeyClass(Text.class);// set reducer output key type
+    job.setOutputValueClass(List.class);// set reducer output value
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(List.class);
-
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
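Note: the run(String[]) method shown in this diff comes from Hadoop's Tool interface. The commit does not touch the main method, but a driver of this shape is conventionally launched through ToolRunner. A minimal sketch, assuming WebFinderDriver extends Configured and implements Tool (the run signature suggests it, but this commit does not show it):

    // inside WebFinderDriver; needs org.apache.hadoop.conf.Configuration
    // and org.apache.hadoop.util.ToolRunner
    public static void main(final String[] args) throws Exception {
      // ToolRunner parses the generic Hadoop options, then calls run(args)
      System.exit(ToolRunner.run(new Configuration(), new WebFinderDriver(), args));
    }

The job would then be started as hadoop jar webFinder.jar webFinder.WebFinderDriver <input> <output> [maxDepth], matching the args[0], args[1], and args[2] handling above.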

hadoop/webFinder/src/main/java/webFinder/WebFinderMapper.java (+23 -22)

@@ -19,8 +19,8 @@
  * resources that are loaded by a given website URL and emits tuples of
  * kind {@code <resource URL, website URL>}.
  */
-public class WebFinderMapper extends
-    Mapper<LongWritable, Text, Text, Text> {
+public class WebFinderMapper
+    extends Mapper<LongWritable, Text, Text, Text> {
 
   /** the logger we use */
   private static Logger LOGGER = Logger.getLogger(WebFinderMapper.class);
@@ -39,30 +39,31 @@ protected void map(final LongWritable offset, final Text line,
     final HashSet<URL> done;
     String str;
 
-    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
-
     str = WebFinderMapper.__prepare(line.toString(), true);
-    if (str == null) {
+    if (str == null) {// prepare base url
       return;
     }
+    // set maximum depth of spider
+    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
 
     baseUri = URI.create(str).normalize();
     baseUrl = baseUri.toURL();
-    done = new HashSet<>();
+    done = new HashSet<>();// URLs that have been processed
     done.add(baseUrl);
     try {
       done.add(new URL(baseUrl.toString() + '/'));
     } catch (@SuppressWarnings("unused") final Throwable error) {
       // ignore
     }
     baseUrlText = new Text(baseUrl.toString());
-    context.write(baseUrlText, baseUrlText);
+    context.write(baseUrlText, baseUrlText);// url itself is done
+    // now recursively spider resources
     WebFinderMapper.__load(maxDepth, baseUrl, baseUrlText, baseUrl,
         baseUri, new StringBuilder(), new char[16384], done, context);
   }
 
   /**
-   * load a given URL
+   * load a given URL of an HTML document
    *
    * @param remainingDepth
    *          how deep we can still go
@@ -105,7 +106,7 @@ private static final void __load(final int remainingDepth,
     int read;
 
     stringBuilder.setLength(0);
-    uconn = loadUrl.openConnection();
+    uconn = loadUrl.openConnection(); // setup the connection
     uconn.setConnectTimeout(10_000);
     uconn.setReadTimeout(10_000);
     uconn.setDoInput(true);
@@ -114,26 +115,26 @@ private static final void __load(final int remainingDepth,
     uconn.setDefaultUseCaches(true);
     try (final InputStream inputStream = loadUrl.openStream()) {
       try (final InputStreamReader inputReader = new InputStreamReader(
-          inputStream)) {
+          inputStream)) { // load all the data of the text resource
         while ((read = inputReader.read(buffer)) > 0) {
           stringBuilder.append(buffer, 0, read);
         }
       }
     }
 
-    text = stringBuilder.toString().replace('\n', ' ')//
+    text = stringBuilder.toString().replace('\n', ' ')// delete newlines
         .replace('\r', ' ').replace('\t', ' ').replaceAll("  ", " ");
-    lower = text.toLowerCase();
+    lower = text.toLowerCase(); // create a lower case version
 
     nextDesc: for (final __LinkDesc desc : WebFinderMapper.DESCS) {
 
-      last = 0;// find and load scripts
-      findDesc: for (;;) {
+      last = 0;// find and load other resources
+      findDesc: for (;;) {// find begin tag
        index1 = lower.indexOf(desc.m_begin, last);
        if (index1 <= last) {
          continue nextDesc;
        }
-        last = index1 + desc.m_begin.length();
+        last = index1 + desc.m_begin.length();// find URL attribute
        index1 = lower.indexOf(desc.m_urlIndicatorQuote, last);
        index2 = lower.indexOf(desc.m_urlIndicatorPrime, last);
        sep = '"';
@@ -144,7 +145,7 @@ private static final void __load(final int remainingDepth,
          }
        }
        index2 = lower.indexOf('>', last);
-        if (index1 <= last) {
+        if (index1 <= last) {// check for problem with tag end
          continue nextDesc;
        }
        if ((index2 < index1) && (index2 >= last)) {
@@ -157,7 +158,7 @@ private static final void __load(final int remainingDepth,
          continue nextDesc;
        }
 
-        test = text.substring(last, index1);
+        test = text.substring(last, index1);// take URL
        last = index1;
        test = WebFinderMapper.__prepare(test, desc.m_loadRecursive);
        if (test == null) {
@@ -181,8 +182,8 @@ private static final void __load(final int remainingDepth,
          error.addSuppressed(error2);
          error.addSuppressed(error3);
          if (WebFinderMapper.LOGGER != null) {
-            WebFinderMapper.LOGGER.warn(
-                "Error while trying to build URL with string '"
+            WebFinderMapper.LOGGER
+                .warn("Error while trying to build URL with string '"
                    + test + "' under load URL '"
                    + loadUrl.toString() + "' for base URL '"
                    + baseUrl.toString() + "'.", error2);
@@ -225,8 +226,8 @@ private static final void __load(final int remainingDepth,
        }
      } catch (final Throwable error) {
        if (WebFinderMapper.LOGGER != null) {
-          WebFinderMapper.LOGGER.warn("Error while trying to load URL '"
-              + loadUrl + "'.", error);
+          WebFinderMapper.LOGGER.warn(
+              "Error while trying to load URL '" + loadUrl + "'.", error);
        }
      }
@@ -324,7 +325,7 @@ public static final void main(final String[] args) throws Throwable {
 
   /** the link descriptions */
   static final __LinkDesc[] DESCS = { //
-     new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
+      new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
      new __LinkDesc(false, "<link rel='stylesheet'", "href="), //
      new __LinkDesc(false, "<img", "src="), //
      new __LinkDesc(false, "<script", "src="), //
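The comments added to __load annotate a scanner that works by plain string search, not by HTML parsing: find a begin tag, find its URL attribute, cut the value out between the quotes, repeat. A simplified, self-contained rehearsal of that indexOf technique (LinkScan and extract are hypothetical names; the real code also handles single quotes, unquoted attributes, URL normalization, and recursive loading):

    import java.util.ArrayList;
    import java.util.List;

    public final class LinkScan {

      /** collect attribute values following begin tags, e.g. src= of <img */
      static List<String> extract(final String html, final String begin,
          final String attr) {
        final List<String> found = new ArrayList<>();
        final String lower = html.toLowerCase();
        int last = 0;
        for (;;) {
          int index = lower.indexOf(begin, last); // find begin tag
          if (index < last) {
            return found; // no further begin tag: done
          }
          last = index + begin.length();
          index = lower.indexOf(attr + '"', last); // find URL attribute
          if (index < last) {
            return found;
          }
          last = index + attr.length() + 1; // jump behind opening quote
          index = lower.indexOf('"', last); // find closing quote
          if (index < last) {
            return found;
          }
          found.add(html.substring(last, index)); // take URL
          last = index;
        }
      }

      public static void main(final String[] args) {
        System.out.println(extract(
            "<p><img src=\"a.png\"><script src=\"b.js\"></script>",
            "<img", "src=")); // prints [a.png]
      }
    }

The mapper then resolves each extracted URL against the base URI and emits one <resource URL, website URL> tuple per hit.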

hadoop/webFinder/src/main/java/webFinder/WebFinderReducer.java (+11 -10)

@@ -19,14 +19,15 @@
  * multiple websites. This reducer emits tuples of the form
  * {@code <resource URL, list of website urls>}.
  */
-public class WebFinderReducer extends
-    Reducer<Text, Text, Text, List<Text>> {
+public class WebFinderReducer
+    extends Reducer<Text, Text, Text, List<Text>> {
 
   /**
    * The actual reduction step: From the tuples of form
    * {@code <resource URL, iterable of referencing website URLs>}, select
    * all resources referenced by more than one unique website. For these,
-   * output tuples of the form {@code <resource URL, list of website URLs>}.
+   * output tuples of the form {@code <resource URL, list of website URLs>}
+   * .
    */
   @Override
   protected void reduce(final Text key, final Iterable<Text> values,
@@ -40,7 +41,7 @@ protected void reduce(final Text key, final Iterable<Text> values,
 
     set = new HashSet<>();
     looper: for (final Text url : values) {
-      string = url.toString();
+      string = url.toString();// convert value to a URL
       try {
         add = new URI(string).normalize().toURL();
       } catch (@SuppressWarnings("unused") final Throwable error) {
@@ -54,19 +55,19 @@ protected void reduce(final Text key, final Iterable<Text> values,
          }
        }
      }
-      set.add(add);
+      set.add(add); // store value in set of URLs pointing to this resource
    }
 
-    if ((size = set.size()) > 1) {
-      list = new ArrayList(size);
+    if ((size = set.size()) > 1) {// multiple URLs point to key
+      list = new ArrayList(size);// let's make a list of them
      for (final URL found : set) {
        list.add(found.toString());
      }
-      Collections.sort(list);
-      for (index = list.size(); (--index) >= 0;) {
+      Collections.sort(list);// and sort them
+      for (index = list.size(); (--index) >= 0;) {// now convert to Text
        list.set(index, new Text((String) (list.get(index))));
      }
-      context.write(key, list);
+      context.write(key, list);// write <key, list of referers> tuple
    }
  }
 }
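To see what the annotated steps compute, consider a stylesheet referenced by two sites. A toy rehearsal of the same deduplicate-sort-emit logic with plain strings (the real reducer additionally converts each value to a normalized URL before adding it to the set):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public final class ReduceDemo {
      public static void main(final String[] args) {
        // the referers of one resource, as the reducer might see them;
        // duplicates occur when a site loads the same resource twice
        final List<String> values = Arrays.asList("http://siteA.example",
            "http://siteB.example", "http://siteA.example");
        final Set<String> set = new HashSet<>(values); // deduplicate referers
        if (set.size() > 1) { // resource shared by more than one site
          final List<String> list = new ArrayList<>(set);
          Collections.sort(list); // deterministic output order
          // emits <resource URL, list of website URLs>
          System.out.println("http://cdn.example/style.css\t" + list);
        }
      }
    }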

hadoop/wordCount/src/main/java/wordCount/WordCountDriver.java (+9 -7)

@@ -34,15 +34,17 @@ public int run(final String[] args) throws Exception {
       return 1;
     }
 
-    job.setMapperClass(WordCountMapper.class);
-    job.setReducerClass(WordCountReducer.class);
-    job.setCombinerClass(WordCountReducer.class);
+    job.setMapperClass(WordCountMapper.class);// set mapper
+    job.setReducerClass(WordCountReducer.class);// set reducer
+    // a combiner performs something like a reduction step right after
+    // mapping, on the mapper's computer, before sending on the data
+    job.setCombinerClass(WordCountReducer.class);// set combiner
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(IntWritable.class);
+    job.setOutputKeyClass(Text.class);// set output key class
+    job.setOutputValueClass(IntWritable.class);// set output value class
 
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
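The new combiner comment is the heart of this change: a combiner runs the reduction logic locally on each mapper node, so far less data has to cross the network during the shuffle. A small plain-Java illustration of the effect (not Hadoop code):

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public final class CombinerDemo {
      public static void main(final String[] args) {
        // one mapper's raw output: a 1 for every occurrence of a word
        final List<String> mapped = Arrays.asList("the", "cat", "the",
            "mat", "the");
        // the combiner pre-sums these counts on the mapper's machine ...
        final Map<String, Integer> combined = new HashMap<>();
        for (final String word : mapped) {
          combined.merge(word, 1, Integer::sum);
        }
        // ... so only three pre-summed tuples (the=3, cat=1, mat=1)
        // are sent on to the reducers, instead of five raw tuples
        System.out.println(combined);
      }
    }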

hadoop/wordCount/src/main/java/wordCount/WordCountMapper.java (+13 -7)

@@ -7,6 +7,12 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 
+/**
+ * This is the mapper part of the word count example. The mapper receives
+ * lines of text. It first replaces all punctuation marks with spaces. Then
+ * it breaks the line at the spaces into multiple words. For each word, we
+ * emit a tuple of form {@code <WORD, 1>}.
+ */
 public class WordCountMapper
     extends Mapper<LongWritable, Text, Text, IntWritable> {
 
@@ -15,16 +21,16 @@ public class WordCountMapper
   @Override
   protected void map(final LongWritable offset, final Text line,
       final Context context) throws IOException, InterruptedException {
-    for (String word : line.toString()//
-        .replace('.', ' ').replace(',', ' ').replace('/', ' ')//
-        .replace(']', ' ').replace('[', ' ').replace('_', ' ')//
-        .replace(')', ' ').replace('(', ' ').replace('#', ' ')//
-        .replace('!', ' ').replace('?', ' ').replace("-", "")//
+    for (String word : line.toString()// replace punctuation and other
+        .replace('.', ' ').replace(',', ' ').replace('/', ' ')// strange
+        .replace(']', ' ').replace('[', ' ').replace('_', ' ')// chars
+        .replace(')', ' ').replace('(', ' ').replace('#', ' ')// with
+        .replace('!', ' ').replace('?', ' ').replace("-", "")// spaces
         .replace("\"", "").replace("\'", "").replaceAll("[0-9]+", " ")//
         .replace(':', ' ').replace('\t', ' ').replace('\f', ' ')//
-        .split("\\s+")) {
+        .split("\\s+")) {// iterate over all space-separated words
       word = word.trim();
-      if (word.length() > 0) {
+      if (word.length() > 0) {// emit one tuple <WORD, 1> for each WORD
         context.write(new Text(word.toLowerCase()), WordCountMapper.ONE);
       }
     }
hadoop/wordCount/src/main/java/wordCount/WordCountReducer.java (+19 -4)

@@ -6,18 +6,33 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
+/**
+ * This is the reducer class of the MapReduce Word Count example. It also
+ * acts as combiner, i.e., it is applied two times:
+ * <ol>
+ * <li>As combiner, i.e., directly after the mapping process on each mapper
+ * node. This way, the data is "reduced" before being sent on. During this
+ * application, all input tuples of the reducer are of the form
+ * {@code <WORD, List<1, 1, 1, ... 1>>}. The reducer here creates output
+ * tuples of the form {@code <WORD, Sum(List...)>}. These tuples have the
+ * same format as the mapper's output tuples, just with integers which
+ * might be larger than 1. They are sent on to the real reducer steps.</li>
+ * <li>As actual reducer: Here all the tuples produced by the combiners
+ * arrive. There may be multiple entries in the lists of these tuples,
+ * since the same word may have been found on different computers.</li>
+ * </ol>
+ */
 public class WordCountReducer
     extends Reducer<Text, IntWritable, Text, IntWritable> {
 
   @Override
   protected void reduce(final Text key, final Iterable<IntWritable> values,
       final Context context) throws IOException, InterruptedException {
-
+    // we receive tuples of the type <WORD, IntWritable> for each WORD
     int count = 0;
-    for (final IntWritable current : values) {
+    for (final IntWritable current : values) { // we add up all the ints
       count += current.get();
     }
-    context.write(key, new IntWritable(count));
+    context.write(key, new IntWritable(count));// and emit the final count
   }
-
 }
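A quick trace of the two-phase behavior the new javadoc describes: if the combiners on three mapper nodes pre-summed the word "the" to 3, 2, and 4, the final reducer receives <the, [3, 2, 4]> and must emit <the, 9>. The summation loop, replayed in plain Java:

    import java.util.Arrays;
    import java.util.List;

    public final class ReducerDemo {
      public static void main(final String[] args) {
        // counts for "the" as pre-summed by combiners on three nodes
        final List<Integer> values = Arrays.asList(3, 2, 4);
        int count = 0;
        for (final int current : values) {
          count += current; // add up all the ints
        }
        System.out.println("the\t" + count); // emits <the, 9>
      }
    }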

mpi/structScatter.c (+3 -3)

@@ -52,17 +52,17 @@ int main(int argc, char *argv[]) {
     MPI_Comm_size(MPI_COMM_WORLD, &size); // get number of processes
 
     send = (myStruct*)malloc(sizeof(myStruct) * size); // allocate memory
-    for(i = size; (--i) >= 0; ) {
+    for(i = size; (--i) >= 0; ) { // store some dummy data
       send[i].myIntA = rank;
       send[i].myIntB = i;
       send[i].myShort = (rank * rank) % size;
-      send[i].myDouble = *((double*)("Hi you!"));
+      send[i].myDouble = *((double*)("Hi you!")); // fun: reinterpret the bytes of "Hi you!" as a double
       send[i].myChar = 'V';
       send[i].myFloat = (i / (float)size);
     }
   }
 
-  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD);
+  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD); // scatter the structs
 
   printf("%d: received: myIntA=%d, myIntB=%d, myShort=%d, myDouble=\"%s\", myChar='%c', myFloat=%f.\n",
       rank, data.myIntA, data.myIntB, data.myShort, (char*)&data.myDouble, data.myChar, data.myFloat);
