Improved Documentation via Additional Comments

thomasWeise · thomasWeise · commit 4e43c0b2db15 · 2016-06-03T16:06:31.000+08:00
diff --git a/hadoop/webFinder/src/main/java/webFinder/WebFinderDriver.java b/hadoop/webFinder/src/main/java/webFinder/WebFinderDriver.java
@@ -38,9 +38,9 @@ public int run(final String[] args) throws Exception {
     final Job job;
 
     conf = new Configuration();
-    job = Job.getInstance(conf, "Your job name");
+    job = Job.getInstance(conf, "WebFinder MapReduce");
 
-    job.setJarByClass(WebFinderDriver.class);
+    job.setJarByClass(WebFinderDriver.class);// use current jar
 
     if (args.length < 2) {
       return 1;
@@ -49,17 +49,16 @@ public int run(final String[] args) throws Exception {
       conf.setInt("maxDepth", Integer.parseInt(args[2]));
     }
 
-    job.setMapperClass(WebFinderMapper.class);
-    job.setMapOutputKeyClass(Text.class);
-    job.setMapOutputValueClass(Text.class);
+    job.setMapperClass(WebFinderMapper.class);// set mapper
+    job.setMapOutputKeyClass(Text.class);// set mapper output key type
+    job.setMapOutputValueClass(Text.class); // set mapper output value type
 
-    job.setReducerClass(WebFinderReducer.class);
+    job.setReducerClass(WebFinderReducer.class);// set reducer
+    job.setOutputKeyClass(Text.class);// set reducer output key type
+    job.setOutputValueClass(List.class);// set reducer output value type
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(List.class);
-
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
diff --git a/hadoop/webFinder/src/main/java/webFinder/WebFinderMapper.java b/hadoop/webFinder/src/main/java/webFinder/WebFinderMapper.java
@@ -19,8 +19,8 @@
  * resources that are loaded by a given website URL and emits tuples of
  * kind {@code <resource URL, website URL>}.
  */
-public class WebFinderMapper extends
-    Mapper<LongWritable, Text, Text, Text> {
+public class WebFinderMapper
+    extends Mapper<LongWritable, Text, Text, Text> {
 
   /** the logger we use */
   private static Logger LOGGER = Logger.getLogger(WebFinderMapper.class);
@@ -39,30 +39,31 @@ protected void map(final LongWritable offset, final Text line,
     final HashSet<URL> done;
     String str;
 
-    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
-
     str = WebFinderMapper.__prepare(line.toString(), true);
-    if (str == null) {
+    if (str == null) {// base url could not be prepared: skip line
       return;
     }
+    // set maximum depth of spider
+    maxDepth = context.getConfiguration().getInt("maxDepth", 1);
 
     baseUri = URI.create(str).normalize();
     baseUrl = baseUri.toURL();
-    done = new HashSet<>();
+    done = new HashSet<>();// URLs that have been processed
     done.add(baseUrl);
     try {
       done.add(new URL(baseUrl.toString() + '/'));
     } catch (@SuppressWarnings("unused") final Throwable error) {
       // ignore
     }
     baseUrlText = new Text(baseUrl.toString());
-    context.write(baseUrlText, baseUrlText);
+    context.write(baseUrlText, baseUrlText);// url itself is done
+    // now recursively spider resources
     WebFinderMapper.__load(maxDepth, baseUrl, baseUrlText, baseUrl,
         baseUri, new StringBuilder(), new char[16384], done, context);
   }
 
   /**
-   * load a given URL
+   * load a given URL of a HTML document
    *
    * @param remainingDepth
    *          how deep we can still go
@@ -105,7 +106,7 @@ private static final void __load(final int remainingDepth,
       int read;
 
       stringBuilder.setLength(0);
-      uconn = loadUrl.openConnection();
+      uconn = loadUrl.openConnection(); // setup the connection
       uconn.setConnectTimeout(10_000);
       uconn.setReadTimeout(10_000);
       uconn.setDoInput(true);
@@ -114,26 +115,26 @@ private static final void __load(final int remainingDepth,
       uconn.setDefaultUseCaches(true);
       try (final InputStream inputStream = loadUrl.openStream()) {
         try (final InputStreamReader inputReader = new InputStreamReader(
-            inputStream)) {
+            inputStream)) { // load all the data of the text resource
           while ((read = inputReader.read(buffer)) > 0) {
             stringBuilder.append(buffer, 0, read);
           }
         }
       }
 
-      text = stringBuilder.toString().replace('\n', ' ')//
+      text = stringBuilder.toString().replace('\n', ' ')// delete newlines
           .replace('\r', ' ').replace('\t', ' ').replaceAll("  ", " ");
-      lower = text.toLowerCase();
+      lower = text.toLowerCase(); // create a lower case version
 
       nextDesc: for (final __LinkDesc desc : WebFinderMapper.DESCS) {
 
-        last = 0;// find and load scripts
-        findDesc: for (;;) {
+        last = 0;// find and load other resources
+        findDesc: for (;;) {// find begin tag
           index1 = lower.indexOf(desc.m_begin, last);
           if (index1 <= last) {
             continue nextDesc;
           }
-          last = index1 + desc.m_begin.length();
+          last = index1 + desc.m_begin.length();// find URL attribute
           index1 = lower.indexOf(desc.m_urlIndicatorQuote, last);
           index2 = lower.indexOf(desc.m_urlIndicatorPrime, last);
           sep = '"';
@@ -144,7 +145,7 @@ private static final void __load(final int remainingDepth,
             }
           }
           index2 = lower.indexOf('>', last);
-          if (index1 <= last) {
+          if (index1 <= last) {// check for problem with tag end
             continue nextDesc;
           }
           if ((index2 < index1) && (index2 >= last)) {
@@ -157,7 +158,7 @@ private static final void __load(final int remainingDepth,
             continue nextDesc;
           }
 
-          test = text.substring(last, index1);
+          test = text.substring(last, index1);// take URL
           last = index1;
           test = WebFinderMapper.__prepare(test, desc.m_loadRecursive);
           if (test == null) {
@@ -181,8 +182,8 @@ private static final void __load(final int remainingDepth,
                 error.addSuppressed(error2);
                 error.addSuppressed(error3);
                 if (WebFinderMapper.LOGGER != null) {
-                  WebFinderMapper.LOGGER.warn(
-                      "Error while trying to build URL with string '"
+                  WebFinderMapper.LOGGER
+                      .warn("Error while trying to build URL with string '"
                           + test + "' under load URL '"
                           + loadUrl.toString() + "' for base URL '"
                           + baseUrl.toString() + "'.", error2);
@@ -225,8 +226,8 @@ private static final void __load(final int remainingDepth,
       }
     } catch (final Throwable error) {
       if (WebFinderMapper.LOGGER != null) {
-        WebFinderMapper.LOGGER.warn("Error while trying to load URL '"
-            + loadUrl + "'.", error);
+        WebFinderMapper.LOGGER.warn(
+            "Error while trying to load URL '" + loadUrl + "'.", error);
       }
     }
 
@@ -324,7 +325,7 @@ public static final void main(final String[] args) throws Throwable {
 
   /** the link descriptions */
   static final __LinkDesc[] DESCS = { //
-  new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
+      new __LinkDesc(false, "<link rel=\"stylesheet\"", "href="), //
       new __LinkDesc(false, "<link rel='stylesheet'", "href="), //
       new __LinkDesc(false, "<img", "src="), //
       new __LinkDesc(false, "<script", "src="), //
diff --git a/hadoop/webFinder/src/main/java/webFinder/WebFinderReducer.java b/hadoop/webFinder/src/main/java/webFinder/WebFinderReducer.java
@@ -19,14 +19,15 @@
  * multiple websites. This reducer emits tuples of the form
  * {@code <resource URL, list of website urls>}.
  */
-public class WebFinderReducer extends
-    Reducer<Text, Text, Text, List<Text>> {
+public class WebFinderReducer
+    extends Reducer<Text, Text, Text, List<Text>> {
 
   /**
    * The actual reduction step: From the tuples of form
    * {@code <resource URL, iterable of referencing website URLs>}, select
    * all resources referenced by more than one unique website. For these,
-   * output tuples of the form {@code <resource URL, list of website URLs>}.
+   * output tuples of the form
+   * {@code <resource URL, list of website URLs>}.
    */
   @Override
   protected void reduce(final Text key, final Iterable<Text> values,
@@ -40,7 +41,7 @@ protected void reduce(final Text key, final Iterable<Text> values,
 
     set = new HashSet<>();
     looper: for (final Text url : values) {
-      string = url.toString();
+      string = url.toString();// convert value to a URL
       try {
         add = new URI(string).normalize().toURL();
       } catch (@SuppressWarnings("unused") final Throwable error) {
@@ -54,19 +55,19 @@ protected void reduce(final Text key, final Iterable<Text> values,
           }
         }
       }
-      set.add(add);
+      set.add(add); // store value in set of URLs pointing to this resource
     }
 
-    if ((size = set.size()) > 1) {
-      list = new ArrayList(size);
+    if ((size = set.size()) > 1) {// multiple URLs point to key
+      list = new ArrayList(size);// let's make a list of them
       for (final URL found : set) {
         list.add(found.toString());
       }
-      Collections.sort(list);
-      for (index = list.size(); (--index) >= 0;) {
+      Collections.sort(list);// and sort them
+      for (index = list.size(); (--index) >= 0;) {// now convert to Text
         list.set(index, new Text((String) (list.get(index))));
       }
-      context.write(key, list);
+      context.write(key, list);// write <key, list of referrers> tuple
     }
   }
 }
diff --git a/hadoop/wordCount/src/main/java/wordCount/WordCountDriver.java b/hadoop/wordCount/src/main/java/wordCount/WordCountDriver.java
@@ -34,15 +34,17 @@ public int run(final String[] args) throws Exception {
       return 1;
     }
 
-    job.setMapperClass(WordCountMapper.class);
-    job.setReducerClass(WordCountReducer.class);
-    job.setCombinerClass(WordCountReducer.class);
+    job.setMapperClass(WordCountMapper.class);// set mapper
+    job.setReducerClass(WordCountReducer.class);// set reducer
+    // a combiner performs something like a reduction step right after
+    // mapping, on the mapper's computer, before sending on the data
+    job.setCombinerClass(WordCountReducer.class);// set combiner
 
-    job.setOutputKeyClass(Text.class);
-    job.setOutputValueClass(IntWritable.class);
+    job.setOutputKeyClass(Text.class);// set output key class
+    job.setOutputValueClass(IntWritable.class);// set output value class
 
-    job.setInputFormatClass(TextInputFormat.class);
-    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setInputFormatClass(TextInputFormat.class);// set input format
+    job.setOutputFormatClass(TextOutputFormat.class);// set output format
 
     FileInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
diff --git a/hadoop/wordCount/src/main/java/wordCount/WordCountMapper.java b/hadoop/wordCount/src/main/java/wordCount/WordCountMapper.java
@@ -7,6 +7,12 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 
+/**
+ * This is the mapper part of the word count example. The mapper receives
+ * lines of text. It first replaces all punctuation marks with spaces. Then
+ * it breaks the line at the spaces into multiple words. For each word, we
+ * emit a tuple of form {@code <WORD, 1>}.
+ */
 public class WordCountMapper
     extends Mapper<LongWritable, Text, Text, IntWritable> {
 
@@ -15,16 +21,16 @@ public class WordCountMapper
   @Override
   protected void map(final LongWritable offset, final Text line,
       final Context context) throws IOException, InterruptedException {
-    for (String word : line.toString()//
-        .replace('.', ' ').replace(',', ' ').replace('/', ' ')//
-        .replace(']', ' ').replace('[', ' ').replace('_', ' ')//
-        .replace(')', ' ').replace('(', ' ').replace('#', ' ')//
-        .replace('!', ' ').replace('?', ' ').replace("-", "")//
+    for (String word : line.toString()// replace punctuation and other
+        .replace('.', ' ').replace(',', ' ').replace('/', ' ')// strange
+        .replace(']', ' ').replace('[', ' ').replace('_', ' ')// chars
+        .replace(')', ' ').replace('(', ' ').replace('#', ' ')// with
+        .replace('!', ' ').replace('?', ' ').replace("-", "")// spaces
         .replace("\"", "").replace("\'", "").replaceAll("[0-9]+", " ")//
         .replace(':', ' ').replace('\t', ' ').replace('\f', ' ')//
-        .split("\\s+")) {
+        .split("\\s+")) {// iterate over all space-separated words
       word = word.trim();
-      if (word.length() > 0) {
+      if (word.length() > 0) {// emit one tuple <WORD, 1> for each WORD
         context.write(new Text(word.toLowerCase()), WordCountMapper.ONE);
       }
     }
diff --git a/hadoop/wordCount/src/main/java/wordCount/WordCountReducer.java b/hadoop/wordCount/src/main/java/wordCount/WordCountReducer.java
@@ -6,18 +6,33 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
+/**
+ * This is the reducer class of the MapReduce Word Count example. It also
+ * acts as combiner, i.e., it is applied two times:
+ * <ol>
+ * <li>As combiner, i.e., directly after the mapping process on each mapper
+ * node. This way, the data is "reduced" before being sent on. During this
+ * application, all input tuples of the reducer are of the form
+ * {@code <WORD, List<1, 1, 1, ... 1>>}. The reducer here creates output
+ * tuples of the form {@code <WORD, Sum(List...)>}. These tuples have the
+ * same format as the mapper's output tuples, just with integers which
+ * might be larger than 1. They are sent on to the real reducer steps.</li>
+ * <li>As actual reducer: Here all the tuples produced by the combiners
+ * arrive. There may be multiple entries in the lists of these tuples,
+ * since the same word may have been found on different computers.</li>
+ * </ol>
+ */
 public class WordCountReducer
     extends Reducer<Text, IntWritable, Text, IntWritable> {
 
   @Override
   protected void reduce(final Text key, final Iterable<IntWritable> values,
       final Context context) throws IOException, InterruptedException {
-
+    // we receive one tuple <WORD, Iterable<IntWritable>> for each WORD
     int count = 0;
-    for (final IntWritable current : values) {
+    for (final IntWritable current : values) { // we add up all the ints
       count += current.get();
     }
-    context.write(key, new IntWritable(count));
+    context.write(key, new IntWritable(count));// and emit the final count
   }
-
 }
diff --git a/mpi/structScatter.c b/mpi/structScatter.c
@@ -52,17 +52,17 @@ int main(int argc, char *argv[]) {
     MPI_Comm_size(MPI_COMM_WORLD, &size); // get number of processes
 
     send = (myStruct*)malloc(sizeof(myStruct) * size); // allocate memory
-    for(i = size; (--i) >= 0; ) {
+    for(i = size; (--i) >= 0; ) { // store some dummy data
       send[i].myIntA    = rank;
       send[i].myIntB    = i;
       send[i].myShort   = (rank * rank) % size;
-      send[i].myDouble  = *((double*)("Hi you!"));
+      send[i].myDouble  = *((double*)("Hi you!")); // reinterpret the string's 8 bytes (incl. \0) as a double
       send[i].myChar    = 'V';
       send[i].myFloat   = (i / (float)size);
     }
   }
 
-  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD);
+  MPI_Scatter(send, 1, myStructType, &data, 1, myStructType, 0, MPI_COMM_WORLD); //scatter the structs
 
   printf("%d: received: myIntA=%d, myIntB=%d, myShort=%d, myDouble=\"%s\", myChar='%c', myFloat=%f.\n",
          rank, data.myIntA, data.myIntB, data.myShort, (char*)&data.myDouble, data.myChar, data.myFloat);

Original file line number	Diff line number	Diff line change
`@@ -19,14 +19,15 @@`
`19`	`19`	`* multiple websites. This reducer emits tuples of the form`
`20`	`20`	`* {@code <resource URL, list of website urls>}.`
`21`	`21`	`*/`
`22`		`-public class WebFinderReducer extends`
`23`		`- Reducer<Text, Text, Text, List<Text>> {`
	`22`	`+public class WebFinderReducer`
	`23`	`+ extends Reducer<Text, Text, Text, List<Text>> {`
`24`	`24`
`25`	`25`	`/**`
`26`	`26`	`* The actual reduction step: From the tuples of form`
`27`	`27`	`* {@code <resource URL, iterable of referencing website URLs>}, select`
`28`	`28`	`* all resources referenced by more than one unique website. For these,`
`29`		`- * output tuples of the form {@code <resource URL, list of website URLs>}.`
	`29`	`+ * output tuples of the form`
	`30`	`+ * {@code <resource URL, list of website URLs>}.`
`30`	`31`	`*/`
`31`	`32`	`@Override`
`32`	`33`	`protected void reduce(final Text key, final Iterable<Text> values,`
`@@ -40,7 +41,7 @@ protected void reduce(final Text key, final Iterable<Text> values,`
`40`	`41`
`41`	`42`	`set = new HashSet<>();`
`42`	`43`	`looper: for (final Text url : values) {`
`43`		`- string = url.toString();`
	`44`	`+ string = url.toString();// convert value to a URL`
`44`	`45`	`try {`
`45`	`46`	`add = new URI(string).normalize().toURL();`
`46`	`47`	`} catch (@SuppressWarnings("unused") final Throwable error) {`
`@@ -54,19 +55,19 @@ protected void reduce(final Text key, final Iterable<Text> values,`
`54`	`55`	`}`
`55`	`56`	`}`
`56`	`57`	`}`
`57`		`- set.add(add);`
	`58`	`+ set.add(add); // store value in set of URLs pointing to this resource`
`58`	`59`	`}`
`59`	`60`
`60`		`- if ((size = set.size()) > 1) {`
`61`		`- list = new ArrayList(size);`
	`61`	`+ if ((size = set.size()) > 1) {// multiple URLs point to key`
	`62`	`+ list = new ArrayList(size);// let's make a list of them`
`62`	`63`	`for (final URL found : set) {`
`63`	`64`	`list.add(found.toString());`
`64`	`65`	`}`
`65`		`- Collections.sort(list);`
`66`		`- for (index = list.size(); (--index) >= 0;) {`
	`66`	`+ Collections.sort(list);// and sort them`
	`67`	`+ for (index = list.size(); (--index) >= 0;) {// now convert to Text`
`67`	`68`	`list.set(index, new Text((String) (list.get(index))));`
`68`	`69`	`}`
`69`		`- context.write(key, list);`
	`70`	`+ context.write(key, list);// write <key, list of referrers> tuple`
`70`	`71`	`}`
`71`	`72`	`}`
`72`	`73`	`}`