
Commit 0034850

tushargrb authored and bmcutler committed
Add python and c/cpp tokenizers (#3)
* add python and c tokenizers. Required packages for tokenizers not installed
* add python and c tokenizers. Add instruction for install and usage. Some minor modifications to c and python tokenizers
* progress on install/integrate c & python tokenizers
* finish integration
1 parent 8238f69 commit 0034850

File tree

12 files changed: +1748 −11 lines changed


bin/hash_all.py

+10 −3
@@ -52,12 +52,19 @@ def hasher(args,my_tokenized_file,my_hashes_file):
         if args.plaintext:
             for j in range(0,args.window):
                 foo+=str(tokens[i+j].get("value"))
+
         elif args.python:
-            print("NEED A PYTHON HASHER")
+            for j in range(0,args.window):
+                foo+=str(tokens[i+j].get("type"))
+
         elif args.cpp:
-            print("NEED A C++ HASHER")
+            for j in range(0,args.window):
+                foo+=str(tokens[i+j].get("type"))
+
         else:
-            print("UNKNOWN HASHER")
+            print("\n\nERROR: UNKNOWN HASHER\n\n")
+            exit(1)
+
         hash_object = hashlib.md5(foo.encode())
         hash_object_string=hash_object.hexdigest()
         #FIXME: this truncation should be adjusted after more full-scale testing
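
To make the windowed hashing concrete, here is a small standalone sketch (not part of the commit; the window contents are invented) of how one fingerprint is computed in the new python/cpp branches, which hash the token types rather than the token values used by plaintext mode:

```python
import hashlib

# hypothetical window of 3 tokens from a tokenized C file
window = [{"type": "KEYWORD",     "value": "int"},
          {"type": "IDENTIFIER",  "value": "main"},
          {"type": "PUNCTUATION", "value": "("}]

# mirror the hasher above: concatenate the "type" field of every token in the window...
foo = "".join(str(t.get("type")) for t in window)   # "KEYWORDIDENTIFIERPUNCTUATION"

# ...and fingerprint the window with MD5
print(hashlib.md5(foo.encode()).hexdigest())
```

Hashing types instead of values means a renamed variable yields the same fingerprint, while plaintext mode remains sensitive to the literal text.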

bin/process_all.sh

+5 −3
@@ -3,11 +3,13 @@
 semester=$1
 course=$2
 gradeable=$3
-window=$4
+language=$4
+window=$5
+
 
 /usr/local/submitty/Lichen/bin/concatenate_all.py $semester $course $gradeable
-/usr/local/submitty/Lichen/bin/tokenize_all.py $semester $course $gradeable --plaintext
-/usr/local/submitty/Lichen/bin/hash_all.py $semester $course $gradeable --window $window --plaintext
+/usr/local/submitty/Lichen/bin/tokenize_all.py $semester $course $gradeable --${language}
+/usr/local/submitty/Lichen/bin/hash_all.py $semester $course $gradeable --window $window --${language}
 
 /usr/local/submitty/Lichen/bin/compare_hashes.out $semester $course $gradeable --window $window
 
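
With the extra argument, a pipeline run now names the language explicitly. For example (the semester, course, and gradeable values below are hypothetical):

```bash
# hypothetical invocation: tokenize and hash gradeable "hw1" as python with a window of 10
/usr/local/submitty/Lichen/bin/process_all.sh s19 csci1100 hw1 python 10
```

The language string is substituted directly into --${language}, so it must be one of the flags tokenize_all.py and hash_all.py accept (plaintext, python, or cpp).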

bin/tokenize_all.py

+17 −4
@@ -34,14 +34,27 @@ def tokenize(args,my_concatenated_file,my_tokenized_file):
     if args.plaintext:
         tokenizer = os.path.join(SUBMITTY_INSTALL_DIR,"Lichen","bin","plaintext_tokenizer.out")
         with open(my_concatenated_file,'r') as infile:
-            with open (my_tokenized_file,'w')as outfile:
+            with open (my_tokenized_file,'w') as outfile:
                 subprocess.call([tokenizer,"--ignore_newlines"],stdin=infile,stdout=outfile)
+
     elif args.python:
-        print("NEED A PYTHON TOKENIZER")
+        tokenizer = os.path.join(SUBMITTY_INSTALL_DIR,"Lichen","bin","python_tokenizer.py")
+        with open(my_concatenated_file,'r') as infile:
+            with open (my_tokenized_file,'w') as outfile:
+                command="python3 "+str(tokenizer)+" "+my_concatenated_file+" > "+my_tokenized_file
+                os.system(command)
+
     elif args.cpp:
-        print("NEED A C++ TOKENIZER")
+        tokenizer = os.path.join(SUBMITTY_INSTALL_DIR,"Lichen","bin","c_tokenizer.py")
+        with open(my_concatenated_file,'r') as infile:
+            with open (my_tokenized_file,'w') as outfile:
+                command="python "+str(tokenizer)+" "+my_concatenated_file+" > "+my_tokenized_file
+                os.system(command)
+
     else:
-        print("UNKNOWN TOKENIZER")
+        print("\n\nERROR: UNKNOWN TOKENIZER\n\n")
+        exit(1)
+
 
 def main():
     args = parse_args()
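
The python_tokenizer.py invoked here is added elsewhere in this commit and its contents are not shown in this excerpt. Purely as an illustration, a tokenizer that emits the JSON shape the hasher expects (a list of objects with line, char, type, and value fields) could be built on Python's standard tokenize module roughly like this:

```python
import json
import sys
import tokenize

# tokenize the file named on the command line and print one JSON object per token
tokens = []
with open(sys.argv[1], "rb") as f:                    # tokenize.tokenize() wants a bytes readline
    for tok in tokenize.tokenize(f.readline):
        tokens.append({
            "line":  tok.start[0],                    # 1-based line number
            "char":  tok.start[1] + 1,                # column, shifted to 1-based
            "type":  tokenize.tok_name[tok.type],     # e.g. "NAME", "OP", "NUMBER"
            "value": tok.string,
        })

print(json.dumps(tokens, indent=4, sort_keys=True))
```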

install_lichen.sh

+8 −1
@@ -27,10 +27,13 @@ fi
 
 
 ########################################################################################################################
-# compile & install the tokenizers
+# compile & install the tools
 
 mkdir -p ${lichen_installation_dir}/bin
 
+
+#--------------------
+# plaintext tool
 pushd ${lichen_repository_dir} > /dev/null
 clang++ -I ${nlohmann_dir}/include/ -std=c++11 -Wall tokenizer/plaintext/plaintext_tokenizer.cpp -o ${lichen_installation_dir}/bin/plaintext_tokenizer.out
 if [ $? -ne 0 ]; then
@@ -40,6 +43,7 @@ fi
 popd > /dev/null
 
 
+#-------------------------------------------
 # compile & install the hash comparison tool
 pushd ${lichen_repository_dir} > /dev/null
 clang++ -I ${nlohmann_dir}/include/ -lboost_system -lboost_filesystem -Wall -g -std=c++11 -Wall compare_hashes/compare_hashes.cpp -o ${lichen_installation_dir}/bin/compare_hashes.out
@@ -54,6 +58,9 @@ popd > /dev/null
 
 cp ${lichen_repository_dir}/bin/* ${lichen_installation_dir}/bin/
 
+cp ${lichen_repository_dir}/tokenizer/c/c_tokenizer.py ${lichen_installation_dir}/bin/c_tokenizer.py
+cp ${lichen_repository_dir}/tokenizer/python/python_tokenizer.py ${lichen_installation_dir}/bin/python_tokenizer.py
+
 
 ########################################################################################################################
 # fix permissions
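
After the install script completes, the installation bin directory should hold the two compiled tools plus the copied scripts; a quick, purely illustrative check:

```bash
# illustrative only; lichen_installation_dir is whatever the install script is configured with
ls ${lichen_installation_dir}/bin/
# expected to include: plaintext_tokenizer.out, compare_hashes.out,
# c_tokenizer.py, python_tokenizer.py, and everything copied from the repository's bin/ directory
```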

tokenizer/c/README.md

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Installation Instruction:-
2+
3+
sudo apt-get install python-clang-3.8
4+
5+
Usage:-
6+
7+
python c_tokenizer.py path/to/inputfile
8+
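
Assuming the python-clang-3.8 package above is installed, a quick sanity check that the bindings can load the libclang shared library c_tokenizer.py points at (the path is taken from that script) might look like:

```python
# sanity check only: confirm the clang bindings import and libclang loads from the expected path
import clang.cindex

clang.cindex.Config.set_library_file("/usr/lib/llvm-3.8/lib/libclang-3.8.so.1")
index = clang.cindex.Index.create()   # raises if the shared library cannot be loaded
print("libclang loaded:", index)
```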

tokenizer/c/c_tokenizer.py

+36
@@ -0,0 +1,36 @@
+import clang.cindex
+import json
+import sys
+import shutil
+import tempfile
+import os
+
+
+# apparently, the file name must end in .cpp (or some standard
+# c/c++ suffix) to be successfully tokenized
+
+# make a temporary filename
+tmp_cpp_file_handle,tmp_cpp_file_name=tempfile.mkstemp(suffix=".cpp")
+# copy the concatenated file to the temporary file location
+shutil.copy(sys.argv[1],tmp_cpp_file_name)
+
+clang.cindex.Config.set_library_file("/usr/lib/llvm-3.8/lib/libclang-3.8.so.1")
+idx = clang.cindex.Index.create()
+
+# parse the input file
+parsed_data = idx.parse(tmp_cpp_file_name)
+
+# remove the temporary file
+os.remove(tmp_cpp_file_name)
+
+tokens = []
+
+for token in parsed_data.get_tokens(extent = parsed_data.cursor.extent):
+    tmp = dict()
+    tmp["line"]=int(token.location.line)
+    tmp["char"]=int(token.location.column)
+    tmp["type"]=(str(token.kind))[10:]
+    tmp["value"]=str(token.spelling)
+    tokens.append(tmp)
+
+print ( json.dumps(tokens, indent=4, sort_keys=True) )
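
The [10:] slice on str(token.kind) strips the "TokenKind." prefix, so the emitted types are names like KEYWORD, IDENTIFIER, PUNCTUATION, and LITERAL. For a hypothetical one-line input such as int main(){return 0;}, the output would begin roughly as follows (illustrative, not captured from a real run):

```json
[
    {
        "char": 1,
        "line": 1,
        "type": "KEYWORD",
        "value": "int"
    },
    {
        "char": 5,
        "line": 1,
        "type": "IDENTIFIER",
        "value": "main"
    },
    {
        "char": 9,
        "line": 1,
        "type": "PUNCTUATION",
        "value": "("
    }
]
```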
