From 095e5f22ebb5fd1efa6cb06b055c4ebb220ca5c8 Mon Sep 17 00:00:00 2001
From: Joe
Date: Mon, 1 Jun 2015 10:10:41 +0800
Subject: [PATCH 01/16] change

---
 mrj-0.1/.classpath                            |  11 +-
 mrj-0.1/.gitignore                            |   2 +
 mrj-0.1/.idea/.name                           |   1 +
 mrj-0.1/.idea/compiler.xml                    |  23 +
 mrj-0.1/.idea/copyright/profiles_settings.xml |   3 +
 mrj-0.1/.idea/encodings.xml                   |   4 +
 mrj-0.1/.idea/misc.xml                        | 178 ++++
 mrj-0.1/.idea/modules.xml                     |   8 +
 mrj-0.1/.idea/scopes/scope_settings.xml       |   5 +
 mrj-0.1/.idea/vcs.xml                         |   6 +
 mrj-0.1/.idea/workspace.xml                   | 341 +++++
 mrj-0.1/.settings/org.eclipse.jdt.core.prefs  |  11 +
 mrj-0.1/mrj-0.1.iml                           | 533 ++++++
 .../mrj/importtriples/FilesImportTriples.java |  12 +-
 .../ImportTriplesDeconstructReducer.java      |   7 +-
 .../ImportTriplesSampleReducer.java           |   5 +-
 .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java  |  52 +-
 .../justification/OWLHorstJustification.java  |  10 +-
 .../OWLHorstJustificationMapper.java          |   5 +
 .../OWLHorstJustificationReducer.java         |   2 +
 .../reasoner/MapReduceReasonerJobConfig.java  |  12 +-
 .../reasoner/owl/OWLAllSomeValuesMapper.java  |   7 +
 .../owl/OWLEquivalenceSCSPMapper.java         |  29 +-
 .../owl/OWLEquivalenceSCSPReducer.java        |   3 +-
 .../mrj/reasoner/owl/OWLHasValueMapper.java   |   2 +
 .../mrj/reasoner/owl/OWLHasValueReducer.java  |   1 +
 .../reasoner/owl/OWLNotRecursiveMapper.java   |   6 +
 .../reasoner/owl/OWLNotRecursiveReducer.java  |   2 +
 .../mitt/mrj/reasoner/owl/OWLReasoner.java    |  48 +-
 .../reasoner/rdfs/RDFSSpecialPropsMapper.java |   3 +
 .../rdfs/RDFSSubPropDomRangeMapper.java       |   1 +
 .../rdfs/RDFSSubPropInheritMapper.java        |   2 +
 .../mrj/reasoner/rdfs/RDFSSubclasReducer.java |   3 +
 .../rdfs/RDFSSubpropDomRangeReducer.java      |   1 +
 .../rdfs/RDFSSubpropInheritReducer.java       |   2 +
 .../edu/neu/mitt/mrj/utils/Cassandraconf.java |   8 +
 36 files changed, 1295 insertions(+), 54 deletions(-)
 create mode 100644 mrj-0.1/.gitignore
 create mode 100644 mrj-0.1/.idea/.name
 create mode 100644 mrj-0.1/.idea/compiler.xml
 create mode 100644 mrj-0.1/.idea/copyright/profiles_settings.xml
 create mode 100644 mrj-0.1/.idea/encodings.xml
 create mode 100644 mrj-0.1/.idea/misc.xml
 create mode 100644 mrj-0.1/.idea/modules.xml
 create mode 100644 mrj-0.1/.idea/scopes/scope_settings.xml
 create mode 100644 mrj-0.1/.idea/vcs.xml
 create mode 100644 mrj-0.1/.idea/workspace.xml
 create mode 100644 mrj-0.1/.settings/org.eclipse.jdt.core.prefs
 create mode 100644 mrj-0.1/mrj-0.1.iml
 create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/utils/Cassandraconf.java

diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath
index 569943f..3006bdd 100644
--- a/mrj-0.1/.classpath
+++ b/mrj-0.1/.classpath
@@ -1,10 +1,11 @@
[classpath entry XML lost in extraction: 5 lines removed, 6 lines added]

diff --git a/mrj-0.1/.gitignore b/mrj-0.1/.gitignore
new file mode 100644
index 0000000..f8d886b
--- /dev/null
+++ b/mrj-0.1/.gitignore
@@ -0,0 +1,2 @@
+/bin
+/bin/

diff --git a/mrj-0.1/.idea/.name b/mrj-0.1/.idea/.name
new file mode 100644
index 0000000..bb04bae
--- /dev/null
+++ b/mrj-0.1/.idea/.name
@@ -0,0 +1 @@
+mrj-0.1
\ No newline at end of file

diff --git a/mrj-0.1/.idea/compiler.xml b/mrj-0.1/.idea/compiler.xml
new file mode 100644
index 0000000..a852314
--- /dev/null
+++ b/mrj-0.1/.idea/compiler.xml
@@ -0,0 +1,23 @@
[IDE XML content lost in extraction]
\ No newline at end of file

diff --git a/mrj-0.1/.idea/copyright/profiles_settings.xml b/mrj-0.1/.idea/copyright/profiles_settings.xml
new file mode 100644
index 0000000..e7bedf3
--- /dev/null
+++ b/mrj-0.1/.idea/copyright/profiles_settings.xml
@@ -0,0 +1,3 @@
[IDE XML content lost in extraction]
\ No newline at end of file

diff --git a/mrj-0.1/.idea/encodings.xml b/mrj-0.1/.idea/encodings.xml
new file mode 100644
index 0000000..d821048
--- /dev/null
+++ b/mrj-0.1/.idea/encodings.xml
@@ -0,0 +1,4 @@
[IDE XML content lost in extraction]
\ No newline at end of file

diff --git a/mrj-0.1/.idea/misc.xml b/mrj-0.1/.idea/misc.xml
new file mode 100644
index 0000000..1a5ae83
--- /dev/null
+++ b/mrj-0.1/.idea/misc.xml
@@ -0,0 +1,178 @@
[IDE XML content lost in extraction; only the project language level "1.8" survives]
\ No newline at end of file

diff --git a/mrj-0.1/.idea/modules.xml b/mrj-0.1/.idea/modules.xml
new file mode 100644
index 0000000..39bb12e
--- /dev/null
+++ b/mrj-0.1/.idea/modules.xml
@@ -0,0 +1,8 @@
[IDE XML content lost in extraction]
\ No newline at end of file

diff --git a/mrj-0.1/.idea/scopes/scope_settings.xml b/mrj-0.1/.idea/scopes/scope_settings.xml
new file mode 100644
index 0000000..922003b
--- /dev/null
+++ b/mrj-0.1/.idea/scopes/scope_settings.xml
@@ -0,0 +1,5 @@
[IDE XML content lost in extraction]
\ No newline at end of file

diff --git a/mrj-0.1/.idea/vcs.xml b/mrj-0.1/.idea/vcs.xml
new file mode 100644
index 0000000..6564d52
--- /dev/null
+++ b/mrj-0.1/.idea/vcs.xml
@@ -0,0 +1,6 @@
[IDE XML content lost in extraction]
\ No newline at end of file

diff --git a/mrj-0.1/.idea/workspace.xml b/mrj-0.1/.idea/workspace.xml
new file mode 100644
index 0000000..a30b4df
--- /dev/null
+++ b/mrj-0.1/.idea/workspace.xml
@@ -0,0 +1,341 @@
[IDE XML content lost in extraction; only the timestamp "1426120853528" survives]
\ No newline at end of file

diff --git a/mrj-0.1/.settings/org.eclipse.jdt.core.prefs b/mrj-0.1/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..7341ab1
--- /dev/null
+++ b/mrj-0.1/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.7

diff --git a/mrj-0.1/mrj-0.1.iml b/mrj-0.1/mrj-0.1.iml
new file mode 100644
index 0000000..017bf6e
--- /dev/null
+++ b/mrj-0.1/mrj-0.1.iml
@@ -0,0 +1,533 @@
[IDE module XML content lost in extraction]
\ No newline at end of file

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
index c9e2781..7140fbc 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
@@ -193,8 +193,8 @@ private void rewriteTriples(String[] args) throws Exception {
 	CqlConfigHelper.setOutputCql(job.getConfiguration(), query);
-	ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
-	ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
+	ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
+	ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
 	//Launch
 	long time = System.currentTimeMillis();
@@ -223,6 +223,14 @@ public static void main(String[] args) throws Exception {
 	long time = System.currentTimeMillis();
 	int res = ToolRunner.run(new Configuration(), new FilesImportTriples(), args);
//	log.info("Import time: " + (System.currentTimeMillis() - time));
+
+	//Modified by LiYang 2015/4/10
+	CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160);
+	db.init();
+	db.createIndexOnTripleType();
+	db.createIndexOnRule();
+	db.CassandraDBClose();
+
 	System.out.println("Import time: " + (System.currentTimeMillis() - time));
 	System.exit(res);
 }

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
index b2d64d0..8dd5dea 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
@@ -68,7 +68,7 @@ public void reduce(Text key, Iterable values, Context context)thr
 protected void setup(Context context) throws IOException, InterruptedException {
 	CassandraDB.setConfigLocation();	// 2014-12-11, Very strange, this works around.
try { - db = new CassandraDB("localhost", 9160); + db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); db.init(); } catch (InvalidRequestException e) { e.printStackTrace(); @@ -89,4 +89,9 @@ protected void setup(Context context) throws IOException, InterruptedException { counter = (Long.valueOf(taskId) + 1) << 32; log.debug("Start counter " + (Long.valueOf(taskId) + 1)); } + + protected void cleanup(Context context) throws IOException, InterruptedException{ + db.CassandraDBClose(); + } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java index 56f33a1..2738f80 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java @@ -55,7 +55,7 @@ public void reduce(Text key, Iterable values, Context context) th protected void setup(Context context) throws IOException, InterruptedException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. try { - db = new CassandraDB("localhost", 9160); + db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); db.init(); } catch (InvalidRequestException e) { e.printStackTrace(); @@ -77,4 +77,7 @@ protected void setup(Context context) throws IOException, InterruptedException { counter = (Long.valueOf(taskId)) << 13; if (counter == 0) { counter +=100; } } + protected void cleanup(Context context) throws IOException, InterruptedException{ + db.CassandraDBClose(); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index 00877c1..32aea88 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -58,10 +58,15 @@ import com.datastax.driver.core.Row; import com.datastax.driver.core.Statement; +//modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; import com.datastax.driver.core.querybuilder.QueryBuilder; +//modified +import org.apache.cassandra.thrift.CassandraServer; + + /** * @author gibeo_000 * @@ -87,14 +92,14 @@ public class CassandraDB { public static final String COLUMN_JUSTIFICATION = "justification"; //mrjks.results.justification public static final String COLUMN_STEP = "step"; // mrjks.results.step - public static final String DEFAULT_HOST = "localhost"; + public static final String DEFAULT_HOST = cn.edu.neu.mitt.mrj.utils.Cassandraconf.host; public static final String DEFAULT_PORT = "9160"; // in version 2.1.2, cql3 port is 9042 - public static final String CQL_PAGE_ROW_SIZE = "10"; //3 + public static final String CQL_PAGE_ROW_SIZE = "10000"; //3 modified by liyang // 2014-12-11, Very strange, this works around. 
- public static final String CONFIG_LOCATION = "file:///home/gibeon/Software/apache-cassandra-2.1.2/conf/cassandra.yaml"; + public static final String CONFIG_LOCATION = cn.edu.neu.mitt.mrj.utils.Cassandraconf.CassandraConfFile; public static void setConfigLocation(){ setConfigLocation(CONFIG_LOCATION); } @@ -112,13 +117,31 @@ private static Cassandra.Iface createConnection() throws TTransportException{ Integer.valueOf(System.getProperty("cassandra.port", DEFAULT_PORT))); } + + + private static TSocket socket = null; + private static TTransport trans = null; + private static Cassandra.Client client1 = null; private static Cassandra.Client createConnection(String host, Integer port) throws TTransportException { - TSocket socket = new TSocket(host, port); - TTransport trans = new TFramedTransport(socket); + socket = new TSocket(host, port); + trans = new TFramedTransport(socket); trans.open(); TProtocol protocol = new TBinaryProtocol(trans); - return new Cassandra.Client(protocol); + if (client1 != null){ + return client1; + } + client1 = new Cassandra.Client(protocol); + //Modified 2015/5/25 + return client1; + } + + private static void close(){ + if(trans != null) + trans.close(); + if(socket != null) + socket.close(); + return; } @@ -227,6 +250,10 @@ public CassandraDB(String host, Integer port) throws TTransportException { client = createConnection(host, port); } + public void CassandraDBClose(){ + this.close(); + } + public void init() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ setupKeyspace(client); client.set_keyspace(KEYSPACE); @@ -272,9 +299,12 @@ public long getRowCountAccordingTripleType(int tripletype){ * Get the row count according to the type of rule. * @return row count. */ + //modified + /* public long getRowCountAccordingRule(int rule){ String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + " WHERE " + COLUMN_RULE + " = " + rule + " ALLOW FILTERING"; // must use ALLOW FILTERING + //modified long num = 0; try { @@ -294,7 +324,7 @@ public long getRowCountAccordingRule(int rule){ return num; } - +*/ public void insertResources(long id, String label) throws InvalidRequestException, TException{ String query = "INSERT INTO " + COLUMNFAMILY_RESOURCES + @@ -403,6 +433,8 @@ public String idToLabel(long id) throws InvalidRequestException, UnavailableExce return null; } + //modified cassandra java 2.0.5 + public static Set> getJustifications() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException, IOException, ClassNotFoundException, RequestExecutionException{ Set> results = new HashSet>(); @@ -459,7 +491,7 @@ else if (currentResult.containsAll(testResult)){ results.add(testResult); } }*/ - + //modified cassandra java 2.0.5 return results; } @@ -620,7 +652,7 @@ public void createIndexOnRule() throws InvalidRequestException, UnavailableExcep public static void main(String[] args) { try { - CassandraDB db = new CassandraDB("localhost", 9160); + CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); db.init(); db.createIndexOnTripleType(); db.createIndexOnRule(); @@ -629,8 +661,10 @@ public static void main(String[] args) { Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); db.loadSetIntoMemory(schemaTriples, filters, 0); + System.out.println(schemaTriples); + //modified 2015/5/19 System.out.println("Transitive: " + db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE)); System.exit(0); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java index 0b848cd..30a40ad 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java @@ -35,7 +35,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.TripleKeyMapComparator; - +//modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; /** @@ -171,10 +171,11 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio step++; }while (newExpanded > 0); + //modified cassandra java 2.0.5 CassandraDB db = null; try{ - db = new CassandraDB("localhost", 9160); + db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); db.getDBClient().set_keyspace(CassandraDB.KEYSPACE); Set> justifications = db.getJustifications(); int count = 0; @@ -188,10 +189,13 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">"); } } + + db.CassandraDBClose(); + }catch(Exception e){ System.err.println(e.getMessage()); } - + System.out.println("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java index df05ca2..d7a4bf0 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java @@ -13,6 +13,7 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; 
import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer.Context;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -393,5 +394,9 @@ protected void setup(Context context) throws IOException, InterruptedException {
 			te.printStackTrace();
 		}
 	}
+
+	protected void cleanup(Context context) throws IOException, InterruptedException{
+		db.CassandraDBClose();
+	}
 }

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java
index d477f56..73e4d54 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java
@@ -17,6 +17,7 @@ import cn.edu.neu.mitt.mrj.io.dbs.SimpleClientDataStax;
 import com.datastax.driver.core.DataType;
+//modified cassandra java 2.0.5
 import com.datastax.driver.core.TupleType;
 import com.datastax.driver.core.TupleValue;
 import com.datastax.driver.core.querybuilder.Insert;
@@ -42,6 +43,7 @@ protected void reduce(MapWritable key, Iterable values, Context co
 		total += count.get();
 	}
//	System.out.println("Total count is: " + total);
+	//modified cassandra java 2.0.5
 	if (total == key.size()){
 		// Find a candidate justification, output it to the database
 		Set resultJustification = new HashSet();

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
index 03a6e64..c18fc0c 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
@@ -33,10 +33,10 @@ public class MapReduceReasonerJobConfig {
 	// Input from CassandraDB.COLUMNFAMILY_JUSTIFICATIONS
 	private static void configureCassandraInput(Job job, Set filters) {
 		//Set the input
-		ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
+		ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
 		// Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml
 		//ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
-		ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
+		ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
 		ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
 		if (filters.size() == 0){
 			CqlConfigHelper.setInputCql(job.getConfiguration(),
@@ -120,7 +120,8 @@ else if (filters.size() == 1){
 		}
 		CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE);
-		ConfigHelper.setInputSplitSize(job.getConfiguration(), 180);
+		//Modified by LiYang
+		ConfigHelper.setInputSplitSize(job.getConfiguration(), 5000000);
 		job.setInputFormatClass(CqlInputFormat.class);
 		System.out.println("ConfigHelper.getInputSplitSize - input: " + ConfigHelper.getInputSplitSize(job.getConfiguration()));
 		System.out.println("CqlConfigHelper.getInputPageRowSize - input: " + CqlConfigHelper.getInputPageRowSize(job.getConfiguration()));
@@ -134,8 +135,9 @@ private static void configureCassandraOutput(Job job) {
 		job.setOutputKeyClass(Map.class);
 		job.setOutputValueClass(List.class);
 		job.setOutputFormatClass(CqlOutputFormat.class);
-
ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); - ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java index cb27ef0..79afc1a 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java @@ -16,6 +16,7 @@ import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer.Context; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -150,6 +151,11 @@ public void setup(Context context) throws IOException { makeJoin(onPropertyTmp, context, someValuesTmp, allValuesTmp, someValues, allValues, onPropertySome, onPropertyAll); + + + db.CassandraDBClose(); + + }catch (TTransportException e) { e.printStackTrace(); } catch (InvalidRequestException e) { @@ -231,4 +237,5 @@ protected void makeJoin(Map> onPropertyTmp, Context contex } } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java index a4afd43..9423dd1 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java @@ -94,25 +94,29 @@ public void setup(Context context) throws IOException { subpropSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); - db.loadSetIntoMemory(subpropSchemaTriples, filters, -1); + //modified 2015/5/31 + //db.loadSetIntoMemory(subpropSchemaTriples, filters, -1); } if (subclassSchemaTriples == null) { subclassSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS); - db.loadSetIntoMemory(subclassSchemaTriples, filters, -1); + //modified 2015/5/31 + //db.loadSetIntoMemory(subclassSchemaTriples, filters, -1); } - } catch (TTransportException e) { - e.printStackTrace(); - } catch (InvalidRequestException e) { - e.printStackTrace(); - } catch (UnavailableException e) { - e.printStackTrace(); - } catch (TimedOutException e) { - e.printStackTrace(); - } catch (SchemaDisagreementException e) { - e.printStackTrace(); + db.CassandraDBClose(); + //modified 2015/5/31 +// } catch (TTransportException e) { +// e.printStackTrace(); +// } catch (InvalidRequestException e) { +// e.printStackTrace(); +// } catch (UnavailableException e) { +// e.printStackTrace(); +// } catch (TimedOutException e) { +// e.printStackTrace(); +// } catch (SchemaDisagreementException e) { +// e.printStackTrace(); } catch (TException e) { e.printStackTrace(); } @@ -120,4 +124,5 @@ public void setup(Context context) throws IOException { } + } 
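The hunks in this patch keep repeating one lifecycle pattern: open a CassandraDB in a task's setup(), and release it with CassandraDBClose() when the task is done (in cleanup() when the connection is used per record, or at the end of setup() when it is only needed to preload schema sets). Isolated as a minimal sketch for reference (illustrative only; the key/value types are arbitrary, while CassandraDB and Cassandraconf are the classes from this repository):

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
    import cn.edu.neu.mitt.mrj.utils.Cassandraconf;

    public class DbAwareReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        private CassandraDB db;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            try {
                // One connection per task attempt, reused across all reduce() calls.
                db = new CassandraDB(Cassandraconf.host, 9160);
                db.init();
            } catch (Exception e) {
                throw new IOException("Could not open Cassandra connection", e);
            }
        }

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // ... use db here ...
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Always release the Thrift transport when the task ends.
            if (db != null)
                db.CassandraDBClose();
        }
    }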
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java index e98f5ba..c755300 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java @@ -255,7 +255,8 @@ public void setup(Context context) throws IOException { Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY); equivalencePropertiesSchemaTriples = db.loadMapIntoMemory(filters); - } + } + db.CassandraDBClose(); }catch (TTransportException e) { e.printStackTrace(); } catch (InvalidRequestException e) { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java index b78a782..3cd6514 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java @@ -62,6 +62,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup oValue.set(values, 0, 17); context.write(oKey, oValue); + } // Moved into if-else by WuGang, 20150203 @@ -95,6 +96,7 @@ public void setup(Context context) throws IOException { onPropertyInverted = new HashSet(); db.loadSetIntoMemory(onPropertyInverted, filters, -1, true); } + db.CassandraDBClose(); }catch(TException te){ te.printStackTrace(); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java index cdae522..8a6a562 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java @@ -130,6 +130,7 @@ public void setup(Context context) throws IOException { onPropertyFilter.add(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY); onPropertyMap = db.loadMapIntoMemory(onPropertyFilter); onProperty2Map = db.loadMapIntoMemory(onPropertyFilter, true); + db.CassandraDBClose(); }catch (TTransportException e) { e.printStackTrace(); } catch (InvalidRequestException e) { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java index 2fc767b..ef5bce4 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java @@ -40,6 +40,7 @@ public void map(Long key, Row row, Context context) throws IOException,Interrupt /* Check if the triple has the functional property. If yes output * a key value so it can be matched in the reducer. 
 */
+
 	if (schemaFunctionalProperties.contains(value.getPredicate()) && !value.isObjectLiteral()) {
 		//Set as key a particular flag plus the predicate
@@ -95,6 +96,9 @@ public void map(Long key, Row row, Context context) throws IOException,Interrupt
 		}
 		context.write(this.key, new LongWritable(predicate));
 	}
+
+	//System.out.println("Cassandra time :"+(System.currentTimeMillis() - time));
+
 }

 protected void setup(Context context) throws IOException {
@@ -142,6 +146,8 @@ protected void setup(Context context) throws IOException {
 			filters.add(TriplesUtils.SCHEMA_TRIPLE_TRANSITIVE_PROPERTY);
 			hasSchemaChanged = db.loadSetIntoMemory(schemaTransitiveProperties, filters, previousDerivation);
 		}
+
+		db.CassandraDBClose();
 	}catch(TException te){
 		te.printStackTrace();
 	}

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
index e098029..cc08af6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
@@ -239,6 +239,8 @@ public void setup(Context context) throws IOException {
 		// Added by WuGang 2015-01-27,
 		Map> schemaInverseOfProperties_reverse = db.loadMapIntoMemory(filters, true);
 		schemaInverseOfProperties.putAll(schemaInverseOfProperties_reverse);
+
+		db.CassandraDBClose();
 	}catch (TTransportException e) {
 		e.printStackTrace();
 	} catch (InvalidRequestException e) {

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
index ef48ffc..167fae9 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
@@ -100,7 +100,7 @@ public static void main(String[] args) {
 	try {
 		OWLReasoner owlreasoner = new OWLReasoner();
-		owlreasoner.db = new CassandraDB("localhost", 9160);
+		owlreasoner.db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160);
 		owlreasoner.db.init();

 		ToolRunner.run(new Configuration(), owlreasoner, args);
@@ -235,7 +235,12 @@ private long inferTransitivityStatements(String[] args)
 	// We'll not use filesystem but db.getTransitiveStatementsCount()
 	long derivation = 0;
 	int level = 0;
+
+	//modified 2015/5/19
 	long beforeInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE);
+
+	//modified 2015/5/19
+	//for(int i = 0;i <= 3; i++){
 	while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) {
//		System.out.println("Start searching in the while loop of inferTransitivityStatements");
 		level++;
@@ -282,7 +287,14 @@ private long inferSameAsStatements(String[] args) {
 	boolean derivedSynonyms = true;
 	int derivationStep = 1;
 	long previousStepDerived = 0;	// Added by WuGang 2015-01-30
+
+	//modified 2015/5/19
+	//int i = 0;
 	while (derivedSynonyms) {
+		//modified 2015/5/19
+		//i++;
+		//if (i == 3)
+		//	return 0;
 		if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0)	// We need not to infer on SameAs
 			return 0;
@@ -312,13 +324,17 @@ private long inferSameAsStatements(String[] args) {
 		}

 		//Filter the table.
+
+		//modified 2015/5/19
 		long tableSize = db.getRowCountAccordingTripleType(TriplesUtils.SYNONYMS_TABLE);
//		System.out.println("tableSize is : " + tableSize);
//		System.out.println("sizeDictionary is : " + sizeDictionary);
//		System.out.println("derivedTriples is : " + derivedTriples);
-		if (tableSize > sizeDictionary || derivedTriples > 0) {
+		//modified 2015/5/19
+		if (tableSize > sizeDictionary || derivedTriples > 0) {
+		//for(int j =0 ;j <= 3 ; j++){
 			//1) Calculate the URIs distribution and get the first 2M.
 			job = MapReduceReasonerJobConfig.createNewJob(
 				OWLReasoner.class,
@@ -401,6 +417,7 @@ private long inferSameAsStatements(String[] args) throws IOException, Inter
 				fs.rename(new Path(args[0] + "/dir-input"), new Path(args[0] + "/_dir-input"));
 			}

+			//modified 2015/5/19
 			sizeDictionary = tableSize;

 		} catch (Exception e) {
@@ -450,8 +467,8 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException,
 	int previousSomeAllValuesDerivation = -1;

 	// Added by Wugang 20150111
-	long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15);	// see OWLAllSomeValuesReducer
-	long countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16);	// see OWLAllSomeValuesReducer
+	//long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15);	// see OWLAllSomeValuesReducer
+	//long countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16);	// see OWLAllSomeValuesReducer

 	while (derivedNewStatements) {
 		step++;
@@ -473,17 +490,17 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException,
 		job.waitForCompletion(true);

 		// Added by Wugang 20150111
-		countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15;	// see OWLAllSomeValuesReducer
-		countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16;	// see OWLAllSomeValuesReducer
-		totalDerivation = countRule15 + countRule16;
+		// countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15;	// see OWLAllSomeValuesReducer
+		// countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16;	// see OWLAllSomeValuesReducer
+		// totalDerivation = countRule15 + countRule16;

 		derivedNewStatements = (totalDerivation > 0);
 	}

 	// Added by Wugang 20150111
-	countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15;	// see OWLAllSomeValuesReducer
-	countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16;	// see OWLAllSomeValuesReducer
-	totalDerivation = countRule15 + countRule16;
+	//countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15;	// see OWLAllSomeValuesReducer
+	//countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16;	// see OWLAllSomeValuesReducer
+	//totalDerivation = countRule15 + countRule16;

 	return totalDerivation;
 }
@@ -495,8 +512,8 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup
 	step++;

 	// Added by Wugang 20150111
-	long countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a);	// see OWLAllSomeValuesReducer
-	long countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b);	// see OWLAllSomeValuesReducer
+	//long countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a);	// see OWLAllSomeValuesReducer
+	//long countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b);	// see OWLAllSomeValuesReducer

 	Job job =
MapReduceReasonerJobConfig.createNewJob( @@ -523,9 +540,10 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup // Get inferred count if (job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue() > 0) { - countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer - countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer - return(countRule14a + countRule14b); + // countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer + // countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer + // return(countRule14a + countRule14b); + return 0; } else { return 0; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java index 065a112..f915446 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java @@ -111,6 +111,9 @@ public void setup(Context context) throws IOException { filters.add(TriplesUtils.SCHEMA_TRIPLE_LITERAL_SUBCLASS); db.loadSetIntoMemory(literalSubclasses, filters, -1); } + + db.CassandraDBClose(); + } catch(TTransportException tte){ tte.printStackTrace(); } catch (InvalidRequestException e) { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java index 1fa8b13..9821e66 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java @@ -104,6 +104,7 @@ protected void setup(Context context) throws IOException { filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY); hasSchemaChanged |= db.loadSetIntoMemory(rangeSchemaTriples, filters, previousExecutionStep); + db.CassandraDBClose(); } }catch(TTransportException tte){ tte.printStackTrace(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java index 190743d..8347faf 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java @@ -84,6 +84,8 @@ protected void setup(Context context) throws IOException { hasSchemaChanged = db.loadSetIntoMemory(subpropSchemaTriples, filters, previousExecutionStep); // hasSchemaChanged = FilesTriplesReader.loadSetIntoMemory(subpropSchemaTriples, context, // "FILTER_ONLY_SUBPROP_SCHEMA", previousExecutionStep); + + db.CassandraDBClose(); } catch (TException e) { e.printStackTrace(); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java index 1eb0f42..64f43f2 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java @@ -183,6 +183,7 @@ public void setup(Context context) throws IOException { Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS); subclassSchemaTriples = db.loadMapIntoMemory(filters); + 
 		db.CassandraDBClose();
 	} catch (TTransportException e) {
 		e.printStackTrace();
 	} catch (InvalidRequestException e) {
@@ -207,6 +208,8 @@ public void setup(Context context) throws IOException {
 		memberProperties = new HashSet();
 		db.loadSetIntoMemory(memberProperties, filters, -1);
+
+		db.CassandraDBClose();
 	} catch (TTransportException e) {
 		e.printStackTrace();
 	} catch (InvalidRequestException e) {

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
index 42c5d39..d774a6d 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
@@ -124,6 +124,7 @@ public void setup(Context context) throws IOException {
 			filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY);
 			rangeSchemaTriples = db.loadMapIntoMemory(filters);
 		}
+		db.CassandraDBClose();
 	}catch(TTransportException tte){
 		tte.printStackTrace();
 	} catch (InvalidRequestException e) {

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java
index b8402e6..6f295a1 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java
@@ -154,6 +154,8 @@ public void setup(Context context) throws IOException {
 		filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
 		subpropSchemaTriples = db.loadMapIntoMemory(filters);
//		subpropSchemaTriples = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_SUBPROP_SCHEMA", context);
+
+		db.CassandraDBClose();
 	} catch (TTransportException e) {
 		e.printStackTrace();
 	} catch (InvalidRequestException e) {

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/utils/Cassandraconf.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/utils/Cassandraconf.java
new file mode 100644
index 0000000..0c335ef
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/utils/Cassandraconf.java
@@ -0,0 +1,8 @@
+package cn.edu.neu.mitt.mrj.utils;
+
+public class Cassandraconf {
+	public static final String host = "192.168.171.112";
+	public static final String CassandraConfFile = "file:///home/imdb/work/apache-cassandra-2.1.2/conf/cassandra.yaml";
+	public static final String partitioner = "Murmur3Partitioner";
+
+}
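Patch 01 routes every previously hardcoded "localhost" / "Murmur3Partitioner" / cassandra.yaml path through the new Cassandraconf class above. One possible refinement, sketched here purely as an illustration (the property names are invented and nothing below is part of this series), is to back the same constants with JVM system properties, so a deployment can be repointed without recompiling:

    package cn.edu.neu.mitt.mrj.utils;

    // Hypothetical variant: same defaults as the committed Cassandraconf, but
    // overridable with -Dmrj.cassandra.host=..., -Dmrj.cassandra.conf=... and
    // -Dmrj.cassandra.partitioner=... on the JVM command line.
    public class Cassandraconf {
        public static final String host =
                System.getProperty("mrj.cassandra.host", "192.168.171.112");
        public static final String CassandraConfFile =
                System.getProperty("mrj.cassandra.conf",
                        "file:///home/imdb/work/apache-cassandra-2.1.2/conf/cassandra.yaml");
        public static final String partitioner =
                System.getProperty("mrj.cassandra.partitioner", "Murmur3Partitioner");
    }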
From 6b3d383b762de74499bd8b39695752d2494aef18 Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Mon, 8 Jun 2015 00:07:05 +0800
Subject: [PATCH 02/16] Added a function dedicated to deleting triples that are
 not inference results, and added RDFSOWLReasoner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java  | 22 ++++--
 .../mitt/mrj/reasoner/RDFSOWLReasoner.java    | 74 +++++++++++++++++++
 .../owl/OWLEquivalenceSCSPMapper.java         |  4 +-
 .../mitt/mrj/reasoner/owl/OWLReasoner.java    | 18 ++---
 4 files changed, 99 insertions(+), 19 deletions(-)
 create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
index 32aea88..c1a459f 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
@@ -23,9 +23,6 @@ import java.util.Map;
 import java.util.Set;

-import org.apache.cassandra.cql3.QueryProcessor;
-import org.apache.cassandra.cql3.UntypedResultSet;
-import org.apache.cassandra.db.marshal.TupleType;
 import org.apache.cassandra.exceptions.RequestExecutionException;
 import org.apache.cassandra.thrift.Cassandra;
 import org.apache.cassandra.thrift.Column;
@@ -60,11 +57,9 @@ import com.datastax.driver.core.Statement;
 //modified cassandra java 2.0.5
 import com.datastax.driver.core.TupleValue;
+import com.datastax.driver.core.querybuilder.Delete.Where;
 import com.datastax.driver.core.querybuilder.QueryBuilder;
-
-
 //modified
-import org.apache.cassandra.thrift.CassandraServer;

 /**
@@ -582,7 +577,7 @@ public Map> loadMapIntoMemory(Set filters) throw
 		return loadMapIntoMemory(filters, false);
 	}

-	// The key of the returned map is the triple's subject; the value is the object
+	// The key of the returned map is the triple's subject; the value is the object
 	public Map> loadMapIntoMemory(Set filters, boolean inverted) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException {
 		long startTime = System.currentTimeMillis();
@@ -648,7 +643,18 @@ public void createIndexOnRule() throws InvalidRequestException, UnavailableExcep
 		client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
 	}
-
+	// Added by WuGang 2015-06-08
+	public static void removeOriginalTriples(){
+		SimpleClientDataStax scds = new SimpleClientDataStax();
+		scds.connect(DEFAULT_HOST);
+
+		Where dQuery = QueryBuilder.delete()
+				.from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS)
+				.where(QueryBuilder.eq(COLUMN_RULE, ByteBufferUtil.bytes(0)));
+		scds.getSession().execute(dQuery);
+
+		scds.close();
+	}

 	public static void main(String[] args) {
 		try {

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java
new file mode 100644
index 0000000..f8d90ff
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java
@@ -0,0 +1,74 @@
+package cn.edu.neu.mitt.mrj.reasoner;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import cn.edu.neu.mitt.mrj.reasoner.owl.OWLReasoner;
+import cn.edu.neu.mitt.mrj.reasoner.rdfs.RDFSReasoner;
+
+public class RDFSOWLReasoner {
+
+	protected static Logger log = LoggerFactory.getLogger(RDFSOWLReasoner.class);
+
+	static int step = 0;
+
+	private static void parseArgs(String[] args) {
+
+		for(int i=0;i
[a run of RDFSOWLReasoner.java was lost in extraction here (the remainder of parseArgs() and the head of main()); the text resumes mid-loop:]
+ 0;
+			firstLoop = false;
+		}
+//		log.info("Number triples derived: " + totalDerivation);
+//		log.info("Time derivation: " + (System.currentTimeMillis() - startTime));
+		System.out.println("Number triples derived: " + totalDerivation);
+		System.out.println("Time derivation: " + (System.currentTimeMillis() - startTime));
+	} catch (Exception e) {
+		log.error(e.getMessage());
+		e.printStackTrace();
+	}
+	}
+
+}
\ No newline at end of file

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
index 9423dd1..ab4cfc0 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
@@ -95,7 +95,7 @@ public void setup(Context context) throws IOException {
 			Set filters = new HashSet();
 			filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
 			//modified 2015/5/31
-			//db.loadSetIntoMemory(subpropSchemaTriples, filters, -1);
+			db.loadSetIntoMemory(subpropSchemaTriples, filters, -1);
 		}

 		if (subclassSchemaTriples == null) {
@@ -103,7 +103,7 @@ public void setup(Context context) throws IOException {
 			Set filters = new HashSet();
 			filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
 			//modified 2015/5/31
-			//db.loadSetIntoMemory(subclassSchemaTriples, filters, -1);
+			db.loadSetIntoMemory(subclassSchemaTriples, filters, -1);
 		}
 		db.CassandraDBClose();
 		//modified 2015/5/31
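This patch re-enables the schema preloads that patch 01 had commented out. Because the schema sets are static, the null check in setup() makes each JVM load them at most once, and reused task JVMs share the cached set. A minimal sketch of that guard (illustrative only: the Long/Integer element types are assumptions, since the generics were stripped above, and the synchronized block is an added hardening the patch itself does not use):

    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Set;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
    import cn.edu.neu.mitt.mrj.utils.Cassandraconf;
    import cn.edu.neu.mitt.mrj.utils.TriplesUtils;

    public abstract class SchemaCachingMapper extends Mapper<LongWritable, Text, Text, Text> {
        // Static: loaded once per JVM and shared by every task attempt that reuses it.
        protected static Set<Long> subpropSchemaTriples = null;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            synchronized (SchemaCachingMapper.class) {
                if (subpropSchemaTriples == null) {
                    Set<Long> loaded = new HashSet<Long>();
                    Set<Integer> filters = new HashSet<Integer>();
                    filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
                    try {
                        CassandraDB db = new CassandraDB(Cassandraconf.host, 9160);
                        db.init();
                        db.loadSetIntoMemory(loaded, filters, -1);
                        db.CassandraDBClose();
                    } catch (Exception e) {
                        throw new IOException("schema preload failed", e);
                    }
                    subpropSchemaTriples = loaded;
                }
            }
        }
    }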
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
index 167fae9..44802ff 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
@@ -37,7 +37,7 @@ public class OWLReasoner extends Configured implements Tool {
 	public static final String OWL_PROP_INHERITANCE_TMP = "/dir-tmp-prop-inheritance/";
 	public static final String OWL_PROP_INHERITANCE = "/dir-prop-inheritance/";
 	public static final String OWL_TRANSITIVITY_BASE = OWL_PROP_INHERITANCE_TMP + "dir-transitivity-base/";
-	public static final String OWL_TRANSITIVITY = "dir-transitivity/";	// Added by WuGang 2010-08-25, newly added directory
+	public static final String OWL_TRANSITIVITY = "dir-transitivity/";	// Added by WuGang 2010-08-25, newly added directory
 	public static final String OWL_SYNONYMS_TABLE = "dir-table-synonyms/";
 	public static final String OWL_SYNONYMS_TABLE_NEW = "_table_synonyms_new/";
@@ -154,7 +154,7 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio
 			break;
 		currentStep++;
 		long hasValueDerivation = inferHasValueStatements(args);
-		System.out.println("-----------inferHasValueStatements finished");
+		System.out.println("-----------inferHasValueStatements finished");
 		derivedTriples += hasValueDerivation;
 		if (hasValueDerivation > 0)
 			lastDerivationStep = currentStep;
@@ -162,7 +162,7 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio
 			break;
 		currentStep++;
 		long someAllDerivation = inferSomeAndAllValuesStatements(args);
-		System.out.println("-----------inferSomeAndAllValuesStatements finished");
+		System.out.println("-----------inferSomeAndAllValuesStatements finished");
 		derivedTriples += someAllDerivation;
 		if (someAllDerivation > 0)
 			lastDerivationStep = currentStep;
@@ -230,7 +230,7 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter
 private long inferTransitivityStatements(String[] args)
 		throws IOException, InterruptedException, ClassNotFoundException {
 	boolean derivedNewStatements = true;
-//	System.out.println("At the beginning of inferTransitivityStatements");
+//	System.out.println("At the beginning of inferTransitivityStatements");

 	// We'll not use filesystem but db.getTransitiveStatementsCount()
 	long derivation = 0;
@@ -242,7 +242,7 @@ private long inferTransitivityStatements(String[] args)
 	//modified 2015/5/19
 	//for(int i = 0;i <= 3; i++){
 	while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) {
-//		System.out.println("Start searching in the while loop of inferTransitivityStatements");
+//		System.out.println("Start searching in the while loop of inferTransitivityStatements");
 		level++;

 		//Configure input. Take only the directories that are two levels below
@@ -435,10 +435,10 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter
 	step++;

 	Set filters = new HashSet();
-	filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
-	filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
-	filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_CLASS);
-	filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
+//	filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
+//	filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
+//	filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_CLASS);
+//	filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
 	Job job = MapReduceReasonerJobConfig.createNewJob(
 		OWLReasoner.class,

From 147f65968a038e71f5d93976a7fad8cf960edca0 Mon Sep 17 00:00:00 2001
From: Gang Wu
Date: Mon, 8 Jun 2015 00:47:01 +0800
Subject: [PATCH 03/16] After this update, run OWLHorstJustification with the
 extra trailing argument --clearoriginals; it should be able to delete the
 unused triples.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../justification/OWLHorstJustification.java  | 21 +++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java
index 30a40ad..5552170 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java
@@ -2,7 +2,7 @@
  * Project Name: mrj-0.1
  * File Name: OWLHorstJustification.java
  * @author Gang Wu
- * Feb 5, 2015, 4:58:08 PM
+ * Feb 5, 2015, 4:58:08 PM
  *
  * Description:
  * TODO
@@ -53,6 +53,8 @@ public class OWLHorstJustification extends Configured implements Tool {
 	public static long pre = -1;
 	public static long obj = -1;
 	public static Path justificationsDirBase = new Path("/justification");
+
+	private boolean bClearOriginals = false;

 	/**
 	 *
@@ -79,6 +81,10 @@ public void parseArgs(String[] args) {
 			numMapTasks = Integer.valueOf(args[++i]);
 		if (args[i].equalsIgnoreCase("--reducetasks"))
 			numReduceTasks = Integer.valueOf(args[++i]);
+
+		// Added by WuGang 2015-06-08
+		if (args[i].equalsIgnoreCase("--clearoriginals"))
+			bClearOriginals = true;
 	}
 }
@@ -93,7 +99,7 @@ public static void prepareInput(long sub, long pre, long obj, boolean literal) {
 	Configuration conf = new Configuration();
 	try {
 		int step = 0;
-		Path justificationsDir = new Path(justificationsDirBase, String.valueOf(step));	// Under this directory, create a file named "original" to store the initial justification triples
+		Path justificationsDir = new Path(justificationsDirBase, String.valueOf(step));	// Under this directory, create a file named "original" to store the initial justification triples
 		FileSystem fs = FileSystem.get(URI.create(justificationsDir.toString()), conf);
 		if (!fs.exists(justificationsDir)) {
 			SequenceFile.Writer writer = SequenceFile.createWriter(fs,
@@ -149,12 +155,19 @@ private Job createJustificationJob(int step) throws IOException {

 public long launchClosure(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
+	parseArgs(args);
+
+	// Added by WuGang 2015-06-08
+	if (bClearOriginals)
+		CassandraDB.removeOriginalTriples();
+
+
long total = 0; // Total justifications long newExpanded = -1; // count of explanations that expanded in this loop long startTime = System.currentTimeMillis(); int step = 0; - parseArgs(args); + prepareInput(sub, pre, obj, false); // Default it is not a literal. // find justifications @@ -218,7 +231,7 @@ public int run(String[] args) throws Exception { public static void main(String[] args) { if (args.length < 2) { - System.out.println("USAGE: OWLHorstJustification [DerivedTriples base path] [Justifications base path] [options]"); + System.out.println("USAGE: OWLHorstJustification [options]"); return; } From 3017472d547e232b6699bafc4debeb567a0496fa Mon Sep 17 00:00:00 2001 From: Joe Date: Thu, 2 Jul 2015 10:23:29 +0800 Subject: [PATCH 04/16] 72 --- mrj-0.1/.classpath | 4 +- .../org.eclipse.core.resources.prefs | 2 + .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 208 ++++++++++++++++-- .../reasoner/MapReduceReasonerJobConfig.java | 2 +- .../mitt/mrj/reasoner/RDFSOWLReasoner.java | 11 +- .../mrj/reasoner/ReasonedJustifications.java | 159 +++++++++++++ .../ReasonedJustificationsMapper.java | 79 +++++++ .../ReasonedJustificationsReducer.java | 46 ++++ .../reasoner/owl/OWLAllSomeValuesMapper.java | 8 +- .../reasoner/owl/OWLAllSomeValuesReducer.java | 2 +- .../mitt/mrj/reasoner/owl/OWLReasoner.java | 24 +- .../rdfs/RDFSSpecialPropsReducer.java | 2 +- .../src/prejustification/SelectInferRows.java | 143 ++++++++++++ .../prejustification/SelectInferRowsMap.java | 79 +++++++ .../SelectInferRowsReduce.java | 18 ++ 15 files changed, 752 insertions(+), 35 deletions(-) create mode 100644 mrj-0.1/.settings/org.eclipse.core.resources.prefs create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java create mode 100644 mrj-0.1/src/prejustification/SelectInferRows.java create mode 100644 mrj-0.1/src/prejustification/SelectInferRowsMap.java create mode 100644 mrj-0.1/src/prejustification/SelectInferRowsReduce.java diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath index 3006bdd..e1c1f9a 100644 --- a/mrj-0.1/.classpath +++ b/mrj-0.1/.classpath @@ -5,7 +5,7 @@ - - + + diff --git a/mrj-0.1/.settings/org.eclipse.core.resources.prefs b/mrj-0.1/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..2dcd9cf --- /dev/null +++ b/mrj-0.1/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding//src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java=UTF-8 diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index c1a459f..dbfceca 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -53,13 +53,21 @@ import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SimpleStatement; +import com.datastax.driver.core.SocketOptions; import com.datastax.driver.core.Statement; //modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; +import com.datastax.driver.core.Cluster.Builder; import com.datastax.driver.core.querybuilder.Delete.Where; +import 
com.datastax.driver.core.querybuilder.Insert; import com.datastax.driver.core.querybuilder.QueryBuilder; //modified +import com.datastax.driver.core.querybuilder.Select; /** @@ -231,6 +239,26 @@ private static void setupTables(Cassandra.Iface client) catch (InvalidRequestException e) { logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESULTS, e); } + //Create resultrow table + String cquery = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + "." + "resultrows" + + " ( " + + COLUMN_SUB + " bigint, " + // partition key + COLUMN_PRE + " bigint, " + // partition key + COLUMN_OBJ + " bigint, " + // partition key + COLUMN_IS_LITERAL + " boolean, " + // partition key + COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + +// COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + + COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); + } @@ -267,8 +295,9 @@ public Cassandra.Iface getDBClient(){ * @return row count. */ public long getRowCountAccordingTripleType(int tripletype){ + //ALLOW FILTERING String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype; + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; long num = 0; try { @@ -409,7 +438,6 @@ public static void writeJustificationToMapReduceContext( // variables.add(ByteBuffer.wrap(new byte[]{zero})); variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple context.write(keys, variables); - } public boolean loadSetIntoMemory(Set schemaTriples, Set filters, int previousStep) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { @@ -428,14 +456,15 @@ public String idToLabel(long id) throws InvalidRequestException, UnavailableExce return null; } - //modified cassandra java 2.0.5 - public static Set> getJustifications() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException, IOException, ClassNotFoundException, RequestExecutionException{ Set> results = new HashSet>(); // String query = "SELECT " + COLUMN_JUSTIFICATION + " FROM " + KEYSPACE + "." + COLUMNFAMILY_RESULTS; SimpleClientDataStax scds = new SimpleClientDataStax(); scds.connect(DEFAULT_HOST); + + //Modified 2015-6-25 + //From COLUMNFAMILY_RESULTS to justifications ??\\ Statement statement = QueryBuilder.select().all().from(KEYSPACE, COLUMNFAMILY_RESULTS); List rows = scds.getSession().execute(statement).all(); @@ -496,7 +525,7 @@ public Set getTracingEntries(Triple triple) throws InvalidRequestExcepti byte zero = 0; Set tracingEntries = new HashSet(); - String query = "SELECT * FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + " WHERE " + + String query = "SELECT * FROM " + KEYSPACE + "." + "resultrows" + " WHERE " + COLUMN_SUB + "=? AND " + COLUMN_PRE + "=? AND " + COLUMN_OBJ + "=? 
AND " + COLUMN_IS_LITERAL + "=?"; CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); List list = new ArrayList(); @@ -534,9 +563,14 @@ public boolean loadSetIntoMemory( logger.info("In CassandraDB's loadSetIntoMemory"); // Require an index created on COLUMN_TRIPLE_TYPE column + /* + * Note: + * ALLOW FILTERING added here + * 2015/6/12 + */ String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = ? "; + " WHERE " + COLUMN_TRIPLE_TYPE + " = ? " + " ALLOW FILTERING"; // System.out.println(query); CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); @@ -578,6 +612,11 @@ public Map> loadMapIntoMemory(Set filters) throw } // The key of the returned map is the triple's subject; the value is its object + /* + * Note: + * ALLOW FILTERING added here + * 2015/6/12 + */ public Map> loadMapIntoMemory(Set filters, boolean inverted) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { long startTime = System.currentTimeMillis(); @@ -588,7 +627,7 @@ public Map> loadMapIntoMemory(Set filters, boole // Require an index created on COLUMN_TRIPLE_TYPE column String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = ? "; + " WHERE " + COLUMN_TRIPLE_TYPE + " = ? " + " ALLOW FILTERING"; CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); @@ -644,18 +683,159 @@ public void createIndexOnRule() throws InvalidRequestException, UnavailableExcep } // Added by WuGang 2015-06-08 + + + public static ResultSet getRows(){ + Builder builder = Cluster.builder(); + builder.addContactPoint(DEFAULT_HOST); + SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); + Cluster clu = builder.build(); + Session session = clu.connect(); + SimpleStatement statement = new SimpleStatement("SELECT sub, obj, pre, isliteral FROM mrjks.justifications where inferredsteps = 0"); + statement.setFetchSize(100); + ResultSet results = session.execute(statement); + System.out.println("------------------" + results + "--------------"); + return results; + } + + public static boolean delornot = false; + public static void removeOriginalTriples(){ - SimpleClientDataStax scds = new SimpleClientDataStax(); - scds.connect(DEFAULT_HOST); + if (delornot == true) + return; + delornot = true; + // Execution must not be interrupted. + Builder builder = Cluster.builder(); + builder.addContactPoint(DEFAULT_HOST); + SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); + Cluster clu = builder.build(); + Session session = clu.connect(); + + String cquery1 = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + "."
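/*
 * A note on the ALLOW FILTERING changes above (a sketch for context, not part
 * of the patch): Cassandra only serves a WHERE predicate cheaply when it hits
 * the partition key or an indexed column; a predicate on a plain column such
 * as tripletype is rejected unless the query opts in to a cluster-wide scan
 * with ALLOW FILTERING. Two options, where the index name idx_tripletype is
 * an assumption and not taken from this patch:
 *
 *   // Option 1: what this patch does. Legal everywhere, but every node
 *   // must scan and filter all of its rows.
 *   String scan = "SELECT sub, obj, inferredsteps FROM mrjks.justifications"
 *               + " WHERE tripletype = ? ALLOW FILTERING";
 *
 *   // Option 2: create the index the older comment already asks for; the
 *   // predicate is then served from the index and ALLOW FILTERING is not needed.
 *   String ddl   = "CREATE INDEX IF NOT EXISTS idx_tripletype"
 *                + " ON mrjks.justifications (tripletype)";
 *   String query = "SELECT sub, obj, inferredsteps FROM mrjks.justifications"
 *                + " WHERE tripletype = ?";
 *
 * Since the surrounding comments say an index on tripletype is required,
 * ALLOW FILTERING here mainly keeps the query legal when that index is missing.
 */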
+ "ruleiszero" + + " ( " + + COLUMN_SUB + " bigint, " + // partition key + COLUMN_PRE + " bigint, " + // partition key + COLUMN_OBJ + " bigint, " + // partition key + COLUMN_IS_LITERAL + " boolean, " + // partition key + COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + +// COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + + COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + session.execute(cquery1); + + //SELECT ALL AND DEL ALL + SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications"); + statement.setFetchSize(100); + ResultSet results = session.execute(statement); - Where dQuery = QueryBuilder.delete() - .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) - .where(QueryBuilder.eq(COLUMN_RULE, ByteBufferUtil.bytes(0))); - scds.getSession().execute(dQuery); + System.out.println("--------DEL ALL----------"); + for (Row row : results){ + + if(row.getInt(COLUMN_RULE) != 0){ + session.execute("INSERT INTO mrjks.ruleiszero(sub, pre, obj, isliteral, tripletype, rule, v1, v2, v3, inferredsteps)" + + "VALUES (" + + row.getLong(COLUMN_SUB) + "," + + row.getLong(COLUMN_PRE) + "," + + row.getLong(COLUMN_OBJ) + "," + + row.getBool(COLUMN_IS_LITERAL) + "," + + row.getInt(COLUMN_TRIPLE_TYPE) + "," + + row.getInt(COLUMN_RULE) + "," + + row.getLong(COLUMN_V1) + "," + + row.getLong(COLUMN_V2) + "," + + row.getLong(COLUMN_V3) + "," + + row.getInt(COLUMN_INFERRED_STEPS) + ");"); + System.out.println("-------Insert ----------"); + System.out.println(row); + } + + Statement delete = QueryBuilder.delete() + .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) + .where(QueryBuilder.eq(COLUMN_SUB, row.getLong(CassandraDB.COLUMN_SUB))) + .and(QueryBuilder.eq(COLUMN_PRE, row.getLong(CassandraDB.COLUMN_PRE))) + .and(QueryBuilder.eq(COLUMN_OBJ, row.getLong(CassandraDB.COLUMN_OBJ))) + .and(QueryBuilder.eq(COLUMN_IS_LITERAL, row.getBool(COLUMN_IS_LITERAL))); + session.execute(delete); + System.out.println(row); + } + +// SimpleClientDataStax scds = new SimpleClientDataStax(); +// scds.connect(DEFAULT_HOST); +// +// System.out.println("Select Primary Key"); +// //modified select partition key and delete using partition key +// Statement select = QueryBuilder.select() +// .all() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .where(QueryBuilder.eq(COLUMN_INFERRED_STEPS, 0)); +// select.setFetchSize(100); +// ResultSet result = scds.getSession().execute(select); +// //List rows = scds.getSession().executeAsync(statement); +// //List rows = scds.getSession().execute(select).all(); +// +// while(true){ +// Row delrow = result.one(); +// if(delrow == null) +// break; +// Where dQuery = QueryBuilder.delete() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .where(QueryBuilder.eq(COLUMN_SUB, delrow.getLong(CassandraDB.COLUMN_SUB))) +// .and(QueryBuilder.eq(COLUMN_PRE, delrow.getLong(CassandraDB.COLUMN_PRE))) +// .and(QueryBuilder.eq(COLUMN_OBJ, delrow.getLong(CassandraDB.COLUMN_OBJ))) +// .and(QueryBuilder.eq(COLUMN_IS_LITERAL, delrow.getBool(COLUMN_IS_LITERAL))); +// System.out.println(delrow); +// session.execute(dQuery); +// } + +// Where dQuery = QueryBuilder.delete() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// 
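/*
 * Why the scan-and-delete above replaced the commented-out delete below: CQL
 * DELETE must be restricted by primary key columns, and rule is only a
 * clustering column, so "DELETE ... WHERE rule = 0" is rejected by Cassandra.
 * The row-by-row pattern can be tightened with a prepared statement; a sketch
 * under that assumption, reusing the Session from the surrounding code:
 *
 *   PreparedStatement del = session.prepare(
 *       "DELETE FROM mrjks.justifications WHERE sub=? AND pre=? AND obj=? AND isliteral=?");
 *   SimpleStatement scan = new SimpleStatement("SELECT * FROM mrjks.justifications");
 *   scan.setFetchSize(100);                  // page the scan; never hold the whole table
 *   for (Row r : session.execute(scan)) {
 *       session.execute(del.bind(r.getLong("sub"), r.getLong("pre"),
 *                                r.getLong("obj"), r.getBool("isliteral")));
 *   }
 *
 * Note that the bound columns form the full partition key, so each delete
 * drops the whole partition, i.e. every clustering row of that triple at once.
 */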
.where(QueryBuilder.eq(COLUMN_RULE, ByteBufferUtil.bytes(0))); +// scds.getSession().execute(dQuery); + +// scds.close(); - scds.close(); } + //create by LiYang +// public static void createReasonTable(){ +// SimpleClientDataStax scds = new SimpleClientDataStax(); +// scds.connect(DEFAULT_HOST); +// //Statement st = QueryBuilder +// +// for (int i = 1; i <= 7; i++ ){ +// System.out.println("Select Primary Key"); +// //modified select partition key and delete using partition key +// Statement select = QueryBuilder.select() +// .all() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .where(QueryBuilder.eq(COLUMN_INFERRED_STEPS, i)); +// select.setFetchSize(100); +// ResultSet result = scds.getSession().execute(select); +// +// Session session = scds.getSession(); +// while(true){ +// Row insertrow = result.one(); +// if(insertrow == null) +// break; +// Insert insert = QueryBuilder +// .insertInto(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .value(COLUMN_SUB, insertrow.getLong(CassandraDB.COLUMN_SUB)) +// .value(COLUMN_PRE, insertrow.getLong(CassandraDB.COLUMN_PRE)) +// .value(COLUMN_OBJ, insertrow.getLong(CassandraDB.COLUMN_OBJ)) +// .value(COLUMN_IS_LITERAL, insertrow.getBool(COLUMN_IS_LITERAL)) +// .value(COLUMN_TRIPLE_TYPE, insertrow.getLong(CassandraDB.COLUMN_TRIPLE_TYPE)) +// .value(COLUMN_SUB, insertrow.getLong(CassandraDB.COLUMN_SUB)); +// +// } +// } +// } + public static void main(String[] args) { try { CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index c18fc0c..b0f8c8e 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -121,7 +121,7 @@ else if (filters.size() == 1){ } CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); //Modifide by LiYang - ConfigHelper.setInputSplitSize(job.getConfiguration(), 5000000); + ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); job.setInputFormatClass(CqlInputFormat.class); System.out.println("ConfigHelper.getInputSplitSize - input: " + ConfigHelper.getInputSplitSize(job.getConfiguration())); System.out.println("CqlConfigHelper.getInputPageRowSize - input: " + CqlConfigHelper.getInputPageRowSize(job.getConfiguration())); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java index f8d90ff..64ffe76 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java @@ -30,8 +30,7 @@ public static void main(String[] args) { parseArgs(args); - RDFSReasoner rdfsReasoner = new RDFSReasoner(); - OWLReasoner owlReasoner = new OWLReasoner(); + try { long totalDerivation = 0; boolean continueDerivation = true; @@ -42,6 +41,14 @@ public static void main(String[] args) { while (continueDerivation) { //Do RDFS reasoning + /* + * Modified 2015/7/1 + * Move rdfsReasoner owlReasoner inside the loop + */ + + RDFSReasoner rdfsReasoner = new RDFSReasoner(); + OWLReasoner owlReasoner = new OWLReasoner(); + if (owlDerivation == 0 && !firstLoop) { rdfsDerivation = 0; } else { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java new file mode 
100644 index 0000000..e27c689 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java @@ -0,0 +1,159 @@ +package cn.edu.neu.mitt.mrj.reasoner; + + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlInputFormat; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.SlicePredicate; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import prejustification.SelectInferRows; +import prejustification.SelectInferRowsMap; +import prejustification.SelectInferRowsReduce; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.utils.Cassandraconf; + +public class ReasonedJustifications extends Configured implements Tool{ + public int run(String[] args) throws Exception{ + + Configuration conf = new Configuration(); + + Job job = new Job(conf); + job.setJobName(" Test "); + job.setJarByClass(SelectInferRows.class); + job.setNumReduceTasks(8); + + ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? ALLOW FILTERING"); + CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); + //Modifide by LiYang + ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); + job.setInputFormatClass(CqlInputFormat.class); + job.setOutputKeyClass(Map.class); + job.setOutputValueClass(List.class); + job.setOutputFormatClass(CqlOutputFormat.class); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + "resultrows" + + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? 
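/*
 * Context for the TOKEN-range input CQL configured above (a sketch of the
 * contract, not project code): CqlInputFormat builds one input split per
 * token range and binds the two '?' placeholders to that split's boundaries,
 * so each mapper reads a disjoint slice of justifications and the union of
 * all splits covers the ring exactly once. Roughly, per split:
 *
 *   // startToken/endToken come from the split; accessor names are illustrative.
 *   BoundStatement bs = session.prepare(inputCql).bind(startToken, endToken);
 *   for (Row row : session.execute(bs)) {
 *       mapper.map(rowNumber++, row, context); // one map() call per CQL row; key semantics simplified
 *   }
 *
 * This is why the WHERE clause must restrict TOKEN(...) over the full
 * partition key (sub, pre, obj, isliteral); any extra predicate, like the
 * tripletype filter used elsewhere in this series, then needs ALLOW FILTERING.
 */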
"; + CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + + job.setMapperClass(ReasonedJustificationsMapper.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + job.setReducerClass(ReasonedJustificationsReducer.class); + + +// Configuration conf = getConf(); +// Job job = new Job(conf, "Select Reasoned Rows"); +// job.setJarByClass(ReasonedJustifications.class); +// /* +// //Set the predicate +// List columnNames = new ArrayList(); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_SUB)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_PRE)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_PRE)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_IS_LITERAL)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_TRIPLE_TYPE)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_RULE)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_V1)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_V2)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_V3)); +// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_INFERRED_STEPS)); +// SlicePredicate predicate = new SlicePredicate().setColumn_names(columnNames); +// ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); +// */ +// /* +// * Get Attention conf != job.getConfiguration() +// * thread "main" java.lang.NullPointerException at org.apache.cassandra.utils.FBUtilities.newPartitioner(FBUtilities.java:418) +// */ +// //Input +// +// ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); +// ConfigHelper.setInputPartitioner(job.getConfiguration(), Cassandraconf.partitioner); +// ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMN_JUSTIFICATION); +// CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), "1000"); +// +// +// +// CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM mrjks.justifications WHERE TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") > ? AND TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") <= ? 
AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " = " + "5" + +// " ALLOW FILTERING;"); +// ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); +// job.setInputFormatClass(ColumnFamilyInputFormat.class); +// +// +// //output +// job.setOutputKeyClass(Text.class); +// job.setOutputValueClass(Text.class); +// job.setOutputFormatClass(CqlOutputFormat.class); +// ConfigHelper.setOutputInitialAddress(job.getConfiguration(), CassandraDB.DEFAULT_HOST); +// ConfigHelper.setOutputPartitioner(job.getConfiguration(), Cassandraconf.partitioner); +// +// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); +// +// //ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE); //*** +// String query = "UPDATE mrjks.resultrows SET " + CassandraDB.COLUMN_INFERRED_STEPS + "= ?"; +// CqlConfigHelper.setOutputCql(job.getConfiguration(), query); +// +// +// job.setMapperClass(ReasonedJustificationsMapper.class); +// job.setMapOutputKeyClass(Text.class); +// job.setMapOutputValueClass(IntWritable.class); +// job.setReducerClass(ReasonedJustificationsReducer.class); + + + job.waitForCompletion(true); + + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new ReasonedJustifications(), args); + System.exit(res); + } + +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java new file mode 100644 index 0000000..7e719e1 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java @@ -0,0 +1,79 @@ +package cn.edu.neu.mitt.mrj.reasoner; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; + +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SimpleStatement; + +public class ReasonedJustificationsMapper extends Mapper{ + private Cluster cluster; + private Session session; + //** + public void map(Long keys, Row rows, Context context) throws IOException, InterruptedException{ + +// SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications"); +// statement.setFetchSize(100); +// ResultSet results = session.execute(statement); + + Integer inferredsteps; + // for (Row rows : row){ + if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { + + String conKey; + //***** + conKey = rows.getLong(CassandraDB.COLUMN_SUB) //��ʹ��ByteBufferUtil�� + + "-" + rows.getLong(CassandraDB.COLUMN_PRE) + + "-" + rows.getLong(CassandraDB.COLUMN_OBJ) + + "-" + rows.getBool(CassandraDB.COLUMN_IS_LITERAL) + + "-" + rows.getInt(CassandraDB.COLUMN_TRIPLE_TYPE) + + "-" + rows.getInt(CassandraDB.COLUMN_RULE) + + "-" + rows.getLong(CassandraDB.COLUMN_V1) + + "-" + rows.getLong(CassandraDB.COLUMN_V2) + + "-" + rows.getLong(CassandraDB.COLUMN_V3); + inferredsteps = rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS); + + context.write(new Text(conKey), new IntWritable(inferredsteps)); + } + //} + + } + + public void 
setup(Context context) throws IOException, InterruptedException{ + cluster = Cluster.builder().addContactPoint(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host).build(); + Metadata metadata = cluster.getMetadata(); + System.out.printf("-------Connected to cluster: %s\n", metadata.getClusterName()); + session = cluster.connect(); + + String cquery1 = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "resultrows" + + " ( " + + CassandraDB.COLUMN_SUB + " bigint, " + // partition key + CassandraDB.COLUMN_PRE + " bigint, " + // partition key + CassandraDB.COLUMN_OBJ + " bigint, " + // partition key + CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key + CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_RULE + " int, " + + CassandraDB.COLUMN_V1 + " bigint, " + + CassandraDB.COLUMN_V2 + " bigint, " + + CassandraDB. COLUMN_V3 + " bigint, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + " PRIMARY KEY ((" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + "), " + + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + + " ) ) "; + session.execute(cquery1); + } +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java new file mode 100644 index 0000000..cec9547 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java @@ -0,0 +1,46 @@ +package cn.edu.neu.mitt.mrj.reasoner; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; + +import com.sun.org.apache.xpath.internal.operations.Bool; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +public class ReasonedJustificationsReducer extends Reducer, List>{ + public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException{ + + for (IntWritable value : values) { + //Prepare the insert keys collection + String[] splitkeys = key.toString().split("-"); + Map keys = new LinkedHashMap(); + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(Long.parseLong(splitkeys[0]))); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(Long.parseLong(splitkeys[1]))); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(Long.parseLong(splitkeys[2]))); + //bool + keys.put(CassandraDB.COLUMN_IS_LITERAL, Boolean.valueOf(splitkeys[3])?ByteBuffer.wrap(new byte[]{1}):ByteBuffer.wrap(new byte[]{0})); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(Integer.parseInt(splitkeys[4]))); + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(Integer.parseInt(splitkeys[5]))); + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(Long.parseLong(splitkeys[6]))); + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(Long.parseLong(splitkeys[7]))); + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(Long.parseLong(splitkeys[8]))); + + //prepare the insert variables collection + List variables = new ArrayList(); + int var = Integer.parseInt(value.toString()); + variables.add(ByteBufferUtil.bytes(var)); + 
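// What CqlOutputFormat does with this write (a sketch of the contract, not
// project code): the Map<String, ByteBuffer> names the columns bound in the
// WHERE clause of the configured UPDATE, here the full resultrows primary
// key, while the List<ByteBuffer> binds the SET placeholders in order, here
// the single inferredsteps '?'. The pair written below therefore executes roughly:
//
//   UPDATE mrjks.resultrows SET inferredsteps = <variables[0]>
//    WHERE sub=<keys> AND pre=<keys> AND obj=<keys> AND isliteral=<keys>
//      AND tripletype=<keys> AND rule=<keys> AND v1=<keys> AND v2=<keys> AND v3=<keys>
//
// Because resultrows may not contain the row yet, this relies on CQL UPDATE's
// upsert semantics: the UPDATE creates the row when it is absent.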
context.write(keys, variables); + } + + } + +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java index 79afc1a..5fc2e89 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java @@ -47,15 +47,15 @@ public class OWLAllSomeValuesMapper extends Mapper= previousDerivation) { - - log.info("And I met a triple with RDF_TYPE as predicate: " + value); + //DEL +// log.info("And I met a triple with RDF_TYPE as predicate: " + value); // ��Ҫ���⴫��һ��w if (someValues.containsKey(value.getObject())) { //�ҵ���һ��(x,rdf:type,w)��������Ԫ�飬����w����v owl:someValuesFrom w diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java index 9d56f78..e8bad41 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java @@ -33,7 +33,7 @@ public class OWLAllSomeValuesReducer extends Reducer values, Context context) throws IOException, InterruptedException { - log.info("I'm in OWLAllSomeValuesReducer"); + //log.info("I'm in OWLAllSomeValuesReducer"); types.clear(); resources.clear(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java index 44802ff..85cb0d9 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java @@ -24,6 +24,7 @@ import cn.edu.neu.mitt.mrj.io.files.readers.FilesTriplesReader; import cn.edu.neu.mitt.mrj.partitioners.MyHashPartitioner; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; +import cn.edu.neu.mitt.mrj.utils.Cassandraconf; import cn.edu.neu.mitt.mrj.utils.FileUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -119,7 +120,16 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio boolean firstCycle = true; int currentStep = 0; - int lastDerivationStep = 0; + int lastDerivationStep = 0; + + //Modified 2015/6/28 + try { + db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); + db.init(); + } catch (Exception e) { + e.printStackTrace(); + } + do { if (!firstCycle && lastDerivationStep == (currentStep - 4)) @@ -128,6 +138,8 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio System.out.println(">>>>>>>>>>> Start new OWL Reasoner loop <<<<<<<<<<<"); long propDerivation = inferPropertiesInheritance(args); System.out.println("----------- End inferPropertiesInheritance"); + //Get Attention! 
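/*
 * Context for the loop being patched here (a simplified sketch, not the
 * project's exact code): launchClosure is a fixed-point computation. Every
 * pass runs each OWL rule job once, and the loop exits only after a whole
 * pass derives nothing new:
 *
 *   boolean changed = true;
 *   while (changed) {
 *       long derived = 0;
 *       derived += inferPropertiesInheritance(args);
 *       derived += inferTransitivityStatements(args);
 *       derived += inferSameAsStatements(args);
 *       // ... remaining rules ...
 *       changed = derived > 0;   // fixed point: a full pass added nothing
 *   }
 *
 * Getting "derived" to count only genuinely new triples is what much of this
 * patch series works on, since Hadoop's REDUCE_OUTPUT_RECORDS counter also
 * counts re-derived duplicates that Cassandra's primary key silently absorbs.
 */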
+ System.out.println("----------- Start inferTransitivityStatements"); derivedTriples = inferTransitivityStatements(args) + propDerivation; System.out.println("----------- End inferTransitivityStatements"); if (derivedTriples > 0) @@ -239,8 +251,6 @@ private long inferTransitivityStatements(String[] args) //modified 2015/5/19 long beforeInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); - //modified 2015/5/19 - //for(int i = 0;i <= 3; i++){ while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) { // System.out.println("��ʼ��inferTransitivityStatements��whileѭ����Ѱ�ҡ�"); level++; @@ -270,10 +280,10 @@ private long inferTransitivityStatements(String[] args) derivation = afterInferCount - beforeInferCount; derivedNewStatements = (derivation > 0); beforeInferCount = afterInferCount; // Update beforeInferCount + //System.out.println(" loop "); } previousTransitiveDerivation = step; - return derivation; } @@ -288,13 +298,7 @@ private long inferSameAsStatements(String[] args) { int derivationStep = 1; long previousStepDerived = 0; // Added by WuGang 2015-01-30 - //modified 2015/5/19 - //int i = 0; while (derivedSynonyms) { - //modified 2015/5/19 - //i++; - //if (i == 3) - // return 0; if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0) // We need not to infer on SameAs return 0; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java index dd87d22..fc5ea85 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java @@ -86,7 +86,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRobject(TriplesUtils.RDFS_CLASS); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + //context.write(source, oTriple); break; case 4: // û�ж�Ӧ��rdfs rule�� case 5: // û�ж�Ӧ��rdfs rule�� diff --git a/mrj-0.1/src/prejustification/SelectInferRows.java b/mrj-0.1/src/prejustification/SelectInferRows.java new file mode 100644 index 0000000..2ecbf7a --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRows.java @@ -0,0 +1,143 @@ +package prejustification; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlInputFormat; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.SlicePredicate; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; + + +public class SelectInferRows extends Configured implements Tool{ + //private static final Logger logger = LoggerFactory.getLogger(); + public static void main(String[] args) throws 
Exception{ + int exitCode = ToolRunner.run(new Configuration(), new SelectInferRows(), args); + System.exit(exitCode); + } + + public int run(String[] args) throws Exception{ + + //Job job = new Job(getConf()); +// Job job = MapReduceReasonerJobConfig.createNewJob(SelectInferRows.class, "Select Rows", new HashSet(), 16, 16, true, true); + +// ConfigHelper.setInputInitialAddress(getConf(), CassandraDB.DEFAULT_HOST); +// ConfigHelper.setInputColumnFamily(getConf(), CassandraDB.KEYSPACE, CassandraDB.COLUMN_JUSTIFICATION); + + // job.setJobName("Del Rows"); +// job.setJarByClass(SelectInferRows.class); + + /* + * Select(map) + */ + + + Configuration conf = new Configuration(); + + Job job = new Job(conf); + job.setJobName(" Test "); + job.setJarByClass(SelectInferRows.class); + job.setNumReduceTasks(8); + + ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? ALLOW FILTERING"); + CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); + //Modifide by LiYang + ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); + job.setInputFormatClass(CqlInputFormat.class); + job.setOutputKeyClass(Map.class); + job.setOutputValueClass(List.class); + job.setOutputFormatClass(CqlOutputFormat.class); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; + CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + + job.setMapperClass(SelectInferRowsMap.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(ByteBuffer.class); + job.setReducerClass(SelectInferRowsReduce.class); + + +// job.setInputFormatClass(ColumnFamilyInputFormat.class); +// ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); +// ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); +// ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); +// CqlConfigHelper.setInputCql(job.getConfiguration(), +// "SELECT * FROM " + CassandraDB.KEYSPACE + "." 
+ CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " WHERE RULE = 0"); +// CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); +// ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); +// job.setInputFormatClass(CqlInputFormat.class); +// +// +// /* +// * Insert(reduce) +// */ +//// job.setCombinerClass(SelectInferRowsReduce.class); +// job.setOutputKeyClass(Map.class); +// job.setOutputValueClass(List.class); +// //�൱�� ָ�����Ŀ¼ Ҫд�� �������ʾ�Ҳ������Ŀ¼ +// job.setOutputFormatClass(CqlOutputFormat.class); +// +// ConfigHelper.setOutputInitialAddress(job.getConfiguration(), CassandraDB.DEFAULT_HOST); +// ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); +// ConfigHelper.setOutputColumnFamily(getConf(), CassandraDB.KEYSPACE, "ruleiszero"); +// ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE);// ** +//// String query = "INSERT INTO mrjks.ruleiszero (" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + ", " + +//// CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 +", " +//// + CassandraDB.COLUMN_INFERRED_STEPS + ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, )"; +// String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; +// CqlConfigHelper.setOutputCql(job.getConfiguration(), query); +// +// +// ConfigHelper.getInputSplitSize(job.getConfiguration()); +// CqlConfigHelper.getInputPageRowSize(job.getConfiguration()); +//// String column_names = CassandraDB.COLUMN_SUB + CassandraDB.COLUMN_PRE + CassandraDB.COLUMN_OBJ + CassandraDB.COLUMN_IS_LITERAL + +//// CassandraDB.COLUMN_TRIPLE_TYPE + CassandraDB.COLUMN_RULE + CassandraDB.COLUMN_V1 + CassandraDB.COLUMN_V2 + CassandraDB.COLUMN_V3; +//// SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(column_names))); +// +// //��������䣬����������map �� reduce + job.waitForCompletion(true); + + System.out.println("Finished"); + return 0; + + } + +} diff --git a/mrj-0.1/src/prejustification/SelectInferRowsMap.java b/mrj-0.1/src/prejustification/SelectInferRowsMap.java new file mode 100644 index 0000000..201eea0 --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRowsMap.java @@ -0,0 +1,79 @@ +package prejustification; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; + +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.mapreduce.Mapper; + + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; + +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SimpleStatement; + + + +public class SelectInferRowsMap extends Mapper, ByteBuffer> { + private Cluster cluster; + private Session session; + + + public void map(ByteBuffer key, Row row, Context context) throws IOException, InterruptedException{ + SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications"); + statement.setFetchSize(100); + ResultSet results = session.execute(statement); + + System.out.println("---------MAP----------"); + Map keys = new HashMap<>(); + ByteBuffer 
inferredsteps; + for (Row rows : results){ + if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_SUB))); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_PRE))); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_OBJ))); + keys.put(CassandraDB.COLUMN_IS_LITERAL, ByteBuffer.wrap(new byte[]{(byte) (rows.getBool(CassandraDB.COLUMN_IS_LITERAL) ? 1 : 0)})); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_TRIPLE_TYPE))); + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_RULE))); + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V1))); + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V2))); + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V3))); + inferredsteps = ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS)); + context.write(keys, inferredsteps); + } + } + } + + public void setup(Context context) throws IOException, InterruptedException{ + + cluster = Cluster.builder().addContactPoint(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host).build(); + Metadata metadata = cluster.getMetadata(); + System.out.printf("-------Connected to cluster: %s\n", metadata.getClusterName()); + session = cluster.connect(); + + String cquery1 = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "ruleiszero" + + " ( " + + CassandraDB.COLUMN_SUB + " bigint, " + // partition key + CassandraDB.COLUMN_PRE + " bigint, " + // partition key + CassandraDB.COLUMN_OBJ + " bigint, " + // partition key + CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key + CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_RULE + " int, " + + CassandraDB.COLUMN_V1 + " bigint, " + + CassandraDB.COLUMN_V2 + " bigint, " + + CassandraDB. 
COLUMN_V3 + " bigint, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + " PRIMARY KEY ((" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + "), " + + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + + " ) ) "; + session.execute(cquery1); + } + + +} diff --git a/mrj-0.1/src/prejustification/SelectInferRowsReduce.java b/mrj-0.1/src/prejustification/SelectInferRowsReduce.java new file mode 100644 index 0000000..adcfa19 --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRowsReduce.java @@ -0,0 +1,18 @@ +package prejustification; + +import org.apache.hadoop.mapreduce.Reducer; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Map; +public class SelectInferRowsReduce extends Reducer, ByteBuffer, Map, ByteBuffer> { + public void reduce(Map key, Iterable value, Context context) throws IOException, InterruptedException{ + + for (ByteBuffer inferredsteps : value) { + System.out.println(key); + context.write(key, inferredsteps); + } + + } + +} From c484119a41a9b6f6aee4fa44182dfdef60a9c1f4 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jul 2015 00:54:08 +0800 Subject: [PATCH 05/16] Added CassandraDB.getRowCountAccordingInferredSteps() and modified MapReduceReasonerJobConfig.createNewJob(), mainly to support counting the number of reasoning results by inferredstep. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 66 ++++++++++++++- .../reasoner/MapReduceReasonerJobConfig.java | 82 +++++++++++++------ .../ReasonedJustificationsReducer.java | 2 - .../mitt/mrj/reasoner/owl/OWLReasoner.java | 62 ++++++++++---- .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java | 4 + .../prejustification/SelectInferRowsMap.java | 2 +- 6 files changed, 175 insertions(+), 43 deletions(-) diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index dbfceca..023aa14 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -287,6 +287,36 @@ public Cassandra.Iface getDBClient(){ return client; } + + /** + * Get the row count for a given COLUMN_INFERRED_STEPS value. + * @return row count. + */ + public long getRowCountAccordingInferredSteps(int level){ + //ALLOW FILTERING + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "."
+ COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_INFERRED_STEPS + " = " + level + " ALLOW FILTERING"; + + long num = 0; + try { + CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); + } catch (InvalidRequestException e) { + e.printStackTrace(); + } catch (UnavailableException e) { + e.printStackTrace(); + } catch (TimedOutException e) { + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + e.printStackTrace(); + } catch (TException e) { + e.printStackTrace(); + } + + return num; + } + + //TriplesUtils.SYNONYMS_TABLE //TriplesUtils.TRANSITIVE_TRIPLE //TriplesUtils.DATA_TRIPLE_SAME_AS @@ -318,6 +348,40 @@ public long getRowCountAccordingTripleType(int tripletype){ return num; } + + /** + * Get the row count according to the triple type. + * @return row count. + */ + public long getRowCountAccordingTripleTypeWithLimitation(int tripletype, int limit){ + //ALLOW FILTERING + String query = ""; + if (limit <= 0) + query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; + else + query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " LIMIT " + limit + " ALLOW FILTERING "; + + long num = 0; + try { + CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); + } catch (InvalidRequestException e) { + e.printStackTrace(); + } catch (UnavailableException e) { + e.printStackTrace(); + } catch (TimedOutException e) { + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + e.printStackTrace(); + } catch (TException e) { + e.printStackTrace(); + } + + return num; + } + /** * Get the row count according to the type of rule. 
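A note on the LIMIT variant added above: with ALLOW FILTERING, an unbounded COUNT(*) forces every node to scan all of its rows, but COUNT counts at most LIMIT rows, so "SELECT COUNT(*) ... LIMIT 1" can stop as soon as one matching row is found (a miss still scans to the end). That turns a full-table count into a cheap existence probe, which is how OWLReasoner uses it later in this same patch. A sketch, assuming the db handle of this class:

    // "Is there at least one transitive triple?" Cost of the first match,
    // not of the whole table.
    boolean anyTransitive =
        db.getRowCountAccordingTripleTypeWithLimitation(TriplesUtils.TRANSITIVE_TRIPLE, 1) > 0;
    if (!anyTransitive)
        return 0;   // skip scheduling the transitivity jobs entirely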
@@ -704,7 +768,7 @@ public static void removeOriginalTriples(){ if (delornot == true) return; delornot = true; - // Execution must not be interrupted. + // Execution must not be interrupted. Builder builder = Cluster.builder(); builder.addContactPoint(DEFAULT_HOST); SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index b0f8c8e..eed7bc4 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -2,7 +2,7 @@ * Project Name: mrj-0.1 * File Name: MapReduceJobConfig.java * @author Gang Wu - * 2014-12-28 10:44:16 AM + * 2014-12-28 10:44:16 AM * * Description: * TODO @@ -31,30 +31,60 @@ public class MapReduceReasonerJobConfig { // Input from CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraInput(Job job, Set filters) { + private static void configureCassandraInput(Job job, Set typeFilters, Set stepFilters) { //Set the input ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); // Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml //ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - if (filters.size() == 0){ - CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? ALLOW FILTERING"); + if (typeFilters.size() == 0){ + + if (stepFilters.size() == 0) + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? ALLOW FILTERING"); + else{ + Integer max = java.util.Collections.max(stepFilters); + Integer min = java.util.Collections.min(stepFilters); + + + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? 
AND " + + CassandraDB.COLUMN_STEP + " >= " + min + " AND " + + CassandraDB.COLUMN_STEP + " <= " + max + + " ALLOW FILTERING"); + } + } - else if (filters.size() == 1){ + else if (typeFilters.size() == 1){ + if (stepFilters.size() != 0){ // stepFilter is only for handling transitive property + System.err.println("This is not supported!!!"); + return; + } + CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + @@ -68,17 +98,23 @@ else if (filters.size() == 1){ CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + ") <= ? AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " = " + filters.toArray()[0] + + CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] + " ALLOW FILTERING"); }else{ + if (stepFilters.size() != 0){ // stepFilter is only for handling transitive property + System.err.println("This is not supported!!!"); + return; + } + + // The support of IN clause in cassandra db's SELECT is restricted. // So we have to try to manually cluster the values in the filters. // see http://www.datastax.com/documentation/cql/3.1/cql/cql_reference/select_r.html#reference_ds_d35_v2q_xj__selectIN System.out.println("<<<<<<<>>>>>>>>"); System.out.println("<<<<<<<>>>>>>>>"); - Integer max = java.util.Collections.max(filters); - Integer min = java.util.Collections.min(filters); + Integer max = java.util.Collections.max(typeFilters); + Integer min = java.util.Collections.min(typeFilters); CqlConfigHelper.setInputCql(job.getConfiguration(), @@ -147,12 +183,12 @@ private static void configureCassandraOutput(Job job) { // In each derivation, we may create a set of jobs public static Job createNewJob(Class classJar, String jobName, - Set filters, int numMapTasks, int numReduceTasks, + Set typeFilters, Set stepFilters, int numMapTasks, int numReduceTasks, boolean bConfigCassandraInput, boolean bConfigCassandraOutput) throws IOException { Configuration conf = new Configuration(); conf.setInt("maptasks", numMapTasks); - conf.set("input.filter", filters.toString()); + conf.set("input.filter", typeFilters.toString()); Job job = new Job(conf); job.setJobName(jobName); @@ -160,7 +196,7 @@ public static Job createNewJob(Class classJar, String jobName, job.setNumReduceTasks(numReduceTasks); if (bConfigCassandraInput) - configureCassandraInput(job, filters); + configureCassandraInput(job, typeFilters, stepFilters); if (bConfigCassandraOutput) configureCassandraOutput(job); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java index cec9547..436750b 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java @@ -12,8 +12,6 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; -import com.sun.org.apache.xpath.internal.operations.Bool; - import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; public class ReasonedJustificationsReducer extends Reducer, List>{ diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java index 85cb0d9..6380c3f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java @@ -204,6 +204,7 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter 
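/*
 * A note on the min/max construction above (context, not project code): CQL
 * only allows IN on specific key positions, so "tripletype IN {...}" cannot
 * be pushed into the input query. The code approximates the set with the
 * range [min, max], which is sound but over-selects: any type value lying
 * between min and max that was never requested also comes back. Consumers
 * therefore still need to re-check membership, sketched here against the
 * original set:
 *
 *   // typeFilters is the Set<Integer> passed to configureCassandraInput
 *   if (!typeFilters.contains(row.getInt(CassandraDB.COLUMN_TRIPLE_TYPE)))
 *       return;   // skip rows the range predicate over-selected
 *
 * The same caveat applies to the stepFilters range used for the transitivity
 * levels.
 */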
OWLReasoner.class, "OWL reasoner: infer properties inherited statements (not recursive), step " + step, new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // not supported numMapTasks, numReduceTasks, true, true); job.getConfiguration().setInt("reasoner.step", step); @@ -249,17 +250,23 @@ private long inferTransitivityStatements(String[] args) int level = 0; //modified 2015/5/19 - long beforeInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); + long beforeInferCount = db.getRowCountAccordingTripleTypeWithLimitation(TriplesUtils.TRANSITIVE_TRIPLE, 1); while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) { // System.out.println("��ʼ��inferTransitivityStatements��whileѭ����Ѱ�ҡ�"); level++; + Set levels = new HashSet(); + levels.add(new Integer(level-1)); + if (level > 1) + levels.add(new Integer(level-2)); + //Configure input. Take only the directories that are two levels below Job job = MapReduceReasonerJobConfig.createNewJob( OWLReasoner.class, "OWL reasoner: transitivity rule. Level " + level, new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + levels, numMapTasks, numReduceTasks, true, true); job.getConfiguration().setInt("reasoning.baseLevel", step); @@ -272,15 +279,14 @@ private long inferTransitivityStatements(String[] args) job.setReducerClass(OWLTransitivityReducer.class); job.waitForCompletion(true); - - // About duplication, we will modify the checkTransitivity to return transitive triple counts - // and then do subtraction. - - long afterInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); - derivation = afterInferCount - beforeInferCount; - derivedNewStatements = (derivation > 0); - beforeInferCount = afterInferCount; // Update beforeInferCount - //System.out.println(" loop "); + long stepNotFilteredDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); + + long stepDerivation = 0; + if (stepNotFilteredDerivation > 0) { + stepDerivation = db.getRowCountAccordingInferredSteps(level); + } + derivation += stepDerivation; + derivedNewStatements = stepDerivation > 0; } previousTransitiveDerivation = step; @@ -296,7 +302,7 @@ private long inferSameAsStatements(String[] args) { try { boolean derivedSynonyms = true; int derivationStep = 1; - long previousStepDerived = 0; // Added by WuGang 2015-01-30 +// long previousStepDerived = 0; // Added by WuGang 2015-01-30 while (derivedSynonyms) { if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0) // We need not to infer on SameAs @@ -308,6 +314,7 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: build the synonyms table from same as triples - step " + derivationStep++, filters, // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // Added by WuGang, 2015-07-12 numMapTasks, numReduceTasks, true, true); @@ -321,10 +328,11 @@ private long inferSameAsStatements(String[] args) { // System.out.println("In FilesOWLReasoner: " + job.getCounters().findCounter("synonyms", "replacements").getValue()); Counter cDerivedSynonyms = job.getCounters().findCounter("synonyms","replacements"); long currentStepDerived = cDerivedSynonyms.getValue(); // Added by WuGang 2015-01-30 - derivedTriples += currentStepDerived; - derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 + derivedSynonyms = currentStepDerived > 0; // Added by WuGang 2015-07-12 +// derivedTriples += 
currentStepDerived; +// derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 //derivedSynonyms = currentStepDerived > 0; - previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 +// previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 } //Filter the table. @@ -344,6 +352,7 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: sampling more common resources", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // Added by WuGang, 2015-07-12 numMapTasks, numReduceTasks, true, false); // input from cassandra, but output to hdfs job.getConfiguration().setInt("reasoner.samplingPercentage", sampling); //Sampling at 10% @@ -398,6 +407,7 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: replace triples using the sameAs synonyms: reconstruct triples", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // Added by WuGang, 2015-07-12 numMapTasks, numReduceTasks, false, true); // input from hdfs, but output to cassandra @@ -448,6 +458,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter OWLReasoner.class, "OWL reasoner: infer equivalence from subclass and subprop. step " + step, filters, + new HashSet(), // Added by WuGang, 20150712 numMapTasks, numReduceTasks, true, true); job.getConfiguration().setInt("maptasks", Math.max(job.getConfiguration().getInt("maptasks", 0) / 10, 1)); @@ -469,6 +480,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, boolean derivedNewStatements = true; long totalDerivation = 0; int previousSomeAllValuesDerivation = -1; + boolean firstCycle = true; // Added by Wugang 20150111 //long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15); // see OWLAllSomeValuesReducer @@ -476,10 +488,12 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, while (derivedNewStatements) { step++; + Job job = MapReduceReasonerJobConfig.createNewJob( OWLReasoner.class, "OWL reasoner: some and all values rule. step " + step, new HashSet(), + new HashSet(), numMapTasks, numReduceTasks, true, true); job.getConfiguration().setInt("reasoner.step", step); @@ -498,7 +512,21 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, // countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer // totalDerivation = countRule15 + countRule16; - derivedNewStatements = (totalDerivation > 0); + + Counter derivedTriples = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS"); + long notFilteredDerivation = derivedTriples.getValue(); + long stepDerivation = 0; + if (firstCycle) + notFilteredDerivation -= previousSomeAllValuesCycleDerivation; + if (notFilteredDerivation > 0) { + previousSomeAllValuesCycleDerivation += notFilteredDerivation; + stepDerivation = db.getRowCountAccordingInferredSteps(step - 1); + totalDerivation += stepDerivation; + derivedNewStatements = stepDerivation > 0; + } else { + derivedNewStatements = false; + } + firstCycle = false; } // Added by Wugang 20150111 @@ -524,6 +552,7 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup OWLReasoner.class, "OWL reasoner: hasValue rule. 
step " + step, new HashSet(), + new HashSet(), numMapTasks, numReduceTasks, true, true); @@ -547,7 +576,8 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup // countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer // countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer // return(countRule14a + countRule14b); - return 0; + long stepDerivation = db.getRowCountAccordingInferredSteps(step - 1); + return stepDerivation; } else { return 0; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index b112445..d0a8940 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -83,6 +83,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subproperty inheritance reasoning", new HashSet(), + new HashSet(), // Added by WuGang, 2015-07-13 numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSubPropInheritMapper.class); @@ -104,6 +105,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subproperty domain and range reasoning", new HashSet(), + new HashSet(), // Added by WuGang, 2015-07-13 numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSubPropDomRangeMapper.class); @@ -132,6 +134,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subclass reasoning", filters, + new HashSet(), // Added by WuGang, 2015-07-13 numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSubclasMapper.class); @@ -163,6 +166,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS special properties reasoning", filters, + new HashSet(), // Added by WuGang, 2015-07-13 numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSpecialPropsMapper.class); diff --git a/mrj-0.1/src/prejustification/SelectInferRowsMap.java b/mrj-0.1/src/prejustification/SelectInferRowsMap.java index 201eea0..634a812 100644 --- a/mrj-0.1/src/prejustification/SelectInferRowsMap.java +++ b/mrj-0.1/src/prejustification/SelectInferRowsMap.java @@ -31,7 +31,7 @@ public void map(ByteBuffer key, Row row, Context context) throws IOException, In ResultSet results = session.execute(statement); System.out.println("---------MAP----------"); - Map keys = new HashMap<>(); + Map keys = new HashMap(); ByteBuffer inferredsteps; for (Row rows : results){ if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { From 314096214013f7aeac1c19930d7522cfae3e58bd Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Thu, 16 Jul 2015 00:01:26 +0800 Subject: [PATCH 06/16] add a new field "transitivelevel" for table justification --- .../edu/neu/mitt/mrj/data/TripleSource.java | 11 ++ .../mrj/importtriples/FilesImportTriples.java | 8 +- ...tTriplesReconstructReducerToCassandra.java | 7 +- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 9 +- .../reasoner/MapReduceReasonerJobConfig.java | 25 +-- .../mrj/reasoner/ReasonedJustifications.java | 16 +- .../ReasonedJustificationsMapper.java | 13 +- .../ReasonedJustificationsReducer.java | 6 +- .../mitt/mrj/reasoner/owl/OWLReasoner.java | 38 ++++- .../reasoner/owl/OWLTransitivityMapper.java | 14 +- 
.../reasoner/owl/OWLTransitivityReducer.java  |  13 +-
 .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java  |   4 +
 .../src/prejustification/SelectInferRows.java | 143 ------------------
 .../prejustification/SelectInferRowsMap.java  |  79 ----------
 .../SelectInferRowsReduce.java                |  18 ---
 15 files changed, 104 insertions(+), 300 deletions(-)
 delete mode 100644 mrj-0.1/src/prejustification/SelectInferRows.java
 delete mode 100644 mrj-0.1/src/prejustification/SelectInferRowsMap.java
 delete mode 100644 mrj-0.1/src/prejustification/SelectInferRowsReduce.java

diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
index 8d5c320..afbc721 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
@@ -16,17 +16,20 @@ public class TripleSource implements WritableComparable {
 	byte derivation = 0;
 	int step = 0;
+	int transitive_level = 0;

 	@Override
 	public void readFields(DataInput in) throws IOException {
 		derivation = in.readByte();
 		step = in.readInt();
+		transitive_level = in.readInt();
 	}

 	@Override
 	public void write(DataOutput out) throws IOException {
 		out.write(derivation);
 		out.writeInt(step);
+		out.writeInt(transitive_level);
 	}

 	@Override
@@ -47,6 +50,14 @@ public void setStep(int step) {
 		this.step = step;
 	}

+	public int getTransitiveLevel() {
+		return transitive_level;
+	}
+
+	public void setTransitiveLevel(int level) {
+		this.transitive_level = level;
+	}
+
 	public void setDerivation(byte ruleset) {
 		derivation = ruleset;
 	}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
index 7140fbc..b75de2c 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
@@ -101,7 +101,7 @@ public void parseArgs(String[] args) {
 	}

 	public void sampleCommonResources(String[] args) throws Exception {
-//		System.out.println("sampleCommonResources is running.");
+//		System.out.println("sampleCommonResources is running.");
 		Job job = createNewJob("Sample common resources");

 		//Input
@@ -127,7 +127,7 @@ public void sampleCommonResources(String[] args) throws Exception {
 	}

 	public void assignIdsToNodes(String[] args) throws Exception {
-//		System.out.println("assignIdsToNodes is running.");
+//		System.out.println("assignIdsToNodes is running.");
 		Job job = createNewJob("Deconstruct statements");
 		job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

@@ -156,7 +156,7 @@ public void assignIdsToNodes(String[] args) throws Exception {
 	}

 	private void rewriteTriples(String[] args) throws Exception {
-//		System.out.println("rewriteTriples is running.");
+//		System.out.println("rewriteTriples is running.");
 		Job job = createNewJob("Reconstruct statements");


@@ -189,7 +189,7 @@ private void rewriteTriples(String[] args) throws Exception {
 		// is the line below useful?
 		//job.getConfiguration().set(CASSANDRA_PRIMARY_KEY, "(sub, pre, obj)");
 		String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
-				" SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? ";
+				" SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? , " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?";
 		CqlConfigHelper.setOutputCql(job.getConfiguration(), query);
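
Since the output CQL now carries two placeholders, every emit to CqlOutputFormat has to bind two values in order. A minimal sketch of that contract follows (the identifiers subjectId, predicateId and objectId are illustrative; CqlOutputFormat builds the WHERE clause from the keys map and binds the variables list to the "?"s from left to right, so the list order must match: inferredsteps first, then transitivelevel):

    // Sketch only: an abridged emit; the real key map (see the reducer below)
    // also carries isliteral, tripletype, rule and v1-v3.
    Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
    keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(subjectId));
    keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(predicateId));
    keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(objectId));

    List<ByteBuffer> variables = new ArrayList<ByteBuffer>();
    variables.add(ByteBufferUtil.bytes(0));  // first "?"  -> inferredsteps   (0 = original triple)
    variables.add(ByteBufferUtil.bytes(0));  // second "?" -> transitivelevel (0 = not derived by transitivity)
    context.write(keys, variables);
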
, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; CqlConfigHelper.setOutputCql(job.getConfiguration(), query); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java index 4b7acc3..beff7a7 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java @@ -2,7 +2,7 @@ * Project Name: mrj-0.1 * File Name: ImportTriplesReconstructReducerToCassandra.java * @author Gang Wu - * 2014��10��28�� ����10:35:24 + * 2014��10��28�� ����10:35:24 * * Description: * Send reducer output to Cassandra DB by representing triples with ids @@ -78,7 +78,7 @@ protected void reduce(LongWritable key, Iterable values, Context c } if (counter != 3) { - // Modified by WuGang 2010-12-3, ��������3Ԫ����֣�����Ҫ������ + // Modified by WuGang 2010-12-3, ���?��3Ԫ����֣�����Ҫ������ log.error("Found a non-triple when reconstructing. The count num is " + counter + ", and triple is " + oValue); // throw new IOException("Triple is not reconstructed!"); } @@ -107,7 +107,8 @@ protected void reduce(LongWritable key, Iterable values, Context c // the length of boolean type in cassandra is one byte!!!!!!!! // For column inferred, init it as false i.e. zero // variables.add(ByteBuffer.wrap(new byte[]{zero})); - variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(0)); // Added by WuGang, 2015-07-15, to support transitive level context.write(keys, variables); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index 023aa14..f8e0878 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -93,7 +93,7 @@ public class CassandraDB { public static final String COLUMN_ID = "id"; // mrjks.resources.id public static final String COLUMN_LABEL = "label"; // mrjks.resources.label public static final String COLUMN_JUSTIFICATION = "justification"; //mrjks.results.justification - public static final String COLUMN_STEP = "step"; // mrjks.results.step + public static final String COLUMN_TRANSITIVE_LEVELS = "transitivelevel"; // mrjks.results.step public static final String DEFAULT_HOST = cn.edu.neu.mitt.mrj.utils.Cassandraconf.host; public static final String DEFAULT_PORT = "9160"; // in version 2.1.2, cql3 port is 9042 @@ -197,7 +197,8 @@ private static void setupTables(Cassandra.Iface client) COLUMN_V2 + " bigint, " + COLUMN_V3 + " bigint, " + // COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + COLUMN_INFERRED_STEPS + " int, " + // from this line, the fields are non-primary key + COLUMN_TRANSITIVE_LEVELS + " int, " + " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + //", " + COLUMN_TRIPLE_TYPE + @@ -501,6 +502,7 @@ public static void writeJustificationToMapReduceContext( // For column inferred, init it as false i.e. 
@@ -501,6 +502,7 @@ public static void writeJustificationToMapReduceContext(
 		// For column inferred, init it as false i.e. zero
//		variables.add(ByteBuffer.wrap(new byte[]{zero}));
 		variables.add(ByteBufferUtil.bytes(source.getStep()));	// It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+		variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel()));	// It corresponds to COLUMN_TRANSITIVE_LEVELS; only used by the OWL transitivity rule

 		context.write(keys, variables);
 	}
@@ -787,7 +789,8 @@ public static void removeOriginalTriples(){
 						COLUMN_V2 + " bigint, " +
 						COLUMN_V3 + " bigint, " +
//						COLUMN_TRIPLE_TYPE + " int, " +
-						COLUMN_INFERRED_STEPS + " int, " +	// this is the only field that is not included in the primary key
+						COLUMN_INFERRED_STEPS + " int, " +	// the fields from this line on are not part of the primary key
+						COLUMN_TRANSITIVE_LEVELS + " int, " +
 						" PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " +
 						COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
						//", " + COLUMN_TRIPLE_TYPE +
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
index eed7bc4..8814250 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
@@ -31,7 +31,7 @@ public class MapReduceReasonerJobConfig {

 	// Input from CassandraDB.COLUMNFAMILY_JUSTIFICATIONS
-	private static void configureCassandraInput(Job job, Set typeFilters, Set stepFilters) {
+	private static void configureCassandraInput(Job job, Set typeFilters, Set transitiveLevelFilters, int certainStep) {
 		//Set the input
 		ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
 		// Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml
@@ -40,7 +40,7 @@ private static void configureCassandraInput(Job job, Set typeFilters, S
 		ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);

 		if (typeFilters.size() == 0){
-			if (stepFilters.size() == 0)
+			if (transitiveLevelFilters.size() == 0)
 				CqlConfigHelper.setInputCql(job.getConfiguration(),
 						"SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
 						" WHERE TOKEN(" +
 						CassandraDB.COLUMN_SUB + ", " +
 						CassandraDB.COLUMN_PRE + ", " +
 						CassandraDB.COLUMN_OBJ + ", " +
 						CassandraDB.COLUMN_IS_LITERAL +
 						") > ? AND TOKEN(" +
 						CassandraDB.COLUMN_SUB + ", " +
 						CassandraDB.COLUMN_PRE + ", " +
 						CassandraDB.COLUMN_OBJ + ", " +
 						CassandraDB.COLUMN_IS_LITERAL +
 						") <= ? ALLOW FILTERING");
 			else{
-				Integer max = java.util.Collections.max(stepFilters);
-				Integer min = java.util.Collections.min(stepFilters);
+				Integer max = java.util.Collections.max(transitiveLevelFilters);
+				Integer min = java.util.Collections.min(transitiveLevelFilters);

 				CqlConfigHelper.setInputCql(job.getConfiguration(),
 						"SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
 						" WHERE TOKEN(" +
 						CassandraDB.COLUMN_SUB + ", " +
 						CassandraDB.COLUMN_PRE + ", " +
 						CassandraDB.COLUMN_OBJ + ", " +
 						CassandraDB.COLUMN_IS_LITERAL +
 						") > ? AND TOKEN(" +
 						CassandraDB.COLUMN_SUB + ", " +
 						CassandraDB.COLUMN_PRE + ", " +
 						CassandraDB.COLUMN_OBJ + ", " +
 						CassandraDB.COLUMN_IS_LITERAL + ") <= ? AND " +
AND " + - CassandraDB.COLUMN_STEP + " >= " + min + " AND " + - CassandraDB.COLUMN_STEP + " <= " + max + + CassandraDB.COLUMN_INFERRED_STEPS + " = " + certainStep + " AND " + + CassandraDB.COLUMN_TRANSITIVE_LEVELS + " >= " + min + " AND " + + CassandraDB.COLUMN_TRANSITIVE_LEVELS + " <= " + max + " ALLOW FILTERING"); } } else if (typeFilters.size() == 1){ - if (stepFilters.size() != 0){ // stepFilter is only for handling transitive property + if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property System.err.println("This is not supported!!!"); return; } @@ -101,7 +102,7 @@ else if (typeFilters.size() == 1){ CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] + " ALLOW FILTERING"); }else{ - if (stepFilters.size() != 0){ // stepFilter is only for handling transitive property + if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property System.err.println("This is not supported!!!"); return; } @@ -176,14 +177,16 @@ private static void configureCassandraOutput(Job job) { ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; CqlConfigHelper.setOutputCql(job.getConfiguration(), query); } // In each derivation, we may create a set of jobs + // certainStep is optional, if it is specified then we can use it to filter transitiveLevel with non-equal operator + // (see cql specification) public static Job createNewJob(Class classJar, String jobName, - Set typeFilters, Set stepFilters, int numMapTasks, int numReduceTasks, + Set typeFilters, Set transitiveLevelFilters, int certainStep, int numMapTasks, int numReduceTasks, boolean bConfigCassandraInput, boolean bConfigCassandraOutput) throws IOException { Configuration conf = new Configuration(); @@ -196,7 +199,7 @@ public static Job createNewJob(Class classJar, String jobName, job.setNumReduceTasks(numReduceTasks); if (bConfigCassandraInput) - configureCassandraInput(job, typeFilters, stepFilters); + configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep); if (bConfigCassandraOutput) configureCassandraOutput(job); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java index e27c689..2868182 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java @@ -1,34 +1,22 @@ package cn.edu.neu.mitt.mrj.reasoner; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; -import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; import org.apache.cassandra.hadoop.ConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlInputFormat; import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; -import org.apache.cassandra.thrift.SlicePredicate; -import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import 
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java
index e27c689..2868182 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java
@@ -1,34 +1,22 @@
 package cn.edu.neu.mitt.mrj.reasoner;

-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Map;

-import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
 import org.apache.cassandra.hadoop.ConfigHelper;
 import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
 import org.apache.cassandra.hadoop.cql3.CqlInputFormat;
 import org.apache.cassandra.hadoop.cql3.CqlOutputFormat;
-import org.apache.cassandra.thrift.SlicePredicate;
-import org.apache.cassandra.utils.ByteBufferUtil;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;

-import prejustification.SelectInferRows;
-import prejustification.SelectInferRowsMap;
-import prejustification.SelectInferRowsReduce;
 import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
-import cn.edu.neu.mitt.mrj.utils.Cassandraconf;

 public class ReasonedJustifications extends Configured implements Tool{
 	public int run(String[] args) throws Exception{
@@ -37,7 +25,7 @@ public int run(String[] args) throws Exception{
 		Job job = new Job(conf);
 		job.setJobName(" Test ");
-		job.setJarByClass(SelectInferRows.class);
+		job.setJarByClass(ReasonedJustifications.class);
 		job.setNumReduceTasks(8);

 		ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
@@ -68,7 +56,7 @@ public int run(String[] args) throws Exception{
 		ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
 		String query = "UPDATE " + CassandraDB.KEYSPACE + "." + "resultrows" +
-				" SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? ";
+				" SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?";
 		CqlConfigHelper.setOutputCql(job.getConfiguration(), query);

 		job.setMapperClass(ReasonedJustificationsMapper.class);
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java
index 7e719e1..dcd2230 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java
@@ -31,12 +31,13 @@ public void map(Long keys, Row rows, Context context) throws IOException, Interr
//		ResultSet results = session.execute(statement);

 		Integer inferredsteps;
+		Integer transitivelevel;

//		for (Row rows : row){
 			if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) {
 				String conKey;
 				//*****
-				conKey = rows.getLong(CassandraDB.COLUMN_SUB)	// without using ByteBufferUtil
+				conKey = rows.getLong(CassandraDB.COLUMN_SUB)	// without using ByteBufferUtil
 						+ "-" + rows.getLong(CassandraDB.COLUMN_PRE)
 						+ "-" + rows.getLong(CassandraDB.COLUMN_OBJ)
 						+ "-" + rows.getBool(CassandraDB.COLUMN_IS_LITERAL)
@@ -44,10 +45,11 @@ public void map(Long keys, Row rows, Context context) throws IOException, Interr
 						+ "-" + rows.getInt(CassandraDB.COLUMN_RULE)
 						+ "-" + rows.getLong(CassandraDB.COLUMN_V1)
 						+ "-" + rows.getLong(CassandraDB.COLUMN_V2)
-						+ "-" + rows.getLong(CassandraDB.COLUMN_V3);
-				inferredsteps = rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS);
+						+ "-" + rows.getLong(CassandraDB.COLUMN_V3)
+						+ "-" + rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS);	// Modified by WuGang, 2015-07-15
+				transitivelevel = rows.getInt(CassandraDB.COLUMN_TRANSITIVE_LEVELS);	// Added by WuGang, 2015-07-15

-				context.write(new Text(conKey), new IntWritable(inferredsteps));
+				context.write(new Text(conKey), new IntWritable(transitivelevel));
 			}
//		}

@@ -70,7 +72,8 @@ public void setup(Context context) throws IOException, InterruptedException{
 				CassandraDB.COLUMN_V1 + " bigint, " +
 				CassandraDB.COLUMN_V2 + " bigint, " +
COLUMN_V3 + " bigint, " + - CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // from this line, fields are non-primary key + CassandraDB.COLUMN_TRANSITIVE_LEVELS + " int, " + " PRIMARY KEY ((" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + "), " + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + " ) ) "; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java index 436750b..83d36db 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java @@ -34,8 +34,10 @@ public void reduce(Text key, Iterable values, Context context) thro //prepare the insert variables collection List variables = new ArrayList(); - int var = Integer.parseInt(value.toString()); - variables.add(ByteBufferUtil.bytes(var)); + int var_inferredsteps = Integer.parseInt(value.toString()); + variables.add(ByteBufferUtil.bytes(var_inferredsteps)); + int var_transitivelevel = Integer.parseInt(splitkeys[9]); + variables.add(ByteBufferUtil.bytes(var_transitivelevel)); context.write(keys, variables); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java index 6380c3f..df0446c 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java @@ -205,6 +205,7 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter "OWL reasoner: infer properties inherited statements (not recursive), step " + step, new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), new HashSet(), // not supported + step, // not used here numMapTasks, numReduceTasks, true, true); job.getConfiguration().setInt("reasoner.step", step); @@ -262,13 +263,30 @@ private long inferTransitivityStatements(String[] args) levels.add(new Integer(level-2)); //Configure input. Take only the directories that are two levels below - Job job = MapReduceReasonerJobConfig.createNewJob( - OWLReasoner.class, - "OWL reasoner: transitivity rule. Level " + level, - new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - levels, - numMapTasks, - numReduceTasks, true, true); + Job job = null; + + // for the first two level, we use the whole data in the database + if (level <= 2) + job = MapReduceReasonerJobConfig.createNewJob( + OWLReasoner.class, + "OWL reasoner: transitivity rule. Level " + level, + new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), + 0, + numMapTasks, + numReduceTasks, true, true); + // for the level more than two, we only consider the last two level derived data in the current step + if (level > 2) + job = MapReduceReasonerJobConfig.createNewJob( + OWLReasoner.class, + "OWL reasoner: transitivity rule. 
Level " + level, + new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + levels, + step, + numMapTasks, + numReduceTasks, true, true); + + job.getConfiguration().setInt("reasoning.baseLevel", step); job.getConfiguration().setInt("reasoning.transitivityLevel", level); job.getConfiguration().setInt("maptasks", Math.max(numMapTasks / 10, 1)); @@ -315,6 +333,7 @@ private long inferSameAsStatements(String[] args) { "OWL reasoner: build the synonyms table from same as triples - step " + derivationStep++, filters, // FileUtils.FILTER_ONLY_HIDDEN.getClass(), new HashSet(), // Added by WuGang, 2015-07-12 + step, // not used here numMapTasks, numReduceTasks, true, true); @@ -353,6 +372,7 @@ private long inferSameAsStatements(String[] args) { "OWL reasoner: sampling more common resources", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), new HashSet(), // Added by WuGang, 2015-07-12 + step, // not used here numMapTasks, numReduceTasks, true, false); // input from cassandra, but output to hdfs job.getConfiguration().setInt("reasoner.samplingPercentage", sampling); //Sampling at 10% @@ -408,6 +428,7 @@ private long inferSameAsStatements(String[] args) { "OWL reasoner: replace triples using the sameAs synonyms: reconstruct triples", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), new HashSet(), // Added by WuGang, 2015-07-12 + step, // not used here numMapTasks, numReduceTasks, false, true); // input from hdfs, but output to cassandra @@ -459,6 +480,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter "OWL reasoner: infer equivalence from subclass and subprop. step " + step, filters, new HashSet(), // Added by WuGang, 20150712 + step, // not used here numMapTasks, numReduceTasks, true, true); job.getConfiguration().setInt("maptasks", Math.max(job.getConfiguration().getInt("maptasks", 0) / 10, 1)); @@ -494,6 +516,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, "OWL reasoner: some and all values rule. step " + step, new HashSet(), new HashSet(), + step, // not used here numMapTasks, numReduceTasks, true, true); job.getConfiguration().setInt("reasoner.step", step); @@ -553,6 +576,7 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup "OWL reasoner: hasValue rule. 
step " + step, new HashSet(), new HashSet(), + step, // not used here numMapTasks, numReduceTasks, true, true); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java index b2b04bd..69035e4 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java @@ -28,7 +28,9 @@ public class OWLTransitivityMapper extends Mapper minLevel) { + if (level > minLevel) { NumberUtils.encodeLong(keys,0,value.getPredicate()); NumberUtils.encodeLong(keys,8,value.getSubject()); oKey.set(keys, 0, 16); @@ -63,14 +65,14 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup values[0] = 3; else values[0] = 2; - NumberUtils.encodeLong(values, 1, step); + NumberUtils.encodeLong(values, 1, level); NumberUtils.encodeLong(values, 9, value.getObject()); oValue.set(values, 0, 17); context.write(oKey, oValue); } - //����u p w, w p v���������key��(p, w),�����value��(value[0], key.getStep(), value.getObject) + //����u p w, w p v���������key��(p, w),�����value��(value[0], key.getStep(), value.getObject) } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java index 7ad71eb..6b4bd7f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java @@ -69,9 +69,9 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setPredicate(NumberUtils.decodeLong(key.getBytes(),0)); - // Added by WuGang,���extended triple������Ϊu p w,����w��һ���ؼ���resource�������ع�ԭʼ��ruleǰ�� + // Added by WuGang,���extended triple������Ϊu p w,����w��һ���ؼ��resource�������ع�ԭʼ��ruleǰ�� triple.setType(TriplesUtils.OWL_HORST_4); -// triple.setRsubject(rsubject); // �������������������ģ���μ�����Ĵ��� +// triple.setRsubject(rsubject); // �����������������ģ���μ�����Ĵ��� triple.setRpredicate(NumberUtils.decodeLong(key.getBytes(),0)); triple.setRobject(NumberUtils.decodeLong(key.getBytes(), 8)); @@ -87,10 +87,12 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setSubject(entry.getKey()); triple.setObject(entry2.getKey()); - // Added by Wugang, ���extended triple��ʵ�������rsubject�費���ö�����ν����Ϊ�˱��������������ð� - triple.setRsubject(triple.getSubject()); // ��Ϊ��ѡȡu p w��Ϊ���triple��������е�u�������������ĺ�������� + // Added by Wugang, ���extended triple��ʵ�������rsubject�費���ö�����ν����Ϊ�˱������������ð� + triple.setRsubject(triple.getSubject()); // ��Ϊ��ѡȡu p w��Ϊ���triple��������е�u������������ĺ�������� - source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); + // Modified by WuGang, 2015-07-15 + //source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); + source.setTransitiveLevel((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); // context.write(source, triple); CassandraDB.writeJustificationToMapReduceContext(triple, source, context); @@ -109,6 +111,7 @@ public void setup(Context context) { level = context.getConfiguration().getInt("reasoning.transitivityLevel", -1); // Modified by WuGang 2015-01-28 //source.setDerivation(TripleSource.OWL_DERIVED); + source.setStep(baseLevel + 1); // Added by WuGang, 2015-07-15 source.setDerivation(TripleSource.TRANSITIVE_ENABLED); 
triple.setObjectLiteral(false); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index d0a8940..4c7827f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -84,6 +84,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep "RDFS subproperty inheritance reasoning", new HashSet(), new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSubPropInheritMapper.class); @@ -106,6 +107,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep "RDFS subproperty domain and range reasoning", new HashSet(), new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSubPropDomRangeMapper.class); @@ -135,6 +137,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep "RDFS subclass reasoning", filters, new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSubclasMapper.class); @@ -167,6 +170,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep "RDFS special properties reasoning", filters, new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, numReduceTasks, true, true); job.setMapperClass(RDFSSpecialPropsMapper.class); diff --git a/mrj-0.1/src/prejustification/SelectInferRows.java b/mrj-0.1/src/prejustification/SelectInferRows.java deleted file mode 100644 index 2ecbf7a..0000000 --- a/mrj-0.1/src/prejustification/SelectInferRows.java +++ /dev/null @@ -1,143 +0,0 @@ -package prejustification; - -import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; -import org.apache.cassandra.hadoop.ConfigHelper; -import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; -import org.apache.cassandra.hadoop.cql3.CqlInputFormat; -import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; -import org.apache.cassandra.thrift.SlicePredicate; -import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; - -import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; - - -public class SelectInferRows extends Configured implements Tool{ - //private static final Logger logger = LoggerFactory.getLogger(); - public static void main(String[] args) throws Exception{ - int exitCode = ToolRunner.run(new Configuration(), new SelectInferRows(), args); - System.exit(exitCode); - } - - public int run(String[] args) throws Exception{ - - //Job job = new Job(getConf()); -// Job job = MapReduceReasonerJobConfig.createNewJob(SelectInferRows.class, "Select Rows", new HashSet(), 16, 16, true, true); - -// ConfigHelper.setInputInitialAddress(getConf(), CassandraDB.DEFAULT_HOST); -// ConfigHelper.setInputColumnFamily(getConf(), CassandraDB.KEYSPACE, CassandraDB.COLUMN_JUSTIFICATION); - - // job.setJobName("Del 
Rows"); -// job.setJarByClass(SelectInferRows.class); - - /* - * Select(map) - */ - - - Configuration conf = new Configuration(); - - Job job = new Job(conf); - job.setJobName(" Test "); - job.setJarByClass(SelectInferRows.class); - job.setNumReduceTasks(8); - - ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); - ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? ALLOW FILTERING"); - CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); - //Modifide by LiYang - ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); - job.setInputFormatClass(CqlInputFormat.class); - job.setOutputKeyClass(Map.class); - job.setOutputValueClass(List.class); - job.setOutputFormatClass(CqlOutputFormat.class); - ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); - ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - - ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; - CqlConfigHelper.setOutputCql(job.getConfiguration(), query); - - job.setMapperClass(SelectInferRowsMap.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(ByteBuffer.class); - job.setReducerClass(SelectInferRowsReduce.class); - - -// job.setInputFormatClass(ColumnFamilyInputFormat.class); -// ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); -// ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); -// ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); -// CqlConfigHelper.setInputCql(job.getConfiguration(), -// "SELECT * FROM " + CassandraDB.KEYSPACE + "." 
+ CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
-//				" WHERE RULE = 0");
-//		CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE);
-//		ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000);
-//		job.setInputFormatClass(CqlInputFormat.class);
-//
-//
-//		/*
-//		 * Insert(reduce)
-//		 */
-////		job.setCombinerClass(SelectInferRowsReduce.class);
-//		job.setOutputKeyClass(Map.class);
-//		job.setOutputValueClass(List.class);
-//		// equivalent to specifying the output directory for the DB write; otherwise it reports that the output directory cannot be found
-//		job.setOutputFormatClass(CqlOutputFormat.class);
-//
-//		ConfigHelper.setOutputInitialAddress(job.getConfiguration(), CassandraDB.DEFAULT_HOST);
-//		ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
-//		ConfigHelper.setOutputColumnFamily(getConf(), CassandraDB.KEYSPACE, "ruleiszero");
-//		ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE);// **
-////		String query = "INSERT INTO mrjks.ruleiszero (" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + ", " +
-////				CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 +", "
-////				+ CassandraDB.COLUMN_INFERRED_STEPS + ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, )";
-//		String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
-//				" SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? ";
-//		CqlConfigHelper.setOutputCql(job.getConfiguration(), query);
-//
-//
-//		ConfigHelper.getInputSplitSize(job.getConfiguration());
-//		CqlConfigHelper.getInputPageRowSize(job.getConfiguration());
-////		String column_names = CassandraDB.COLUMN_SUB + CassandraDB.COLUMN_PRE + CassandraDB.COLUMN_OBJ + CassandraDB.COLUMN_IS_LITERAL +
-////				CassandraDB.COLUMN_TRIPLE_TYPE + CassandraDB.COLUMN_RULE + CassandraDB.COLUMN_V1 + CassandraDB.COLUMN_V2 + CassandraDB.COLUMN_V3;
-////		SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(column_names)));
-//
-//		// this statement is what actually launches the map and reduce
-		job.waitForCompletion(true);
-
-		System.out.println("Finished");
-		return 0;
-
-	}
-
-}
diff --git a/mrj-0.1/src/prejustification/SelectInferRowsMap.java b/mrj-0.1/src/prejustification/SelectInferRowsMap.java
deleted file mode 100644
index 634a812..0000000
--- a/mrj-0.1/src/prejustification/SelectInferRowsMap.java
+++ /dev/null
@@ -1,79 +0,0 @@
-package prejustification;
-import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.cassandra.utils.ByteBufferUtil;
-import org.apache.hadoop.mapreduce.Mapper;
-
-
-import com.datastax.driver.core.Cluster;
-import com.datastax.driver.core.Metadata;
-import com.datastax.driver.core.ResultSet;
-
-import com.datastax.driver.core.Row;
-import com.datastax.driver.core.Session;
-import com.datastax.driver.core.SimpleStatement;
-
-
-
-public class SelectInferRowsMap extends Mapper, ByteBuffer> {
-	private Cluster cluster;
-	private Session session;
-
-
-	public void map(ByteBuffer key, Row row, Context context) throws IOException, InterruptedException{
-		SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications");
-		statement.setFetchSize(100);
-		ResultSet results = session.execute(statement);
-
-		System.out.println("---------MAP----------");
-		Map keys = new HashMap();
-		ByteBuffer
inferredsteps; - for (Row rows : results){ - if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { - keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_SUB))); - keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_PRE))); - keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_SUB))); - keys.put(CassandraDB.COLUMN_IS_LITERAL, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_IS_LITERAL))); - keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_TRIPLE_TYPE))); - keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_RULE))); - keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V1))); - keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V2))); - keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V3))); - inferredsteps = ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS)); - context.write(keys, inferredsteps); - } - } - } - - public void setup(Context context) throws IOException, InterruptedException{ - - cluster = Cluster.builder().addContactPoint(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host).build(); - Metadata metadata = cluster.getMetadata(); - System.out.printf("-------Connected to cluster: %s\n", metadata.getClusterName()); - session = cluster.connect(); - - String cquery1 = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "ruleiszero" + - " ( " + - CassandraDB.COLUMN_SUB + " bigint, " + // partition key - CassandraDB.COLUMN_PRE + " bigint, " + // partition key - CassandraDB.COLUMN_OBJ + " bigint, " + // partition key - CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key - CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + - CassandraDB.COLUMN_RULE + " int, " + - CassandraDB.COLUMN_V1 + " bigint, " + - CassandraDB.COLUMN_V2 + " bigint, " + - CassandraDB. 
COLUMN_V3 + " bigint, " + - CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key - " PRIMARY KEY ((" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + "), " + - CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + - " ) ) "; - session.execute(cquery1); - } - - -} diff --git a/mrj-0.1/src/prejustification/SelectInferRowsReduce.java b/mrj-0.1/src/prejustification/SelectInferRowsReduce.java deleted file mode 100644 index adcfa19..0000000 --- a/mrj-0.1/src/prejustification/SelectInferRowsReduce.java +++ /dev/null @@ -1,18 +0,0 @@ -package prejustification; - -import org.apache.hadoop.mapreduce.Reducer; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Map; -public class SelectInferRowsReduce extends Reducer, ByteBuffer, Map, ByteBuffer> { - public void reduce(Map key, Iterable value, Context context) throws IOException, InterruptedException{ - - for (ByteBuffer inferredsteps : value) { - System.out.println(key); - context.write(key, inferredsteps); - } - - } - -} From 4061c834a8322ad0001f445926cb0b7da93f512e Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Thu, 16 Jul 2015 00:09:37 +0800 Subject: [PATCH 07/16] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86inferredsteps?= =?UTF-8?q?=E5=92=8Ctransitivelevel=E7=B4=A2=E5=BC=95=E7=9A=84=E5=88=9B?= =?UTF-8?q?=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index f8e0878..9cdde30 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -748,6 +748,15 @@ public void createIndexOnRule() throws InvalidRequestException, UnavailableExcep client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } + public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } // Added by WuGang 2015-06-08 @@ -909,6 +918,8 @@ public static void main(String[] args) { db.init(); db.createIndexOnTripleType(); db.createIndexOnRule(); + db.createIndexOnInferredSteps(); + db.createIndexOnTransitiveLevel(); // db.insertResources(100, "Hello World!"); Set schemaTriples = new HashSet(); Set filters = new HashSet(); From 0ecd6b82e4e94b8b6a93d022f29c61dcf788d084 Mon Sep 17 00:00:00 2001 From: Joe Date: Thu, 10 Dec 2015 16:16:59 +0800 Subject: [PATCH 08/16] =?UTF-8?q?Revert=20"=E9=8F=87=E5=AD=98=E6=9F=8A?= =?UTF-8?q?=E9=8D=9A=E5=BA=AF=E7=B4=9D=E9=8E=B5=D1=86=3F=E5=AD=ABWLHorstJu?= =?UTF-8?q?stification=E9=90=A8=E5=8B=AC=E6=A4=82=E9=8D=8A=E6=AC=8F?= =?UTF-8?q?=E6=B9=AA=E9=8F=88=E2=82=AC=E9=8D=9A=E5=BA=A1=E5=A7=9E=E6=B6=93?= =?UTF-8?q?=E2=82=AC=E6=B6=93=3F=E9=8D=99=E5=82=9B=E6=9A=9F--clearoriginal?= =?UTF-8?q?s=E9=94=9B=E5=B1=BD=E7=B0=B2=E7=92=87=E3=83=A5=E5=BD=B2?= =?UTF-8?q?=E6=B5=A0=E3=83=A5=E5=9E=B9=E9=97=84=E3=82=89=E5=81=85=E6=B5=9C?= =?UTF-8?q?=E6=B6=97=E7=AC=89=E9=90=A2=E3=84=A7=E6=AE=91=E6=B6=93=E5=A4=8A?= =?UTF-8?q?=E5=8E=93=E7=BC=81=E5=8B=A9=E2=82=AC=3F"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bfa4af43004d2c23733455b3ff8d306cb13ca68d. --- mrj-0.1/.classpath | 4 +- .../org.eclipse.core.resources.prefs | 1 + .../mrj/importtriples/FilesImportTriples.java | 17 ++++-- ...tTriplesReconstructReducerToCassandra.java | 37 ++++++++----- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 54 ++++++++++++++++--- .../reasoner/MapReduceReasonerJobConfig.java | 4 +- .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java | 13 +++-- .../reasoner/rdfs/RDFSSpecialPropsMapper.java | 11 ++++ .../rdfs/RDFSSpecialPropsReducer.java | 8 +-- .../rdfs/RDFSSubPropDomRangeMapper.java | 29 ++++++++-- .../rdfs/RDFSSubPropInheritMapper.java | 19 +++++++ .../mrj/reasoner/rdfs/RDFSSubclasMapper.java | 23 +++++++- .../rdfs/RDFSSubpropDomRangeReducer.java | 25 ++++++--- 13 files changed, 201 insertions(+), 44 deletions(-) diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath index e1c1f9a..229a9be 100644 --- a/mrj-0.1/.classpath +++ b/mrj-0.1/.classpath @@ -5,7 +5,7 @@ - - + + diff --git a/mrj-0.1/.settings/org.eclipse.core.resources.prefs b/mrj-0.1/.settings/org.eclipse.core.resources.prefs index 2dcd9cf..83eb0de 100644 --- a/mrj-0.1/.settings/org.eclipse.core.resources.prefs +++ b/mrj-0.1/.settings/org.eclipse.core.resources.prefs @@ -1,2 +1,3 @@ eclipse.preferences.version=1 +encoding//src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java=UTF-8 encoding//src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java=UTF-8 diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java index b75de2c..2704d86 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java @@ -225,11 +225,18 @@ public static void main(String[] args) throws Exception { // log.info("Import time: " + (System.currentTimeMillis() - time)); //Modified by LiYang 2015/4/10 - CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); - db.init(); - db.createIndexOnTripleType(); - db.createIndexOnRule(); - db.CassandraDBClose(); +// CassandraDB db = new 
CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160);
+//		db.init();
+//		db.createIndexOnTripleType();
+//		//db.createIndexOnRule();
+//
+//		/*
+//		 * Add by LiYang
+//		 * 2015.7.19
+//		 */
+//		//db.createIndexOnInferredSteps();
+//		//db.createIndexOnTransitiveLevel();
+//		db.CassandraDBClose();

 		System.out.println("Import time: " + (System.currentTimeMillis() - time));
 		System.exit(res);
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
index beff7a7..8b4f8f8 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
@@ -2,7 +2,7 @@
  * Project Name: mrj-0.1
  * File Name: ImportTriplesReconstructReducerToCassandra.java
  * @author Gang Wu
- * 2014-10-28, 10:35:24 AM
+ * 2014-10-28, 10:35:24 AM
  * 
  * Description:
  * 	Send reducer output to Cassandra DB by representing triples with ids
@@ -78,7 +78,7 @@ protected void reduce(LongWritable key, Iterable values, Context c
 			}

 			if (counter != 3) {
-				// Modified by WuGang 2010-12-3, if this is not a 3-component triple it needs to be handled
+				// Modified by WuGang 2010-12-3, if this is not a 3-component triple it needs to be handled
 				log.error("Found a non-triple when reconstructing. The count num is " + counter + ", and triple is  " + oValue);
//				throw new IOException("Triple is not reconstructed!");
 			}
@@ -89,17 +89,30 @@ protected void reduce(LongWritable key, Iterable values, Context c
 			byte one = 1;
 			byte zero = 0;

+			/*
+			 * Modified WHY???
+			 */
 			// Prepare composite key (sub, pre, obj)
-			keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject()));
-			keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate()));
-			keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject()));
+//			keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject()));
+//			keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate()));
+//			keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject()));
+//			// the length of boolean type in cassandra is one byte!!!!!!!!
+//			keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+//			keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject())));
+//			keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(0));	// for original triple set 0 int
+//			keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(0L));	// for original triple set 0 long
+//			keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(0L));	// for original triple set 0 long
+//			keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(0L));	// for original triple set 0 long
+			keys.put("sub", ByteBufferUtil.bytes(oValue.getSubject()));
+			keys.put("pre", ByteBufferUtil.bytes(oValue.getPredicate()));
+			keys.put("obj", ByteBufferUtil.bytes(oValue.getObject()));
 			// the length of boolean type in cassandra is one byte!!!!!!!!
- keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); - keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(0)); // for original triple set 0 int - keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(0L)); // for original triple set 0 long - keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(0L)); // for original triple set 0 long - keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(0L)); // for original triple set 0 long + keys.put("isliteral", oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + keys.put("tripletype", ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); + keys.put("rule", ByteBufferUtil.bytes(0)); // for original triple set 0 int + keys.put("v1", ByteBufferUtil.bytes(0L)); // for original triple set 0 long + keys.put("v2", ByteBufferUtil.bytes(0L)); // for original triple set 0 long + keys.put("v3", ByteBufferUtil.bytes(0L)); // for original triple set 0 long // Prepare variables, here is a boolean value for CassandraDB.COLUMN_IS_LITERAL List variables = new ArrayList(); @@ -108,7 +121,7 @@ protected void reduce(LongWritable key, Iterable values, Context c // For column inferred, init it as false i.e. zero // variables.add(ByteBuffer.wrap(new byte[]{zero})); variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple - variables.add(ByteBufferUtil.bytes(0)); // Added by WuGang, 2015-07-15, to support transitive level + variables.add(ByteBufferUtil.bytes(0)); // Added by WuGang, 2015-07-15, to support transitive level context.write(keys, variables); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index 9cdde30..298b5a3 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -49,6 +49,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import ch.qos.logback.classic.db.DBAppender; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -731,7 +732,7 @@ public Map> loadMapIntoMemory(Set filters, boole } } - logger.debug("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime)); + logger.info("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime)); return schemaTriples; } @@ -757,8 +758,49 @@ public void createIndexOnTransitiveLevel() throws InvalidRequestException, Unava String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } + + public void Index() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + + this.createIndexOnInferredSteps(); + this.createIndexOnRule(); + this.createIndexOnTransitiveLevel(); + this.createIndexOnTripleType(); + + } + + /* + * Add by L + * Drop index + */ + public void DropTripleTypeIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_tripletype_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropRuleIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_rule_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropInferredStepsIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_inferredSteps_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropTransitiveLevelIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_transitiveLevel_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void UnIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + + this.DropInferredStepsIndex(); + this.DropRuleIndex(); + this.DropTransitiveLevelIndex(); + this.DropTripleTypeIndex(); + } + // Added by WuGang 2015-06-08 - public static ResultSet getRows(){ Builder builder = Cluster.builder(); @@ -916,10 +958,10 @@ public static void main(String[] args) { try { CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); db.init(); - db.createIndexOnTripleType(); - db.createIndexOnRule(); - db.createIndexOnInferredSteps(); - db.createIndexOnTransitiveLevel(); +// db.createIndexOnTripleType(); +// db.createIndexOnRule(); +// db.createIndexOnInferredSteps(); +// db.createIndexOnTransitiveLevel(); // db.insertResources(100, "Hello World!"); Set schemaTriples = new HashSet(); Set filters = new HashSet(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index 8814250..01b499a 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -192,12 +192,14 @@ public static Job createNewJob(Class classJar, String jobName, Configuration conf = new Configuration(); conf.setInt("maptasks", numMapTasks); conf.set("input.filter", typeFilters.toString()); - + Job job = new Job(conf); job.setJobName(jobName); job.setJarByClass(classJar); job.setNumReduceTasks(numReduceTasks); + job.setNumReduceTasks(16); + if (bConfigCassandraInput) configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep); if (bConfigCassandraOutput) diff 
--git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index 4c7827f..510be7d 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -5,6 +5,10 @@ import java.util.HashSet; import java.util.Set; +import org.apache.cassandra.thrift.InvalidRequestException; +import org.apache.cassandra.thrift.SchemaDisagreementException; +import org.apache.cassandra.thrift.TimedOutException; +import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.BytesWritable; @@ -13,9 +17,12 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.thrift.TException; +import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -69,14 +76,13 @@ public static void main(String[] args) { // The derivation will be launched in run() - public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException { + public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { long time = System.currentTimeMillis(); - parseArgs(args); Job job = null; long derivation = 0; - + // RDFS subproperty inheritance reasoning // job = createNewJob("RDFS subproperty inheritance reasoning", "FILTER_ONLY_HIDDEN"); job = MapReduceReasonerJobConfig.createNewJob( @@ -87,6 +93,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep step, // not used here numMapTasks, numReduceTasks, true, true); + job.setMapperClass(RDFSSubPropInheritMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java index f915446..5439b49 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java @@ -91,6 +91,7 @@ public void setup(Context context) throws IOException { try{ CassandraDB db = new CassandraDB(); + db.Index(); if (memberProperties == null) { memberProperties = new HashSet(); Set filters = new HashSet(); @@ -128,4 +129,14 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } + + protected void cleanup(Context context) throws IOException, InterruptedException{ + try { + CassandraDB db = new CassandraDB(); + db.UnIndex(); + db.CassandraDBClose(); + } catch (Exception e) { + // TODO: handle exception + } + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java index fc5ea85..2cbd51f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Map; 
+import org.apache.cassandra.thrift.Cassandra;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -88,8 +89,8 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3)
 				CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
 				//context.write(source, oTriple);
 				break;
-			case 4: // no corresponding rdfs rule
-			case 5: // no corresponding rdfs rule
+			case 4: // no corresponding rdfs rule
+			case 5: // no corresponding rdfs rule
 				oTriple.setSubject(NumberUtils.decodeLong(bKey, 1));
 				oTriple.setPredicate(TriplesUtils.RDFS_MEMBER);
 				// oTriple.setPredicate(NumberUtils.decodeLong(bKey, 9));
@@ -108,7 +109,8 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3)
 	@Override
 	public void setup(Context context) {
-		CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
+		CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
+
 		source.setDerivation(TripleSource.RDFS_DERIVED);
 		source.setStep(context.getConfiguration().getInt("reasoner.step", 0));
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java
index 9821e66..78e1fa4 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java
@@ -11,6 +11,7 @@
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Mapper.Context;
 import org.apache.thrift.TException;
 import org.apache.thrift.transport.TTransportException;
 import org.slf4j.Logger;
@@ -67,8 +68,8 @@ public void map(Long key, Row row, Context context) throws IOException, Interru
 			NumberUtils.encodeLong(bKey,8,value.getObject());	// Added by WuGang, 2010-08-26
 			// oKey.set(value.getSubject());
 			oKey.set(bKey, 0, 16);	// Modified by WuGang, 2010-08-26
-			oValue.set(value.getPredicate() << 1); // the lowest bit of oValue is 0, marking that a domain is being handled
-			context.write(oKey, oValue); // emit the <key, p> pair, for rule 2
+			oValue.set(value.getPredicate() << 1); // the lowest bit of oValue is 0, marking that a domain is being handled
+			context.write(oKey, oValue); // emit the <key, p> pair, for rule 2
 		}
 
 		//Check if the predicate has a range
@@ -78,8 +79,8 @@ public void map(Long key, Row row, Context context) throws IOException, Interru
 			NumberUtils.encodeLong(bKey,8,value.getSubject());	// Added by WuGang, 2010-08-26
 			// oKey.set(value.getObject());
 			oKey.set(bKey, 0, 16);	// Modified by WuGang, 2010-08-26
-			oValue.set((value.getPredicate() << 1) | 1); // the lowest bit of oValue is 1, marking that a range is being handled
-			context.write(oKey, oValue); // emit the <key, p> pair, for rule 3
+			oValue.set((value.getPredicate() << 1) | 1); // the lowest bit of oValue is 1, marking that a range is being handled
+			context.write(oKey, oValue); // emit the <key, p> pair, for rule 3
 		}
 	}
 
@@ -120,6 +121,14 @@ protected void setup(Context context) throws IOException {
 			e.printStackTrace();
 		}
 
+		try {
+			CassandraDB db = new CassandraDB();
+			db.Index();
+			db.CassandraDBClose();
+		} catch (Exception e) {
+			// TODO: handle exception
+		}
+
 		// Some debug codes
//		System.out.println("In mapper setup, previousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged);
//		System.out.println("Input split: " + context.getInputSplit());
@@ -130,4 +139,16 @@ protected void setup(Context context) throws IOException {
 //		}
 	}
+
+
+	protected void cleanup(Context
context) throws IOException, InterruptedException{
+		try {
+			CassandraDB db = new CassandraDB();
+			db.UnIndex();
+			db.CassandraDBClose();
+		} catch (Exception e) {
+			// TODO: handle exception
+		}
+	}
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java
index 8347faf..0f03564 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java
@@ -7,6 +7,7 @@
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Mapper.Context;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -92,5 +93,23 @@ protected void setup(Context context) throws IOException {
 		} else {
 			log.debug("Subprop schema triples already loaded in memory");
 		}
+
+		try {
+			CassandraDB db = new CassandraDB();
+			db.Index();
+			db.CassandraDBClose();
+		} catch (Exception e) {
+			// TODO: handle exception
+		}
+	}
+
+	protected void cleanup(Context context) throws IOException, InterruptedException{
+		try {
+			CassandraDB db = new CassandraDB();
+			db.UnIndex();
+			db.CassandraDBClose();
+		} catch (Exception e) {
+			// TODO: handle exception
+		}
 	}
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java
index 67ffb1f..1274ea5 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java
@@ -5,6 +5,7 @@
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Mapper.Context;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -36,6 +37,26 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 		oKey.set(bKey, 0, 9);
 		context.write(oKey, oValue);
-//		System.out.println("Ready in RDFSSubclasMapper - "+value);
+//		System.out.println("Ready in RDFSSubclasMapper - "+value);
 	}
+
+	protected void setup(Context context) throws IOException, InterruptedException{
+		try {
+			CassandraDB db = new CassandraDB();
+			db.Index();
+			db.CassandraDBClose();
+		} catch (Exception e) {
+			// TODO: handle exception
+		}
+	}
+	protected void cleanup(Context context) throws IOException, InterruptedException{
+		try {
+			CassandraDB db = new CassandraDB();
+			db.UnIndex();
+			db.CassandraDBClose();
+		} catch (Exception e) {
+			// TODO: handle exception
+		}
+	}
+
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
index d774a6d..18fa915 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
@@ -46,17 +46,23 @@ public class RDFSSubpropDomRangeReducer extends Reducer
 	public void reduce(BytesWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
 		byte[] bKey = key.getBytes();	// Added by WuGang, 2010-08-26
-//
long uri = key.get(); // for a domain property this is s; for a range property this is o
-		long uri = NumberUtils.decodeLong(bKey, 0);	// for domain this is s; for range this is o
-		long uri_opposite = NumberUtils.decodeLong(bKey, 8);	// for domain this is o; for range this is s
-		
-		derivedProps.clear(); // clear
+//		long uri = key.get(); // for a domain property this is s; for a range property this is o
+		long uri = NumberUtils.decodeLong(bKey, 0);	// for domain this is s; for range this is o
+		long uri_opposite = NumberUtils.decodeLong(bKey, 8);	// for domain this is o; for range this is s
+
+		derivedProps.clear(); // clear
+
+		//Logger logger = LoggerFactory.getLogger(CassandraDB.class);
+		//long time = System.currentTimeMillis();
 
 		//Get the predicates with a range or domain associated to this URIs
 		propURIs.clear();
 		Iterator<LongWritable> itr = values.iterator();
 		while (itr.hasNext())
-			propURIs.add(itr.next().get()); // collect the p values
+			propURIs.add(itr.next().get()); // collect the p values
+
+
+		//logger.info("add " + (System.currentTimeMillis() - time));
 
 		Iterator<Long> itrProp = propURIs.iterator();
 		while (itrProp.hasNext()) {
@@ -78,6 +84,8 @@ public void reduce(BytesWritable key, Iterable values, Context con
 			}
 		}
 
+		//logger.info("loop " + (System.currentTimeMillis() - time));
+
 		//Derive the new statements
 		// Iterator itr2 = derivedProps.iterator();
 		Iterator<Map.Entry<Long, Long>> itr2 = derivedProps.iterator();	// Modified by WuGang, 2010-08-26
@@ -90,7 +98,7 @@ public void reduce(BytesWritable key, Iterable values, Context con
 			oTriple.setObject(entry.getKey());	// Modified by WuGang, 2010-08-26
 			// Added by WuGang, 2010-08-26
 			long propURI = entry.getValue();
-			oTriple.setRpredicate(propURI >> 1);	// Modified by WuGang 2010-12-03: the predicate was shifted left in RDFSSubPropDomRangeMapper, so shift it back here
+			oTriple.setRpredicate(propURI >> 1);	// Modified by WuGang 2010-12-03: the predicate was shifted left in RDFSSubPropDomRangeMapper, so shift it back here
 			if ((propURI & 0x1) == 1) {	// Rule 3, for range
 				oTriple.setType(TriplesUtils.RDFS_3);
 				oTriple.setRsubject(uri_opposite);
@@ -104,7 +112,10 @@ public void reduce(BytesWritable key, Iterable values, Context con
 			CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
 			//context.write(source, oTriple);
 		}
+		//logger.info(" " + (System.currentTimeMillis() - time));
+
 		context.getCounter("RDFS derived triples", "subprop range and domain rule").increment(derivedProps.size());
+		//logger.info("finish " + (System.currentTimeMillis() - time));
 	}
 
 	@Override

From 8dcecf018adf5be6411236816a8dc6a9672eee09 Mon Sep 17 00:00:00 2001
From: Joe
Date: Thu, 10 Dec 2015 16:18:54 +0800
Subject: [PATCH 09/16] Signed-off-by: Joe

---
 mrj-0.1/.classpath                            |   3 +-
 .../mrj/importtriples/FilesImportTriples.java |  12 +-
 ...tTriplesReconstructReducerToCassandra.java |  39 ++-
 .../ImportTriplesSampleMapper.java            |   2 +-
 .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java  | 254 ++++++++++++++----
 .../neu/mitt/mrj/io/dbs/MrjMultioutput.java   |  86 ++++++
 .../justification/OWLHorstJustification.java  |   4 +-
 .../reasoner/MapReduceReasonerJobConfig.java  | 188 +++++++++----
 .../mrj/reasoner/ReasonedJustifications.java  |   8 +-
 .../reasoner/owl/OWLAllSomeValuesMapper.java  |  40 +--
 .../reasoner/owl/OWLAllSomeValuesReducer.java |  18 +-
 .../owl/OWLEquivalenceSCSPMapper.java         |   2 +-
 .../owl/OWLEquivalenceSCSPReducer.java        |  14 +-
 .../mrj/reasoner/owl/OWLHasValueMapper.java   |   6 +-
 .../mrj/reasoner/owl/OWLHasValueReducer.java  |   6 +-
 .../reasoner/owl/OWLNotRecursiveMapper.java   |   2 +-
 .../reasoner/owl/OWLNotRecursiveReducer.java  |  16 +-
 .../mitt/mrj/reasoner/owl/OWLReasoner.java    | 119 +++++++-
 .../owl/OWLSameAsDeconstructMapper.java       |   8 +-
 .../owl/OWLSameAsDeconstructReducer.java      |  23 +-
 .../mrj/reasoner/owl/OWLSameAsMapper.java     |  11 +-
 .../owl/OWLSameAsReconstructMapper.java       |  18 +-
 .../owl/OWLSameAsReconstructReducer.java      |  33 +--
 .../mrj/reasoner/owl/OWLSameAsReducer.java    |   6 +-
 .../owl/OWLSampleResourcesMapper.java         |   1 +
 .../owl/OWLSampleResourcesReducer.java        |   4
+ .../reasoner/owl/OWLTransitivityMapper.java | 1 + .../reasoner/owl/OWLTransitivityReducer.java | 3 +- .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java | 102 ++++++- .../reasoner/rdfs/RDFSSpecialPropsMapper.java | 23 +- .../rdfs/RDFSSpecialPropsReducer.java | 10 +- .../rdfs/RDFSSubPropDomRangeMapper.java | 50 ++-- .../rdfs/RDFSSubPropInheritMapper.java | 37 ++- .../mrj/reasoner/rdfs/RDFSSubclasMapper.java | 26 +- .../mrj/reasoner/rdfs/RDFSSubclasReducer.java | 21 +- .../rdfs/RDFSSubpropDomRangeReducer.java | 187 ++++++++----- .../rdfs/RDFSSubpropInheritReducer.java | 22 +- 37 files changed, 972 insertions(+), 433 deletions(-) create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath index 229a9be..62a1d79 100644 --- a/mrj-0.1/.classpath +++ b/mrj-0.1/.classpath @@ -5,7 +5,8 @@ + - + diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java index 2704d86..a647241 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java @@ -184,13 +184,12 @@ private void rewriteTriples(String[] args) throws Exception { job.setOutputValueClass(List.class); job.setOutputFormatClass(CqlOutputFormat.class); - ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_ALLTRIPLES); // is it useful below line? //job.getConfiguration().set(CASSANDRA_PRIMARY_KEY, "(sub, pre, obj)"); - String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? , " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; - + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + " SET " + CassandraDB.COLUMN_IS_LITERAL + "=? ,"+ CassandraDB.COLUMN_TRIPLE_TYPE + "=?" 
+ ","+ CassandraDB.COLUMN_INFERRED_STEPS + "=0"; CqlConfigHelper.setOutputCql(job.getConfiguration(), query); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); @@ -223,10 +222,11 @@ public static void main(String[] args) throws Exception { long time = System.currentTimeMillis(); int res = ToolRunner.run(new Configuration(), new FilesImportTriples(), args); // log.info("Import time: " + (System.currentTimeMillis() - time)); - - //Modified by LiYang 2015/4/10 +// +// //Modified by LiYang 2015/4/10 // CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); // db.init(); +// // Modified // db.createIndexOnTripleType(); // //db.createIndexOnRule(); // diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java index 8b4f8f8..9933365 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java @@ -16,8 +16,11 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.UUID; +import org.apache.cassandra.cli.CliParser.rowKey_return; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.UUIDGen; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; import org.slf4j.Logger; @@ -28,6 +31,7 @@ import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; + /** * @author gibeo_000 * @@ -90,29 +94,21 @@ protected void reduce(LongWritable key, Iterable values, Context c byte zero = 0; /* - * Modified WHY??? - */ - // Prepare composite key (sub, pre, obj) -// keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject())); -// keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate())); -// keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject())); -// // the length of boolean type in cassandra is one byte!!!!!!!! -// keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); -// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); -// keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(0)); // for original triple set 0 int -// keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(0L)); // for original triple set 0 long -// keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(0L)); // for original triple set 0 long -// keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(0L)); // for original triple set 0 long keys.put("sub", ByteBufferUtil.bytes(oValue.getSubject())); keys.put("pre", ByteBufferUtil.bytes(oValue.getPredicate())); keys.put("obj", ByteBufferUtil.bytes(oValue.getObject())); // the length of boolean type in cassandra is one byte!!!!!!!! 
- keys.put("isliteral", oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - keys.put("tripletype", ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); - keys.put("rule", ByteBufferUtil.bytes(0)); // for original triple set 0 int - keys.put("v1", ByteBufferUtil.bytes(0L)); // for original triple set 0 long - keys.put("v2", ByteBufferUtil.bytes(0L)); // for original triple set 0 long - keys.put("v3", ByteBufferUtil.bytes(0L)); // for original triple set 0 long + keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); +// keys.put("id", ByteBufferUtil.bytes(UUIDGen.getTimeUUID())); + */ + + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject())); + keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); + // Prepare variables, here is a boolean value for CassandraDB.COLUMN_IS_LITERAL List variables = new ArrayList(); @@ -120,8 +116,9 @@ protected void reduce(LongWritable key, Iterable values, Context c // the length of boolean type in cassandra is one byte!!!!!!!! // For column inferred, init it as false i.e. 
zero
//		variables.add(ByteBuffer.wrap(new byte[]{zero}));
-		variables.add(ByteBufferUtil.bytes(0));	// It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
-		variables.add(ByteBufferUtil.bytes(0));	// Added by WuGang, 2015-07-15, to support transitive level
+		variables.add(oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+		variables.add(ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject())));
+
 		context.write(keys, variables);
 	}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
index c1153f9..8614816 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
@@ -19,7 +19,7 @@ public class ImportTriplesSampleMapper extends Mapper preloadedURIs = TriplesUtils.getInstance().getPreloadedURIs();
 
 	protected void map(Text key, Text value, Context context) {
-		System.out.println("Entering ImportTriplesSampleMapper");
+		//System.out.println("Entering ImportTriplesSampleMapper");
 		try {
 			String[] uris = TriplesUtils.parseTriple(value.toString(), key.toString());
 			for(String uri : uris) {
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
index 298b5a3..e0e59b8 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
@@ -22,8 +22,11 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.UUID;
 
 import org.apache.cassandra.exceptions.RequestExecutionException;
+import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat;
+import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
 import org.apache.cassandra.thrift.Cassandra;
 import org.apache.cassandra.thrift.Column;
 import org.apache.cassandra.thrift.Compression;
@@ -69,6 +72,7 @@
 import com.datastax.driver.core.querybuilder.QueryBuilder;
 //modified
 import com.datastax.driver.core.querybuilder.Select;
+import com.datastax.driver.core.utils.UUIDs;
 
 
 /**
@@ -77,14 +81,17 @@
 */
 public class CassandraDB {
 	private static final Logger logger = LoggerFactory.getLogger(CassandraDB.class);
-	public static final String KEYSPACE = "mrjks";						// mr.j keyspace
+	public static final String KEYSPACE = "mrjkss";						// mr.j keyspace
 	public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications";	// mr.j keyspace
 	public static final String COLUMNFAMILY_RESOURCES = "resources";	// mr.j keyspace
 	public static final String COLUMNFAMILY_RESULTS = "results";		// mr.j keyspace
+	
+	public static final String COLUMNFAMILY_ALLTRIPLES = "alltriples";
+	
 	public static final String COLUMN_SUB = "sub";						// mrjks.justifications.sub
 	public static final String COLUMN_PRE = "pre";						// mrjks.justifications.pre
 	public static final String COLUMN_OBJ = "obj";						// mrjks.justifications.obj
-	public static final String COLUMN_TRIPLE_TYPE = "tripletype" ;		// mrjks.justifications.tripletype
+	public static final String COLUMN_TRIPLE_TYPE = "tripletype" ;		// mrjks.justifications.tripletype
 	public static final String COLUMN_IS_LITERAL = "isliteral" ;		// mrjks.justifications.isliteral
 	public static final String COLUMN_INFERRED_STEPS = "inferredsteps" ;	// mrjks.justifications.inferredsteps
 	public static final String COLUMN_RULE = "rule";					// mrjks.justifications.rule
@@
-101,7 +108,7 @@ public class CassandraDB {
 	public static final String CQL_PAGE_ROW_SIZE = "10000"; //3 modified by liyang
-	
+
 	// 2014-12-11, Very strange, this works around.
 	public static final String CONFIG_LOCATION = cn.edu.neu.mitt.mrj.utils.Cassandraconf.CassandraConfFile;
 	public static void setConfigLocation(){
@@ -178,13 +185,47 @@ private static void setupKeyspace(Cassandra.Iface client)
 		}
 	}
 
+	public static String getAlltripleSchema(){
+		String ALLTRIPLE_SCHEMA = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES +
+			" ( " +
+			COLUMN_SUB + " bigint, " +			// partition key
+			COLUMN_PRE + " bigint, " +			// partition key
+			COLUMN_OBJ + " bigint, " +			// partition key
+			COLUMN_IS_LITERAL + " boolean, " +	// regular column (not part of the key)
+			COLUMN_TRIPLE_TYPE + " int, " +
+			COLUMN_INFERRED_STEPS + " int, " +
+			"PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ +
+			")) )";
+		return (ALLTRIPLE_SCHEMA);
+	}
+
+	public static String getStepsSchema(Integer step){
+		String query = "CREATE TABLE " + CassandraDB.KEYSPACE + ".step" + step +
+			" ( " +
+			"sub" + " bigint, " +
+			"pre" + " bigint, " +
+			"obj" + " bigint, " +
+			"rule int, " +
+			"v1" + " bigint, " +
+			"v2" + " bigint, " +
+			"v3" + " bigint, " +
+			"transitivelevel int" +
+			", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))";
+		return query;
+	}
+
+	public static String getAlltripleStatement(){
+		// note: the column list must match the values the caller binds
+		return ("INSERT INTO mrjks.alltriples (sub, pre, obj, inferredsteps, isliteral, tripletype) VALUES (?, ?, ?, ?, ?, ?)");
+	}
+
 	private static void setupTables(Cassandra.Iface client)
 			throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException {
-		
+
+		/*
 		// Create justifications table
 		String query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
 				" ( " +
@@ -200,8 +241,8 @@ private static void setupTables(Cassandra.Iface client)
//				COLUMN_TRIPLE_TYPE + " int, " +
 				COLUMN_INFERRED_STEPS + " int, " +		// from this line, the fields are non-primary key
 				COLUMN_TRANSITIVE_LEVELS + " int, " +
-				" PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " +
-				COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
+				" PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE +
+				", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
				//", " + COLUMN_TRIPLE_TYPE +
 				" ) ) ";
@@ -211,9 +252,10 @@ private static void setupTables(Cassandra.Iface client)
 		} catch (InvalidRequestException e) {
 			logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS, e);
 		}
-		
+		*/
+
 		// Create resources table
-		query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES +
+		String query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES +
 				" ( " +
 				COLUMN_ID + " bigint, " +
 				COLUMN_LABEL + " text, " +
@@ -227,6 +269,7 @@ private static void setupTables(Cassandra.Iface client)
 			logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES, e);
 		}
 
+		/*
 		// Create results table
 		query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESULTS +
 				" ( " +
@@ -241,6 +284,8 @@ private static void setupTables(Cassandra.Iface client)
 		catch (InvalidRequestException e) {
 			logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESULTS, e);
 		}
+
+
 		//Create resultrow table
 		String cquery = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + "."
+ "resultrows" + " ( " + @@ -260,8 +305,50 @@ private static void setupTables(Cassandra.Iface client) //", " + COLUMN_TRIPLE_TYPE + " ) ) "; client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); - + */ + //创建所有三元组的表 + String cquery = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + " ( " + + COLUMN_SUB + " bigint, " + // partition key + COLUMN_PRE + " bigint, " + // partition key + COLUMN_OBJ + " bigint, " + // partition key + COLUMN_IS_LITERAL + " boolean, " + // partition key + COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_INFERRED_STEPS + " int, " + + "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + + ")) )"; + + try { + logger.info("set up table " + "all triples"); + client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); + } catch (InvalidRequestException e) { + logger.error("failed to create table " + KEYSPACE + "." + "AllTriples", e); + } + + for (int step = 1; step <= 2; step++) { + query = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + ".step" + step + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + "transitivelevel int" + + ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"; + try { + logger.info("set up table " + "step"); + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } catch (InvalidRequestException e) { + logger.error("failed to create table " + KEYSPACE + "." + "step", e); + } + } + + query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } @@ -276,7 +363,7 @@ public CassandraDB(String host, Integer port) throws TTransportException { } public void CassandraDBClose(){ - this.close(); + //this.close(); } public void init() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ @@ -294,9 +381,14 @@ public Cassandra.Iface getDBClient(){ * Get the row count according to the COLUMN_INFERRED_STEPS. * @return row count. */ + + /* + * Need to change + */ + public long getRowCountAccordingInferredSteps(int level){ //ALLOW FILTERING - String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + " WHERE " + COLUMN_INFERRED_STEPS + " = " + level + " ALLOW FILTERING"; long num = 0; @@ -328,7 +420,7 @@ public long getRowCountAccordingInferredSteps(int level){ */ public long getRowCountAccordingTripleType(int tripletype){ //ALLOW FILTERING - String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." 
+ COLUMNFAMILY_ALLTRIPLES + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; long num = 0; @@ -425,7 +517,7 @@ public void insertResources(long id, String label) throws InvalidRequestExceptio args.add(ByteBufferUtil.bytes(label)); CqlPreparedResult p_result = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); CqlResult result = client.execute_prepared_cql3_query(p_result.itemId, args, ConsistencyLevel.ANY); - logger.info("Number of results: " + result.getNum()); + //logger.info("Number of results: " + result.getNum()); } // TODO it's wrong!!!!!!!!!! @@ -447,10 +539,10 @@ public static Triple readJustificationFromMapReduceRow(Row row){ long pre = row.getLong(CassandraDB.COLUMN_PRE); long obj = row.getLong(CassandraDB.COLUMN_OBJ); boolean isObjectLiteral = row.getBool(CassandraDB.COLUMN_IS_LITERAL); - long v1 = row.getLong(CassandraDB.COLUMN_V1); - long v2 = row.getLong(CassandraDB.COLUMN_V2); - long v3 = row.getLong(CassandraDB.COLUMN_V3); - int rule = row.getInt(CassandraDB.COLUMN_RULE); + long v1 = -1; + long v2 = -2; + long v3 = -3; + int rule = -4; result.setObject(obj); result.setObjectLiteral(isObjectLiteral); @@ -471,40 +563,80 @@ public static int readStepFromMapReduceRow(Row row){ public static void writeJustificationToMapReduceContext( Triple triple, TripleSource source, - Context context) throws IOException, InterruptedException{ + Context context, + String stepname) throws IOException, InterruptedException{ Map keys = new LinkedHashMap(); + Map allkeys = new LinkedHashMap(); + List allvariables = new ArrayList(); + long time = System.currentTimeMillis(); byte one = 1; byte zero = 0; - + MrjMultioutput _output; + _output = new MrjMultioutput, List>(context); // Prepare composite key (sub, pre, obj) keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); // the length of boolean type in cassandra is one byte!!!!!!!! 
- keys.put(CassandraDB.COLUMN_IS_LITERAL, - triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - int tripletype = TriplesUtils.DATA_TRIPLE; - if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ - tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table - }else{ - tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); - } - keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + + allvariables.add(ByteBufferUtil.bytes(source.getStep())); + allvariables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allvariables.add(ByteBufferUtil.bytes((int)triple.getType())); + // Prepare variables List variables = new ArrayList(); // variables.add(ByteBufferUtil.bytes(oValue.getSubject())); // the length of boolean type in cassandra is one byte!!!!!!!! // For column inferred, init it as false i.e. zero -// variables.add(ByteBuffer.wrap(new byte[]{zero})); - variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple - variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive - context.write(keys, variables); + //variables.add(ByteBuffer.wrap(new byte[]{zero})); + + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); + + + + + time = System.currentTimeMillis(); + _output.write(stepname, keys, variables); + System.out.println("write step" + (System.currentTimeMillis() - time)); + time = System.currentTimeMillis(); + _output.write("alltriples", allkeys, allvariables); + System.out.println("write all " + (System.currentTimeMillis() - time)); + + } + + public static void writealltripleToMapReduceContext( + Triple triple, + TripleSource source, + Context context) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + + byte one = 1; + byte zero = 0; + + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + + List variables = new ArrayList(); + variables.add(ByteBufferUtil.bytes(source.getStep())); + variables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + int tripletype = TriplesUtils.DATA_TRIPLE; + if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ + tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table + }else{ + tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); + } + 
variables.add(ByteBufferUtil.bytes(tripletype)); + context.write(keys,variables); } public boolean loadSetIntoMemory(Set schemaTriples, Set filters, int previousStep) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { @@ -635,20 +767,24 @@ public boolean loadSetIntoMemory( * add ALLOW FILTERING * 2015/6/12 */ - String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + - " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = ? " + " ALLOW FILTERING"; -// System.out.println(query); + + + String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + + " FROM " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + + " WHERE " + COLUMN_TRIPLE_TYPE + " = ? "; + System.out.println(query); CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); for (int filter : filters){ List list = new ArrayList(); list.add(ByteBufferUtil.bytes(filter)); +// System.out.println("filter " + filter); CqlResult result = client.execute_prepared_cql3_query(preparedResult.itemId, list, ConsistencyLevel.ONE); for(CqlRow row : result.rows){ Iterator columnsIt = row.getColumnsIterator(); Long sub = null, obj = null; + System.out.println("row : " + row); while (columnsIt.hasNext()) { Column column = columnsIt.next(); if (new String(column.getName()).equals(COLUMN_SUB)) @@ -662,9 +798,11 @@ public boolean loadSetIntoMemory( } } if (!inverted) - schemaTriples.add(sub); + schemaTriples.add(sub); else schemaTriples.add(obj); + + System.out.println("schema : " + schemaTriples); } } @@ -693,8 +831,8 @@ public Map> loadMapIntoMemory(Set filters, boole // Require an index created on COLUMN_TRIPLE_TYPE column String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + - " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = ? " + " ALLOW FILTERING"; + " FROM " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + + " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING"; //partitonkey CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); @@ -740,7 +878,7 @@ public Map> loadMapIntoMemory(Set filters, boole // Created index on COLUMN_TRIPLE_TYPE column public void createIndexOnTripleType() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRIPLE_TYPE + ")"; + String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } @@ -748,30 +886,27 @@ public void createIndexOnRule() throws InvalidRequestException, UnavailableExcep String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } - - public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - } +// +// public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ +// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")"; +// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); +// } public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } + /* + public void Index() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - - this.createIndexOnInferredSteps(); - this.createIndexOnRule(); - this.createIndexOnTransitiveLevel(); - this.createIndexOnTripleType(); - + //createIndexOnInferredSteps(); + createIndexOnRule(); + createIndexOnTransitiveLevel(); + createIndexOnTripleType(); + System.out.println("IndexED"); } - /* - * Add by L - * Drop index - */ public void DropTripleTypeIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ String query = "DROP INDEX mrjks.justifications_tripletype_idx"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); @@ -799,7 +934,7 @@ public void UnIndex() throws InvalidRequestException, UnavailableException, Time this.DropTransitiveLevelIndex(); this.DropTripleTypeIndex(); } - + */ // Added by WuGang 2015-06-08 public static ResultSet getRows(){ @@ -816,7 +951,7 @@ public static ResultSet getRows(){ } public static boolean delornot = false; - +/* public static void removeOriginalTriples(){ if (delornot == true) return; @@ -882,7 +1017,7 @@ public static void removeOriginalTriples(){ session.execute(delete); System.out.println(row); } - + */ // SimpleClientDataStax scds = new SimpleClientDataStax(); // scds.connect(DEFAULT_HOST); // @@ -918,7 +1053,7 @@ public static void removeOriginalTriples(){ // scds.close(); - } +// } //create by LiYang // public static void createReasonTable(){ @@ -967,6 +1102,7 @@ public static void main(String[] args) { Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); db.loadSetIntoMemory(schemaTriples, filters, 0); + //db.loadMapIntoMemory(filters, inverted) System.out.println(schemaTriples); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java new file mode 100644 index 0000000..5202fc7 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java @@ -0,0 +1,86 @@ +/** + * + */ +package cn.edu.neu.mitt.mrj.io.dbs; + +import java.io.IOException; + +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import 
org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * @author L
+ *
+ */
+public class MrjMultioutput<KEYOUT, VALUEOUT> extends MultipleOutputs<KEYOUT, VALUEOUT> {
+
+	public MrjMultioutput(TaskInputOutputContext context) {
+		super(context);
+	}
+
+	@Override
+	protected synchronized RecordWriter getRecordWriter(
+			TaskAttemptContext taskContext, String columnFamilyNameName)
+			throws IOException, InterruptedException {
+
+//		CqlBulkOutputFormat.setColumnFamilySchema(taskContext.getConfiguration(), "step1", CassandraDB.getStepsSchema(1));
+//		CqlBulkOutputFormat.setColumnFamilyInsertStatement(taskContext.getConfiguration(), "step1", CassandraDB.getAlltripleStatement());
+
+		// look for record-writer in the cache
+		RecordWriter writer = recordWriters.get(columnFamilyNameName);
+
+		System.out.println("get Record Writer");
+
+		// If not in cache, create a new one
+		if (writer == null) {
+			// get the record writer from context output format
+//			FileOutputFormat.setOutputName(taskContext, baseFileName);
+			System.out.println("Before ConfigHelper.setOutputColumnFamily");
+			System.out.println(ConfigHelper.getOutputColumnFamily(taskContext.getConfiguration()));
+
+			ConfigHelper.setOutputColumnFamily(taskContext.getConfiguration(), columnFamilyNameName);
+			CqlConfigHelper.setOutputCql(taskContext.getConfiguration(), getCql(columnFamilyNameName));
+
+			try {
+				System.out.println(taskContext.getOutputFormatClass());
+				writer = ((OutputFormat) ReflectionUtils.newInstance(
+						taskContext.getOutputFormatClass(), taskContext.getConfiguration()))
+						.getRecordWriter(taskContext);
+
+				System.out.println(writer.getClass());
+			} catch (ClassNotFoundException e) {
+				throw new IOException(e);
+			}
+
+			// if counters are enabled, wrap the writer with context
+			// to increment counters
+			if (countersEnabled) {
+				writer = new MultipleOutputs.RecordWriterWithCounter(writer, columnFamilyNameName, context);
+			}
+
+			// add the record-writer to the cache
+			recordWriters.put(columnFamilyNameName, writer);
+		}
+		return writer;
+	}
+
+
+	String getCql(String columnFamilyNameName){
+		if ("alltriples".equals(columnFamilyNameName)) {	// use equals(), not ==, to compare string contents
+			System.out.println("get cql allt");
+			return ("UPDATE alltriples SET inferredsteps =? , isliteral =? , tripletype =?");
+		}
+		System.out.println("get cql step");
+		return("UPDATE " + columnFamilyNameName + " SET transitivelevel =? 
"); + } + +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java index 5552170..369dc7a 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java @@ -158,8 +158,8 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio parseArgs(args); // Added by WuGang 2015-06-08 - if (bClearOriginals) - CassandraDB.removeOriginalTriples(); +// if (bClearOriginals) +// CassandraDB.removeOriginalTriples(); long total = 0; // Total justifications diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index 01b499a..a3b4edc 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -9,16 +9,19 @@ */ package cn.edu.neu.mitt.mrj.reasoner; + import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlInputFormat; import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; @@ -37,44 +40,52 @@ private static void configureCassandraInput(Job job, Set typeFilters, S // Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml //ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_ALLTRIPLES); if (typeFilters.size() == 0){ if (transitiveLevelFilters.size() == 0) CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + " WHERE TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ") > ? AND TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ") <= ? ALLOW FILTERING"); +// "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " WHERE TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") > ? AND TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") <= ? 
ALLOW FILTERING"); else{ Integer max = java.util.Collections.max(transitiveLevelFilters); Integer min = java.util.Collections.min(transitiveLevelFilters); CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? AND " + - CassandraDB.COLUMN_INFERRED_STEPS + " = " + certainStep + " AND " + - CassandraDB.COLUMN_TRANSITIVE_LEVELS + " >= " + min + " AND " + - CassandraDB.COLUMN_TRANSITIVE_LEVELS + " <= " + max + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + + ") <= ? " + +// CassandraDB.COLUMN_INFERRED_STEPS + " = " + certainStep + " AND " + +// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " >= " + min + " AND " + +// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " <= " + max + " ALLOW FILTERING"); } @@ -87,20 +98,19 @@ else if (typeFilters.size() == 1){ } CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + " WHERE TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ") > ? AND TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] + - " ALLOW FILTERING"); + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + + ") <= ? "); +// ") <= ? AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] + +// " ALLOW FILTERING"); }else{ if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property System.err.println("This is not supported!!!"); @@ -119,21 +129,20 @@ else if (typeFilters.size() == 1){ CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + " WHERE TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ") > ? AND TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + - " ALLOW FILTERING"); + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + + ") <= ? 
"); +// + "AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + +// " ALLOW FILTERING"); // String strFilter = filters.toString(); // String strInFilterClause = strFilter.substring(1, strFilter.length()-1); // remove "[" and "]" characters of Set.toString() @@ -167,32 +176,62 @@ else if (typeFilters.size() == 1){ // Output to CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraOutput(Job job) { + private static void configureCassandraOutput(Job job, Integer step) { //Set the output job.setOutputKeyClass(Map.class); job.setOutputValueClass(List.class); - job.setOutputFormatClass(CqlOutputFormat.class); + + job.setOutputFormatClass(CqlBulkOutputFormat.class); + CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), CassandraDB.KEYSPACE + ".step1", CassandraDB.getStepsSchema(1)); + System.out.println("Schema : " + CassandraDB.getStepsSchema(1)); +// job.setOutputFormatClass(ColumnFamilyOutputFormat.class); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; - CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + //ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, "alltriples"); + ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE); + + /* + * addMultiNamedOutput + * + */ + JobConf jobconf = new JobConf(job.getConfiguration()); +// MultipleOutputs.addNamedOutput(jobconf, "step" + step, ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); +// MultipleOutputs.addNamedOutput(jobconf, CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); + +// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), "step" + step); +// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); +// job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); + +// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.COLUMNFAMILY_ALLTRIPLES); + + // String query = "UPDATE " + CassandraDB.KEYSPACE + ".step" + step + + // " SET " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=? " ; +// "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + +// " SET " + CassandraDB.COLUMN_IS_LITERAL + "=?" + CassandraDB.COLUMN_TRIPLE_TYPE + "=?"; +// String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + +// " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; + // CqlConfigHelper.setOutputCql(job.getConfiguration(), query); +// String querysString = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + +// " SET " + CassandraDB.COLUMN_INFERRED_STEPS + " =? ";// + CassandraDB.COLUMN_IS_LITERAL + "=?, " + CassandraDB.COLUMN_TRIPLE_TYPE + " =? 
"; + //CqlConfigHelper.setOutputCql(job.getConfiguration(), querysString); + + } - // In each derivation, we may create a set of jobs + // In each derivation, we may create a set of jobs // certainStep is optional, if it is specified then we can use it to filter transitiveLevel with non-equal operator // (see cql specification) public static Job createNewJob(Class classJar, String jobName, Set typeFilters, Set transitiveLevelFilters, int certainStep, int numMapTasks, int numReduceTasks, - boolean bConfigCassandraInput, boolean bConfigCassandraOutput) + boolean bConfigCassandraInput, boolean bConfigCassandraOutput, Integer step) throws IOException { Configuration conf = new Configuration(); conf.setInt("maptasks", numMapTasks); conf.set("input.filter", typeFilters.toString()); - + Job job = new Job(conf); job.setJobName(jobName); job.setJarByClass(classJar); @@ -203,7 +242,8 @@ public static Job createNewJob(Class classJar, String jobName, if (bConfigCassandraInput) configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep); if (bConfigCassandraOutput) - configureCassandraOutput(job); + configureCassandraOutput(job, step); + // Added by WuGang 2010-05-25 System.out.println("Create a job - " + jobName); @@ -212,6 +252,44 @@ public static Job createNewJob(Class classJar, String jobName, return job; } - - +/* + public static void CreateTables(String jobname){ + Builder builder = Cluster.builder(); + builder.addContactPoint(CassandraDB.DEFAULT_HOST); + SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); + Cluster clu = builder.build(); + Session session = clu.connect(); + + String query = ""; + if(jobname == "RDFS special properties reasoning"){ + query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." + jobname + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + "transitiveleves int" + + ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"; + } + else { + query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." + jobname + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + ", primary key((id, rule) ,v1, v2, v3))"; + } + + session.execute(query); + System.out.println(query); + System.out.println("--------Create Table----------"); + } + */ } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java index 2868182..adea7f8 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java @@ -36,13 +36,13 @@ public int run(String[] args) throws Exception{ " WHERE TOKEN(" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_OBJ + + //CassandraDB.COLUMN_IS_LITERAL + ") > ? AND TOKEN(" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_OBJ + + //CassandraDB.COLUMN_IS_LITERAL + ") <= ? 
ALLOW FILTERING"); CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); //Modifide by LiYang diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java index 5fc2e89..8cad3d8 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java @@ -36,7 +36,7 @@ public class OWLAllSomeValuesMapper extends Mapper values = someValues.get(value.getObject()); Iterator itr = values.iterator(); bKey[0] = 2; bValue[0] = 1; - bValue[17] = 0; // ��������һ��someValues + bValue[17] = 0; // ��������һ��someValues NumberUtils.encodeLong(bKey, 9, value.getSubject()); - NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, ��wд��value�� + NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, ��wд��value�� while (itr.hasNext()) { byte[] bytes = itr.next(); System.arraycopy(bytes, 0, bKey, 1, 8); System.arraycopy(bytes, 8, bValue, 1, 8); - context.write(oKey, oValue); //�������((p,x),v) -> ((p,x),(v,w,0)) + context.write(oKey, oValue); //�������((p,x),v) -> ((p,x),(v,w,0)) } } - // ��Ҫ���⴫��һ��v - if (allValues.containsKey(value.getObject())) { //�ҵ���һ��(w,rdf:type,v)��������Ԫ�飬����v����v owl:allValuesFrom u + // ��Ҫ���⴫��һ��v + if (allValues.containsKey(value.getObject())) { //�ҵ���һ��(w,rdf:type,v)��������Ԫ�飬����v����v owl:allValuesFrom u log.info("I met allValuesFrom: " + value); Collection values = allValues.get(value.getObject()); Iterator itr = values.iterator(); bKey[0] = 1; bValue[0] = 1; - bValue[17] = 1; // ��������һ��allValues + bValue[17] = 1; // ��������һ��allValues NumberUtils.encodeLong(bKey, 9, value.getSubject()); - NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, ��vд��value�� + NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, ��vд��value�� while (itr.hasNext()) { byte[] bytes = itr.next(); System.arraycopy(bytes, 0, bKey, 1, 8); System.arraycopy(bytes, 8, bValue, 1, 8); - context.write(oKey, oValue); //�������((p,w),u) -> ((p,w),(u,v,1)) + context.write(oKey, oValue); //�������((p,w),u) -> ((p,w),(u,v,1)) } } } else { - // onPropertySome�з��õ������е���onPropertySome�����õ����ԣ������е� v owl:someValuesFrom w��ʽ����Ԫ���е�w - if (onPropertySome.contains(value.getPredicate())) {//ijһ����Ԫ��u p x�е�p��һ��onPropertySome�����õ������� + // onPropertySome�з��õ������е���onPropertySome�����õ����ԣ������е� v owl:someValuesFrom w��ʽ����Ԫ���е�w + if (onPropertySome.contains(value.getPredicate())) {//ijһ����Ԫ��u p x�е�p��һ��onPropertySome�����õ������� //Rule 15 - someValuesFrom log.info("I met onPropertySome: " + value); bKey[0] = 2; @@ -103,19 +103,19 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup NumberUtils.encodeLong(bKey, 1, value.getPredicate()); NumberUtils.encodeLong(bKey, 9, value.getObject()); NumberUtils.encodeLong(bValue, 1, value.getSubject()); - context.write(oKey, oValue); //�����((p,x),(u,,)) value�ĺ�����field������ֵ,û�и�ֵ + context.write(oKey, oValue); //�����((p,x),(u,,)) value�ĺ�����field������ֵ,û�и�ֵ } - // onPropertyAll�з��õ������е���onPropertyAll�����õ����ԣ������е� v owl:allValuesFrom u��ʽ����Ԫ���е�u - if (onPropertyAll.contains(value.getPredicate())) {//ijһ����Ԫ��w p x�е�p��һ��onPropertyAll�����õ������� + // onPropertyAll�з��õ������е���onPropertyAll�����õ����ԣ������е� v owl:allValuesFrom u��ʽ����Ԫ���е�u + if 
 //Rule 16 - allValuesFrom
 log.info("I met onPropertyAll: " + value);
 bKey[0] = 1;
- bValue[0] = 0; // Added by WuGang: this line was missing originally; with more than one such triple the reduce phase would otherwise produce wrong results
+ bValue[0] = 0; // Added by WuGang: this line was missing originally; with more than one such triple the reduce phase would otherwise produce wrong results
 NumberUtils.encodeLong(bKey, 1, value.getPredicate());
 NumberUtils.encodeLong(bKey, 9, value.getSubject());
 NumberUtils.encodeLong(bValue, 1, value.getObject());
- context.write(oKey, oValue); // emits ((p,w),(x,,)); the last two value fields carry no meaningful values and are left unset
+ context.write(oKey, oValue); // emits ((p,w),(x,,)); the last two value fields carry no meaningful values and are left unset
 }
 }
 }
@@ -123,7 +123,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 @Override
 public void setup(Context context) throws IOException {
 previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1);
-
+
 // List filesProperty = MultiFilesReader.recursiveListStatus(context, "FILTER_ONLY_OWL_ON_PROPERTY");
 // Map> allValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_ALL_VALUES", context);
 // Map> someValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_SOME_VALUES", context);
@@ -212,7 +212,7 @@ protected void makeJoin(Map> onPropertyTmp, Context contex
 }
 
 if (allValuesTmp.containsKey(sub)) {
- // col holds the objects paired with this subject; each pair satisfies (subject, owl:allValuesFrom, object)
+ // col holds the objects paired with this subject; each pair satisfies (subject, owl:allValuesFrom, object)
 Collection col = allValuesTmp.get(sub);
 if (col != null) {
 Iterator itr = col.iterator();
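The mapper above and the reducer that follows communicate through hand-packed byte arrays: keys are [flag(1)][long(8)][long(8)] and values are [flag(1)][long(8)][long(8)][marker(1)], which is why offsets 1, 9 and 17 recur throughout. A minimal sketch of that layout as a standalone helper (a hypothetical class for illustration only, not part of this patch; it assumes nothing beyond the NumberUtils.encodeLong(byte[], offset, long) call used above):

import cn.edu.neu.mitt.mrj.utils.NumberUtils;

// Hypothetical illustration of the record layout used by OWLAllSomeValuesMapper/Reducer.
public final class JoinRecordLayout {

    // Key layout: [flag][property][resource] (1 + 8 + 8 = 17 bytes).
    // flag: 2 = someValues join group, 1 = allValues join group.
    public static byte[] packKey(byte flag, long property, long resource) {
        byte[] bKey = new byte[17];
        bKey[0] = flag;
        NumberUtils.encodeLong(bKey, 1, property);
        NumberUtils.encodeLong(bKey, 9, resource);
        return bKey;
    }

    // Value layout: [flag][resource][other][marker] (1 + 8 + 8 + 1 = 18 bytes).
    // flag: 1 = type record, 0 = resource record; marker: 0 = someValues, 1 = allValues.
    public static byte[] packValue(byte flag, long resource, long other, byte marker) {
        byte[] bValue = new byte[18];
        bValue[0] = flag;
        NumberUtils.encodeLong(bValue, 1, resource);
        NumberUtils.encodeLong(bValue, 9, other);
        bValue[17] = marker;
        return bValue;
    }
}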
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
index e8bad41..0d8bfdc 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
@@ -28,8 +28,8 @@ public class OWLAllSomeValuesReducer extends Reducer
 private LinkedList resources = new LinkedList();
 // Added by WuGang
- private LinkedList others = new LinkedList(); // kept in step with types
- private LinkedList s_a_types = new LinkedList(); // kept in step with types; records whether an entry is a someValues (0) or an allValues (1) match
+ private LinkedList others = new LinkedList(); // kept in step with types
+ private LinkedList s_a_types = new LinkedList(); // kept in step with types; records whether an entry is a someValues (0) or an allValues (1) match
 
 @Override
 public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException {
@@ -39,7 +39,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
 resources.clear();
 
 byte[] bKey = key.getBytes();
- long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject is the second long in the key, starting at offset 9 (one flag byte comes first)
+ long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject is the second long in the key, starting at offset 9 (one flag byte comes first)
 long predicate = NumberUtils.decodeLong(bKey, 1); // Added by WuGang 2010-07-14
 
 Iterator itr = values.iterator();
@@ -48,7 +48,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
 byte[] bValue = value.getBytes();
 if (bValue[0] == 1) { //Type triple
 types.add(NumberUtils.decodeLong(bValue, 1));
- others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang: type records carry an extra long (and one extra byte)
+ others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang: type records carry an extra long (and one extra byte)
 s_a_types.add(bValue[17]);
 } else { //Resource triple
 resources.add(NumberUtils.decodeLong(bValue, 1));
@@ -66,7 +66,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
 while (itrResource.hasNext()) {
 long resource = itrResource.next();
 triple.setSubject(resource);
- // for Type values: a someValues record looks like ((p,x),(v,w)), an allValues record like ((p,w),(u,v))
+ // for Type values: a someValues record looks like ((p,x),(v,w)), an allValues record like ((p,w),(u,v))
 Iterator itrTypes = types.listIterator();
 Iterator itrOthers = others.listIterator();
 Iterator itrSATypes = s_a_types.listIterator();
@@ -74,14 +74,14 @@ public void reduce(BytesWritable key, Iterable values, Context co
 long type = itrTypes.next();
 triple.setObject(type);
 
- // Added by WuGang: fill in the triple
+ // Added by WuGang: fill in the triple
 long other = itrOthers.next();
 byte s_a_type = itrSATypes.next();
- triple.setRsubject(rSubject); // x for someValues, w for allValues
+ triple.setRsubject(rSubject); // x for someValues, w for allValues
 // Modified by WuGang 2010-07-14
 // triple.setRpredicate(TriplesUtils.RDF_TYPE); //rdf:type
 triple.setRpredicate(predicate);
- triple.setRobject(other); // w for someValues, v for allValues
+ triple.setRobject(other); // w for someValues, v for allValues
 switch (s_a_type) {
 case 0:
 triple.setType(TriplesUtils.OWL_HORST_15);
@@ -95,7 +95,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
 
 // System.out.println("Generate an extended triple for OWLAllSomeValues: " + triple);
 // context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step12");
 }
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
index ab4cfc0..3323bd6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
@@ -85,7 +85,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 @Override
 public void setup(Context context) throws IOException {
-
+
 CassandraDB db;
 try {
 db = new CassandraDB();
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
index c755300..093fb2c 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
@@ -90,7 +90,7 @@ public void reduce(LongWritable key, Iterable values, Context con
 }
 }
 
- if (!found) { // these are the results derived by the inference
+ if (!found) { // these are the results derived by the inference
 triple.setObject(resource);
 triple.setSubject(key.get());
 triple.setPredicate(TriplesUtils.RDFS_SUBCLASS);
@@ -109,7 +109,7 @@ public void reduce(LongWritable key, Iterable values, Context con
 }
 
 // context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11");
 }
 }
@@ -146,12 +146,12 @@ public void reduce(LongWritable key, Iterable values, Context con
 }
 
 // context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11");
 }
 }
 
 //Subproperties
- // Modified by WuGang: this should apparently use superProperties
+ // Modified by WuGang: this should apparently use superProperties
// itr2 = equivalenceProperties.iterator();
 itr2 = superProperties.iterator();
 while (itr2.hasNext()) {
@@ -180,12 +180,12 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setRobject(triple.getObject());
 
 // context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11");
 }
 }
 
 //Subclasses
- // Modified by WuGang: this should apparently use superClasses
+ // Modified by WuGang: this should apparently use superClasses
// itr2 = equivalenceClasses.iterator();
 itr2 = superClasses.iterator();
 while (itr2.hasNext()) {
@@ -215,7 +215,7 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setRobject(triple.getObject());
 
 // context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11");
 }
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
index 3cd6514..2ca8a07 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
@@ -43,7 +43,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 }
 //TODO: check whether also the schema is modified
 oKey.set(value.getSubject());
- if (value.getPredicate() == TriplesUtils.RDF_TYPE && // for rule 14b: the incoming value is (u rdf:type v); the goal is to derive (u p w) in the reducer, which additionally needs the 14b schema triple (v owl:hasValue w)
+ if (value.getPredicate() == TriplesUtils.RDF_TYPE && // for rule 14b: the incoming value is (u rdf:type v); the goal is to derive (u p w) in the reducer, which additionally needs the 14b schema triple (v owl:hasValue w)
 hasValue.contains(value.getObject()) &&
 onProperty.contains(value.getObject())) {
// System.out.println("In OWLHasValueMapper for 14b: " + value); // Added by Wugang
@@ -52,7 +52,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 oValue.set(values, 0, 9);
 context.write(oKey, oValue);
 
- } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // for rule 14a: the incoming value is (u p w); the goal is to derive (u rdf:type v) in the reducer, which additionally needs the 14a schema triple (v owl:hasValue w)
+ } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // for rule 14a: the incoming value is (u p w); the goal is to derive (u rdf:type v) in the reducer, which additionally needs the 14a schema triple (v owl:hasValue w)
 && hasValueInverted.contains(value.getObject())
 && onPropertyInverted.contains(value.getPredicate())) {
// System.out.println("In OWLHasValueMapper for 14a: " + value); // Added by Wugang
@@ -71,7 +71,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 public void setup(Context context) throws IOException {
 previousStep = context.getConfiguration().getInt("reasoner.previousStep", -1);
-
+
 try{
 CassandraDB db = new CassandraDB();
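For orientation, the two rules being prepared here are OWL Horst 14a and 14b. Given the schema triples (v owl:onProperty p) and (v owl:hasValue w), rule 14b turns a membership triple (u rdf:type v) into (u p w), and rule 14a goes the opposite way, turning (u p w) into (u rdf:type v). A concrete instance, with names invented purely for the example:

  schema:  :TeaDrinker owl:onProperty :drinks .
           :TeaDrinker owl:hasValue  :tea .
  14b:     :alice rdf:type :TeaDrinker .   =>   :alice :drinks :tea .
  14a:     :bob :drinks :tea .             =>   :bob rdf:type :TeaDrinker .

The mapper keys both kinds of input triples by subject so the reducer that follows can complete the join against the in-memory hasValue/onProperty schema sets.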
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
index 8a6a562..67d5c53 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
@@ -71,7 +71,7 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setRobject(triple.getObject()); // w
// System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step13");
// context.write(source, triple);
 }
 }
@@ -97,11 +97,11 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setType(TriplesUtils.OWL_HORST_14a);
 triple.setRsubject(triple.getObject()); // v
// triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue
- triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26: the previous information was wrong, restored it here
+ triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26: the previous information was wrong, restored it here
 triple.setRobject(object); // w
// System.out.println("In OWLHasValueReducer for 14a output: "+triple); // Added by Wugang
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step13");
// context.write(source, triple);
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java
index ef5bce4..d6bf4a6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java
@@ -105,7 +105,7 @@ protected void setup(Context context) throws IOException {
 previousTransDerivation = context.getConfiguration().getInt("reasoner.previosTransitiveDerivation", -1);
 previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1);
 hasSchemaChanged = false;
-
+
 try{
 CassandraDB db = new CassandraDB();
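Cases 0 and 1 at the top of the reducer that follows implement the functional-property rules of OWL Horst (rules 1 and 2): if p is an owl:FunctionalProperty, then (u p v) and (u p w) imply (v owl:sameAs w); if p is an owl:InverseFunctionalProperty, then (v p u) and (w p u) imply (v owl:sameAs w). For example (names invented): with :hasBirthMother declared functional, (:carl :hasBirthMother :ann) and (:carl :hasBirthMother :anna) derive (:ann owl:sameAs :anna). The minimum-tracking loop below pairs every other value with the smallest one, so each derived equality is emitted exactly once.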
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
index cc08af6..33e5df7 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
@@ -49,7 +49,7 @@ protected void reduce(BytesWritable key, Iterable values, Context
 case 0: // Modified by WuGang, Functional
 case 1: // Modified by WuGang, Inverse Functional
// System.out.println("Processing Functional & Inverse Functional Property.");
- key1 = NumberUtils.decodeLong(bytes, 1); // for Functional this is the subject; for Inverse Functional this is the object
+ key1 = NumberUtils.decodeLong(bytes, 1); // for Functional this is the subject; for Inverse Functional this is the object
 key2 = NumberUtils.decodeLong(bytes, 9); // predicate
 
 long minimum = Long.MAX_VALUE;
@@ -57,7 +57,7 @@ protected void reduce(BytesWritable key, Iterable values, Context
 Iterator itr = values.iterator();
 while (itr.hasNext()) {
 long value = itr.next().get();
- value1 = value; // Added by Wugang: keep this value; for Functional it is the original triple's object, for Inverse Functional the original triple's subject
+ value1 = value; // Added by Wugang: keep this value; for Functional it is the original triple's object, for Inverse Functional the original triple's subject
 if (value < minimum) {
 if (minimum != Long.MAX_VALUE)
 set.add(minimum);
@@ -97,7 +97,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 triple.setObject(object);
// System.out.println("Find a derive in functional and inverse functional property!" + triple);
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step5");
 outputSize++;
 }
 context.getCounter("OWL derived triples", "functional and inverse functional property").increment(outputSize);
@@ -122,7 +122,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 triple.setPredicate(itr.next().get());
 triple.setRpredicate(triple.getPredicate()); // Added by WuGang
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step5");
 context.getCounter("OWL derived triples", "simmetric property").increment(1);
 }
@@ -154,7 +154,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 triple.setPredicate(derivedPredicate); // Only one of the inverse, the others will be completed in outputInverseOf()
 //triple.setPredicate(itrInverse.next()); // Commented by WuGang 2015-01-27
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step5");
 context.getCounter("OWL derived triples", "inverse of").increment(1);
 
 // Moved to here by WuGang, 2015-01-27
@@ -171,7 +171,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 break;
 case 4:
 case 5:
- // is this part already handled in inferTransitivityStatements? It is not handled here for now
+ // is this part already handled in inferTransitivityStatements? It is not handled here for now
 //Transitive property. I copy to a temporary directory setting a special triple source
 subject = NumberUtils.decodeLong(bytes, 1);
 object = NumberUtils.decodeLong(bytes, 9);
@@ -191,7 +191,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 transitiveSource.setDerivation(TripleSource.TRANSITIVE_ENABLED);
 triple.setPredicate(Math.abs(predicate));
// context.write(transitiveSource, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, transitiveSource, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, transitiveSource, context, "step5");
 context.getCounter("OWL derived triples", "transitive property input").increment(1);
 }
 default:
@@ -213,7 +213,7 @@ private void outputInverseOf(long subject, long object, long predicate, Set
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
 new HashSet(), // not supported
 step, // not used here
 numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 5);
 job.getConfiguration().setInt("reasoner.step", step);
 job.getConfiguration().setInt("reasoner.previosTransitiveDerivation", previousTransitiveDerivation);
 job.getConfiguration().setInt("reasoner.previousDerivation", previousInferPropertiesDerivation);
@@ -218,6 +225,16 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter
 job.setMapOutputValueClass(LongWritable.class);
 job.setReducerClass(OWLNotRecursiveReducer.class);
 
+ Configuration conf = job.getConfiguration();
+ String outputCF1 = "stepNO";
+ String outputCF2 = "alltriples";
+// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES);
+ conf.set(outputCF1, "step1");
+ conf.set(outputCF2, "alltriples");
+ ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE);
+ ConfigHelper.setOutputColumnFamily(conf, "step5");
+ CqlConfigHelper.setOutputCql(conf, "select * from step1");
+
 job.waitForCompletion(true);
@@ -274,7 +291,7 @@ private long inferTransitivityStatements(String[] args)
 new HashSet(),
 0,
 numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 6);
 
 // for the level more than 
two, we only consider the last two level derived data in the current step if (level > 2) job = MapReduceReasonerJobConfig.createNewJob( @@ -284,7 +301,7 @@ private long inferTransitivityStatements(String[] args) levels, step, numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true ,7); job.getConfiguration().setInt("reasoning.baseLevel", step); @@ -296,6 +313,16 @@ private long inferTransitivityStatements(String[] args) job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLTransitivityReducer.class); + Configuration conf = job.getConfiguration(); + String outputCF1 = "stepNO"; + String outputCF2 = "alltriples"; +// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + conf.set(outputCF1, "step1"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "step6"); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + job.waitForCompletion(true); long stepNotFilteredDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); @@ -335,12 +362,22 @@ private long inferSameAsStatements(String[] args) { new HashSet(), // Added by WuGang, 2015-07-12 step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 8); job.setMapperClass(OWLSameAsMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReducer.class); + + Configuration conf = job.getConfiguration(); + String outputCF1 = "stepNO"; + String outputCF2 = "alltriples"; +// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + conf.set(outputCF1, "step1"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "step8"); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); job.waitForCompletion(true); @@ -374,10 +411,14 @@ private long inferSameAsStatements(String[] args) { new HashSet(), // Added by WuGang, 2015-07-12 step, // not used here numMapTasks, - numReduceTasks, true, false); // input from cassandra, but output to hdfs + numReduceTasks, true, false, 9); // input from cassandra, but output to hdfs job.getConfiguration().setInt("reasoner.samplingPercentage", sampling); //Sampling at 10% job.getConfiguration().setInt("reasoner.threshold", resourceThreshold); //Threshold resources + /* + * output to hdfs + */ + job.setMapperClass(OWLSampleResourcesMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(LongWritable.class); @@ -391,6 +432,16 @@ private long inferSameAsStatements(String[] args) { job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); + Configuration conf = job.getConfiguration(); + String outputCF1 = "stepNO"; + String outputCF2 = "alltriples"; +// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + conf.set(outputCF1, "step1"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "step9"); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + job.waitForCompletion(true); @@ -430,7 +481,7 @@ private long inferSameAsStatements(String[] args) { new HashSet(), // Added by WuGang, 2015-07-12 step, // not used here numMapTasks, - numReduceTasks, false, true); // input from hdfs, but output to cassandra + 
numReduceTasks, false, true, 10); // input from hdfs, but output to cassandra SequenceFileInputFormat.addInputPath(job, tmpPath); job.setInputFormatClass(SequenceFileInputFormat.class); @@ -439,6 +490,17 @@ private long inferSameAsStatements(String[] args) { job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReconstructReducer.class); + + conf = job.getConfiguration(); + outputCF1 = "stepNO"; + outputCF2 = "alltriples"; +// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + conf.set(outputCF1, "step1"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "step10"); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + job.waitForCompletion(true); FileSystem fs = FileSystem.get(job.getConfiguration()); @@ -482,7 +544,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter new HashSet(), // Added by WuGang, 20150712 step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 11); job.getConfiguration().setInt("maptasks", Math.max(job.getConfiguration().getInt("maptasks", 0) / 10, 1)); job.getConfiguration().setInt("reasoner.step", step); @@ -490,6 +552,16 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLEquivalenceSCSPReducer.class); + + Configuration conf = job.getConfiguration(); + String outputCF1 = "stepNO"; + String outputCF2 = "alltriples"; +// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + conf.set(outputCF1, "step1"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "step11"); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); job.waitForCompletion(true); return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); @@ -518,7 +590,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, new HashSet(), step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 12); job.getConfiguration().setInt("reasoner.step", step); job.getConfiguration().setInt("reasoner.previousDerivation", previousSomeAllValuesDerivation); previousSomeAllValuesDerivation = step; @@ -527,6 +599,16 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLAllSomeValuesReducer.class); + + Configuration conf = job.getConfiguration(); + String outputCF1 = "stepNO"; + String outputCF2 = "alltriples"; +// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + conf.set(outputCF1, "step1"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "step12"); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); job.waitForCompletion(true); @@ -543,6 +625,13 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, notFilteredDerivation -= previousSomeAllValuesCycleDerivation; if (notFilteredDerivation > 0) { previousSomeAllValuesCycleDerivation += notFilteredDerivation; + //Modified by LiYang 2015/9/21 +// try { +// 
db.createIndexOnInferredSteps();
+// } catch (TException e) {
+// // TODO Auto-generated catch block
+// e.printStackTrace();
+// }
 stepDerivation = db.getRowCountAccordingInferredSteps(step - 1);
 totalDerivation += stepDerivation;
 derivedNewStatements = stepDerivation > 0;
@@ -578,7 +667,7 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup
 new HashSet(),
 step, // not used here
 numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 13);
 
 long schemaOnPropertySize = db.getRowCountAccordingTripleType(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY);
 if (schemaOnPropertySize == 0)
@@ -593,6 +682,16 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup
 job.setMapOutputValueClass(BytesWritable.class);
 job.setReducerClass(OWLHasValueReducer.class);
 
+ Configuration conf = job.getConfiguration();
+ String outputCF1 = "stepNO";
+ String outputCF2 = "alltriples";
+// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES);
+ conf.set(outputCF1, "step1");
+ conf.set(outputCF2, "alltriples");
+ ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE);
+ ConfigHelper.setOutputColumnFamily(conf, "step13");
+ CqlConfigHelper.setOutputCql(conf, "select * from step1");
+
 job.waitForCompletion(true);
 
 // Get inferred count
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
index 5b02e6f..0462b42 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
@@ -17,9 +17,9 @@
 import cn.edu.neu.mitt.mrj.utils.FileUtils;
 import cn.edu.neu.mitt.mrj.utils.NumberUtils;
 import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
-
 import cn.edu.neu.mitt.mrj.data.Triple;
 import cn.edu.neu.mitt.mrj.data.TripleSource;
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
 
 public class OWLSameAsDeconstructMapper extends Mapper {
@@ -82,8 +82,8 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept
 
 context.write(oKey, oValue);
 
- // the emitted oKey is one resource; it may be the subject, predicate or object (for a sameas triple certainly not the predicate), so each position is emitted separately
- // the emitted oValue identifies the owl:sameas triple: tripleId + key.getStep() + key.getDerivation(), with one extra byte in front
+ // the emitted oKey is one resource; it may be the subject, predicate or object (for a sameas triple certainly not the predicate), so each position is emitted separately
+ // the emitted oValue identifies the owl:sameas triple: tripleId + key.getStep() + key.getDerivation(), with one extra byte in front
 ++tripleId;
 }
 
@@ -92,7 +92,7 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept
 @Override
 public void setup(Context context) {
 oValue = new BytesWritable(bValue);
-
+
 try {
 String taskId = context.getConfiguration().get("mapred.task.id").substring(context.getConfiguration().get("mapred.task.id").indexOf("_m_") + 3);
 taskId = taskId.replaceAll("_", "");
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
index fa3135e..8d1a1a5 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
@@ -10,6 +10,7 @@
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Reducer;
 
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
 import cn.edu.neu.mitt.mrj.utils.NumberUtils;
 
 public class OWLSameAsDeconstructReducer extends Reducer {
@@ -17,7 +18,7 @@ public class OWLSameAsDeconstructReducer extends Reducer
 storage = new LinkedList();
@@ -38,9 +39,9 @@ public void reduce(LongWritable key, Iterable values, Context con
 byte[] bValue = iValue.getBytes();
// System.out.println("In processing things before storage, size of iValue is: " + iValue.getLength());
// System.out.println("In processing things before storage, size of bValue is: " + bValue.length);
- // we never know when the (bValue[0] == 4) key/value pair will arrive
- // so resources that arrive before it are kept in storage; once it has arrived, replacement can happen directly, value by value
- // at the end (outside this while loop) the values saved in storage are replaced as well
+ // we never know when the (bValue[0] == 4) key/value pair will arrive
+ // so resources that arrive before it are kept in storage; once it has arrived, replacement can happen directly, value by value
+ // at the end (outside this while loop) the values saved in storage are replaced as well
 if (bValue[0] == 4) {//Same as
 long resource = NumberUtils.decodeLong(bValue, 1);
 replacement = true;
@@ -54,14 +55,14 @@ public void reduce(LongWritable key, Iterable values, Context con
 byte[] bTempValue = new byte[15+8]; // Added by WuGang
 System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang
 System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang
- iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang: also write out the resource as it was before replacement
+ iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang: also write out the resource as it was before replacement
 context.write(oKey, iValue);
 countOutput++;
 context.getCounter("reasoner", "substitutions").increment(1);
 }
 }
 
- Iterator itr2 = storage.iterator(); // if storage is empty, this group contains nothing that can be replaced via sameas
+ Iterator itr2 = storage.iterator(); // if storage is empty, this group contains nothing that can be replaced via sameas
 while (itr2.hasNext()) {
 byte[] bValue = itr2.next();
 oValue.set(bValue, 0, bValue.length);
@@ -70,15 +71,19 @@ public void reduce(LongWritable key, Iterable values, Context con
// System.out.println("In processing things in storage, size of bValue is: " + bValue.length);
 System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang
 System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang
- oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang: also write out the resource as it was before replacement
+ oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang: also write out the resource as it was before replacement
 context.write(oKey, oValue);
 }
 
- // the emitted oKey is one resource whose sameas-replaceable positions have been substituted (at this point substitution via sameas is in fact already complete)
- // the emitted oValue identifies the owl:sameas triple: tripleId + key.getStep() + key.getDerivation(), with one extra byte in front
+ // the emitted oKey is one resource whose sameas-replaceable positions have been substituted (at this point substitution via sameas is in fact already complete)
+ // the emitted oValue identifies the owl:sameas triple: tripleId + key.getStep() + key.getDerivation(), with one extra byte in front
 if (replacement) {
 //Increment counter of replacements
 context.getCounter("reasoner", "substitutions").increment(countOutput + storage.size());
 }
 }
+ public void setup(Context context) throws IOException, InterruptedException{
+ CassandraDB.setConfigLocation();
+
+ }
 }
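The sameAs jobs canonicalise every owl:sameAs clique to its smallest member id: the OWLSameAsMapper diff that follows keys each pair by the larger id (pointing it at the smaller one), and repeated rounds propagate the minimum through the clique. The same fixed point can be illustrated on a single machine with a small union-find; this sketch is illustrative only — it is not the patch's MapReduce code — and assumes nothing beyond plain Java:

import java.util.HashMap;
import java.util.Map;

// Illustrative union-find: the representative of each sameAs clique is its smallest id.
public final class SameAsCliques {
    private final Map<Long, Long> parent = new HashMap<Long, Long>();

    private long find(long x) {
        Long p = parent.get(x);
        if (p == null || p.longValue() == x) return x;
        long root = find(p.longValue());
        parent.put(x, root);                 // path compression
        return root;
    }

    // Register one owl:sameAs pair.
    public void sameAs(long a, long b) {
        long ra = find(a), rb = find(b);
        if (ra == rb) return;
        if (ra < rb) parent.put(rb, ra);     // always point the larger id at the smaller,
        else parent.put(ra, rb);             // mirroring the mapper's key choice
    }

    public long representative(long x) { return find(x); }
}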
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
index a526c85..ed4b73f 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
@@ -29,7 +29,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 /* Source triple: s owl:sameAs o */
 long olKey = 0;
 long olValue = 0;
- if (value.getSubject() > value.getObject()) { // the key is always the larger value, the value the smaller one
+ if (value.getSubject() > value.getObject()) { // the key is always the larger value, the value the smaller one
 olKey = value.getSubject();
 olValue = value.getObject();
 } else {
@@ -37,18 +37,21 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 olValue = value.getSubject();
 }
 
- // the smallest value identifies each group
+ // the smallest value identifies each group
 oKey.set(olKey);
 bValue[0] = 0;
 NumberUtils.encodeLong(bValue, 1, olValue);
 oValue.set(bValue, 0, bValue.length);
- context.write(oKey, oValue); // key is the larger value, value the smaller one: this tells us which group each resource belongs to
+ context.write(oKey, oValue); // key is the larger value, value the smaller one: this tells us which group each resource belongs to
 
 oKey.set(olValue);
 bValue[0] = 1;
 NumberUtils.encodeLong(bValue, 1, olKey);
 oValue.set(bValue, 0, bValue.length);
- context.write(oKey, oValue); // key is the smaller value, value the larger one: this tells us which resources each group contains
+ context.write(oKey, oValue); // key is the smaller value, value the larger one: this tells us which resources each group contains
+ }
+
+ public void setup(Context context) throws IOException{
+
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
index 827e360..887503b 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
@@ -8,6 +8,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
 import cn.edu.neu.mitt.mrj.utils.NumberUtils;
 
 public class OWLSameAsReconstructMapper extends Mapper {
@@ -17,25 +18,26 @@ public class OWLSameAsReconstructMapper extends Mapper
 public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException {
 oKey.setDerivation(bKey[12]);
 
 int elements = 0;
- Iterator itr = values.iterator(); //// from byte 1 on: the id of the owlsameas triple; byte 0 records the position replaced in this round; possible values are 0,1,2,3,4 — 4 means the predicate is owl:sameas (taken to be owl:sameas itself), 0 means subject, 1 means predicate, 2 and 3 mean object
+ Iterator itr = values.iterator(); //// from byte 1 on: the id of the owlsameas triple; byte 0 records the position replaced in this round; possible values are 0,1,2,3,4 — 4 means the predicate is owl:sameas (taken to be owl:sameas itself), 0 means subject, 1 means predicate, 2 and 3 mean object
 while (itr.hasNext()) {
 elements++;
 byte[] bValue = itr.next().getBytes();
- long resource = NumberUtils.decodeLong(bValue, 1); // if this is an owlsameas record, the positions below are replaced with this resource
- long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang: the resource before replacement
+ long resource = NumberUtils.decodeLong(bValue, 1); // if this is an owlsameas record, the positions below are replaced with this resource
+ long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang: the resource before replacement
 switch (bValue[0]) {
 case 0:
- oValue.setSubject(resource); // replace the subject
- oValue.setRsubject(originalResource); // Added by Wugang: the original subject
+ oValue.setSubject(resource); // replace the subject
+ oValue.setRsubject(originalResource); // Added by Wugang: the original subject
// System.out.println("Replacing subject: " + resource);
 break;
 case 1:
- oValue.setPredicate(resource); // replace the predicate
- oValue.setRpredicate(originalResource); // Added by Wugang: the original predicate
+ oValue.setPredicate(resource); // replace the predicate
+ oValue.setRpredicate(originalResource); // Added by Wugang: the original predicate
// System.out.println("Replacing predicate: " + resource);
 break;
- case 2: // replace the object
- case 3: // replace the object
+ case 2: // replace the object
+ case 3: // replace the object
 if (bValue[0] == 2)
 oValue.setObjectLiteral(false);
 else
 oValue.setObjectLiteral(true);
 oValue.setObject(resource);
- oValue.setRobject(originalResource); // Added by Wugang: the original object
+ oValue.setRobject(originalResource); // Added by Wugang: the original object
// System.out.println("Replacing object: " + resource);
 break;
 default:
@@ -61,24 +61,24 @@ public void reduce(BytesWritable key, Iterable values, Context co
 }
 
 if (elements == 3){
- // Added by WuGang: handle rule 11
+ // Added by WuGang: handle rule 11
// oValue.setRsubject(rsubject)
 if ((oValue.getSubject() == oValue.getRsubject())
 && (oValue.getPredicate() == oValue.getRpredicate())
 && (oValue.getObject() == oValue.getRobject()))
- oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this is not a sameas rule application
+ oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this is not a sameas rule application
 else {
 if ((oValue.getPredicate() == TriplesUtils.OWL_SAME_AS)
 && (oValue.getRpredicate() == TriplesUtils.OWL_SAME_AS))
- oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7
+ oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7
 else
- oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL Horst rule 11
+ oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL Horst rule 11
 }
 
// System.out.println("Find a complete replacement of triple: " + oValue);
- CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
+ CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context, "step10");
// context.write(oKey, oValue);
 }
 }
@@ -86,5 +86,6 @@ public void reduce(BytesWritable key, Iterable values, Context co
 @Override
 public void setup(Context context) throws IOException {
 CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
+
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java
index a7988da..192df4b 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java
@@ -44,12 +44,12 @@ public void reduce(LongWritable key, Iterable values, Context con
 BytesWritable value = itr.next();
 long lValue = NumberUtils.decodeLong(value.getBytes(), 1);
// System.out.println("processing " + lValue + " with the first byte is: " + value.getBytes()[0]);
- if (value.getBytes()[0] != 0) { // 1: each such value is a member of the group
+ if (value.getBytes()[0] != 0) { // 1: each such value is a member of the group
 //Store in-memory
 storage.add(lValue);
// System.out.println("Storage size is: " + storage.size());
 //}
- } else { // 0: records which group (the value) this resource merges into
+ } else { // 0: records which group (the value) this resource merges into
// System.out.println("Prepare to replace: lValue is " + lValue + " and oValue.getSubject() is " + oValue.getSubject());
 if (lValue < oValue.getSubject()) {
// System.out.println("Hahahahah, I'm here!");
@@ -65,7 +65,7 @@ public void reduce(LongWritable key, Iterable values, Context con
 long lValue = itr2.next();
 if (!duplicates.contains(lValue)) {
 oValue.setObject(lValue);
- CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
+ CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context, "step8");
 duplicates.add(lValue);
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java
index ace1796..2c8aa57 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java
@@ -45,6 +45,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 }
 
 public void setup(Context context) {
+
 threshold = context.getConfiguration().getInt("reasoner.samplingPercentage", 0);
 }
 }
\ No newline at end of file
diff --git 
a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java index d2c658e..50dfe04 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java @@ -6,6 +6,8 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + public class OWLSampleResourcesReducer extends Reducer { //private static Logger log = LoggerFactory.getLogger(OWLSampleResourcesReducer.class); @@ -34,6 +36,8 @@ public void reduce(LongWritable key, Iterable values, Context cont @Override public void setup(Context context) { + CassandraDB.setConfigLocation(); + threshold = context.getConfiguration().getInt("reasoner.threshold", 0); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java index 69035e4..09232eb 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java @@ -78,6 +78,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup @Override public void setup(Context context) { + level = context.getConfiguration().getInt("reasoning.transitivityLevel", 0); baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 0) - 1; minLevel = Math.max(1, (int)Math.pow(2,level - 2)) + baseLevel; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java index 6b4bd7f..92dd387 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java @@ -95,7 +95,7 @@ public void reduce(BytesWritable key, Iterable values, Context co source.setTransitiveLevel((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step6"); // System.out.println("In OWLTransitivityReducer: " + triple); } @@ -106,7 +106,6 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
- baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 1) - 1; level = context.getConfiguration().getInt("reasoning.transitivityLevel", -1); // Modified by WuGang 2015-01-28 diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index 510be7d..5d784c2 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -2,9 +2,16 @@ import java.io.IOException; +import java.nio.ByteBuffer; import java.util.HashSet; +import java.util.List; import java.util.Set; +import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; @@ -18,13 +25,14 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.thrift.TException; -import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; +//import org.apache.hadoop.mapred.lib.MultipleOutputs; public class RDFSReasoner extends Configured implements Tool { @@ -33,7 +41,7 @@ public class RDFSReasoner extends Configured implements Tool { private int numReduceTasks = -1; public static int step = 0; private int lastExecutionPropInheritance = -1; - private int lastExecutionDomRange = -1; + private int lastExecutionDomRange = -1; private void parseArgs(String[] args) { @@ -83,6 +91,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep Job job = null; long derivation = 0; + // RDFS subproperty inheritance reasoning // job = createNewJob("RDFS subproperty inheritance reasoning", "FILTER_ONLY_HIDDEN"); job = MapReduceReasonerJobConfig.createNewJob( @@ -92,7 +101,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep new HashSet(), // Added by WuGang, 2015-07-13 step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 1); job.setMapperClass(RDFSSubPropInheritMapper.class); job.setMapOutputKeyClass(BytesWritable.class); @@ -102,10 +111,38 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep job.getConfiguration().setInt("lastExecution.step", lastExecutionPropInheritance); lastExecutionPropInheritance = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-inherit"); + + Configuration conf = job.getConfiguration(); + String outputCF1 = "stepNO"; + String outputCF2 = "alltriples"; +// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + conf.set(outputCF1, "step1"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "step1"); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlBulkOutputFormat.class, ByteBuffer.class, 
List.class); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + + +// CqlBulkOutputFormat.setColumnFamilySchema(conf, "step1", CassandraDB.getStepsSchema(1)); +// CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(1)); +// CqlBulkOutputFormat.setColumnFamilyInsertStatement(conf, "step1", CassandraDB.getAlltripleStatement()); +// CqlBulkOutputFormat.setColumnFamilyInsertStatement(conf, outputCF2, CassandraDB.getAlltripleStatement()); + + + System.out.println("cqlconfig"); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + System.out.println("set out put cql"); + +// System.out.println(CqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, outputCF1)); +// System.out.println(CqlBulkOutputFormat.getColumnFamilySchema(conf, outputCF1)); + job.waitForCompletion(true); long propInheritanceDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); derivation += propInheritanceDerivation; + // RDFS subproperty domain and range reasoning // job = createNewJob("RDFS subproperty domain and range reasoning", "FILTER_ONLY_HIDDEN"); @@ -116,13 +153,41 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep new HashSet(), // Added by WuGang, 2015-07-13 step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 2); job.setMapperClass(RDFSSubPropDomRangeMapper.class); job.setMapOutputKeyClass(BytesWritable.class); // Modified by WuGang, 2010-08-26 job.setMapOutputValueClass(LongWritable.class); //job.setPartitionerClass(MyHashPartitioner.class); // Is this ok? seems not necessary job.setReducerClass(RDFSSubpropDomRangeReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); + System.out.println("set map reduce class finished"); + + conf = job.getConfiguration(); + conf.set(outputCF1, "step2"); + conf.set(outputCF2, "alltriples"); + + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "alltriples"); + + CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); + CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(2)); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlOutputFormat.class, ByteBuffer.class, List.class); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlOutputFormat.class, ByteBuffer.class, List.class); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + +// ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); +// ConfigHelper.setOutputColumnFamily(conf, "step1"); +// MultipleOutputs.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, Map.class, List.class); +// MultipleOutputs.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, Map.class, List.class); +// MultipleOutputs.addNamedOutput(job, conf.get(outputCF1), ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); +// MultipleOutputs.addNamedOutput(job, conf.get(outputCF2), ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); + + + + //MultipleOutputs.addNamedOutput((JobConf) job.getConfiguration() , CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, Map.class, List.class); + //Job jobs = new Job(getConf()); + + //MultipleOutputs.addMultiNamedOutput(conf, namedOutput, outputFormatClass, keyClass, valueClass); job.getConfiguration().setInt("lastExecution.step", 
lastExecutionDomRange); lastExecutionDomRange = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-domain-range"); @@ -133,7 +198,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep // RDFS cleaning up subprop duplicates // We remove it for simplicity. That means we will not support stop and restart from breakpoints - + //RDFS subclass reasoning // job = createNewJob("RDFS subclass reasoning", "FILTER_ONLY_TYPE_SUBCLASS"); @@ -146,12 +211,23 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep new HashSet(), // Added by WuGang, 2015-07-13 step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 3); job.setMapperClass(RDFSSubclasMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSubclasReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); + + conf = job.getConfiguration(); + conf.set(outputCF1, "step3"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "alltriples"); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlOutputFormat.class, ByteBuffer.class, List.class); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlOutputFormat.class, ByteBuffer.class, List.class); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); + CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(3)); // configureOutputJob(job, args[0], "dir-rdfs-output/dir-subclass-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); @@ -179,13 +255,25 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep new HashSet(), // Added by WuGang, 2015-07-13 step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 4); job.setMapperClass(RDFSSpecialPropsMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSpecialPropsReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); + conf = job.getConfiguration(); + conf.set(outputCF1, "step4"); + conf.set(outputCF2, "alltriples"); + ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(conf, "alltriples"); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlOutputFormat.class, ByteBuffer.class, List.class); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlOutputFormat.class, ByteBuffer.class, List.class); + CqlConfigHelper.setOutputCql(conf, "select * from step1"); + + CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); + CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(4)); + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-special-props-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java index 5439b49..9a8e1b4 100644 --- 
a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java
@@ -87,11 +87,9 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 }
 
 @Override
- public void setup(Context context) throws IOException {
-
+ public void setup(Context context) throws IOException {
 try{
 CassandraDB db = new CassandraDB();
- db.Index();
 if (memberProperties == null) {
 memberProperties = new HashSet();
 Set filters = new HashSet();
@@ -128,15 +126,16 @@ public void setup(Context context) throws IOException {
 } catch (TException e) {
 e.printStackTrace();
 }
+
 }
 
- protected void cleanup(Context context) throws IOException, InterruptedException{
- try {
- CassandraDB db = new CassandraDB();
- db.UnIndex();
- db.CassandraDBClose();
- } catch (Exception e) {
- // TODO: handle exception
- }
- }
+// protected void cleanup(Context context) throws IOException, InterruptedException{
+// try {
+// CassandraDB db = new CassandraDB();
+// db.UnIndex();
+// db.CassandraDBClose();
+// } catch (Exception e) {
+// // TODO: handle exception
+// }
+// }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java
index 2cbd51f..7f595c6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java
@@ -55,7 +55,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3)
 oTriple.setRsubject(oTriple.getSubject());
 oTriple.setRpredicate(TriplesUtils.RDF_TYPE);
 oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY);
- CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4");
// context.write(source, oTriple);
 context.getCounter("RDFS derived triples", "subproperty of member").increment(1);
 break;
@@ -70,7 +70,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3)
 oTriple.setRsubject(oTriple.getSubject());
 oTriple.setRpredicate(TriplesUtils.RDF_TYPE);
 oTriple.setRobject(TriplesUtils.RDFS_DATATYPE);
- CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4");
// context.write(source, oTriple);
 context.getCounter("RDFS derived triples", "subclass of literal").increment(1);
 break;
@@ -86,7 +86,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3)
 oTriple.setRpredicate(TriplesUtils.RDF_TYPE);
 oTriple.setRobject(TriplesUtils.RDFS_CLASS);
 context.getCounter("RDFS derived triples", "subclass of resource").increment(1);
- CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4");
 //context.write(source, oTriple);
 break;
 case 4: // there is no corresponding rdfs rule for this case
@@ -100,7 +100,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3)
 else
 oTriple.setObjectLiteral(true);
 context.getCounter("RDFS derived triples", "subproperty inheritance of member").increment(1);
- CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4");
// context.write(source, oTriple);
 default:
 break;
@@ -111,8 +111,8 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3)
 public void setup(Context context) {
 CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
-
 source.setDerivation(TripleSource.RDFS_DERIVED);
 source.setStep(context.getConfiguration().getInt("reasoner.step", 0));
+
 }
 }
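The RDFS mapper diffs that follow share one setup() idiom: schema triples are cached in a static set so that, when the task JVM is reused, later tasks skip the Cassandra round trip, and loadSetIntoMemory reports whether the schema changed since the last execution step. A condensed sketch of the pattern (the element types of the sets are an assumption; CassandraDB and TriplesUtils are this project's own classes, invoked with the same calls as in the surrounding diffs):

import java.util.HashSet;
import java.util.Set;

// Condensed from the setup() methods in the RDFS mappers; illustrative only.
public abstract class SchemaCachingSetupSketch {
    private static Set<Long> domainSchemaTriples = null;  // static: survives JVM reuse across tasks
    protected boolean hasSchemaChanged = false;

    protected void loadDomainSchema(int previousExecutionStep) throws Exception {
        if (domainSchemaTriples != null)
            return;                                       // already cached by an earlier task
        domainSchemaTriples = new HashSet<Long>();
        Set<Integer> filters = new HashSet<Integer>();
        filters.add(TriplesUtils.SCHEMA_TRIPLE_DOMAIN_PROPERTY);
        CassandraDB db = new CassandraDB();
        hasSchemaChanged = db.loadSetIntoMemory(domainSchemaTriples, filters, previousExecutionStep);
        db.CassandraDBClose();
    }
}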
setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. - source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java index 78e1fa4..1f60787 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java @@ -4,6 +4,7 @@ import java.util.HashSet; import java.util.Set; +import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; @@ -61,7 +62,6 @@ public void map(Long key, Row row, Context context) throws IOException, Interru return; Triple value = CassandraDB.readJustificationFromMapReduceRow(row); - //Check if the predicate has a domain if (domainSchemaTriples.contains(value.getPredicate())) { NumberUtils.encodeLong(bKey,0,value.getSubject()); // Added by WuGang, 2010-08-26 @@ -73,6 +73,8 @@ public void map(Long key, Row row, Context context) throws IOException, Interru } //Check if the predicate has a range + System.out.println("range " + rangeSchemaTriples); + System.out.println("row " + value); if (rangeSchemaTriples.contains(value.getPredicate()) && !value.isObjectLiteral()) { NumberUtils.encodeLong(bKey,0,value.getObject()); // Added by WuGang, 2010-08-26 @@ -89,23 +91,31 @@ public void map(Long key, Row row, Context context) throws IOException, Interru protected void setup(Context context) throws IOException { hasSchemaChanged = false; previousExecutionStep = context.getConfiguration().getInt("lastExecution.step", -1); - - try{ + + try{ CassandraDB db = new CassandraDB(); + System.out.println("DB "); + System.out.println("domain : " + domainSchemaTriples + " range : " + rangeSchemaTriples); + if (domainSchemaTriples == null) { + System.out.println("domain begin" + previousExecutionStep); domainSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_DOMAIN_PROPERTY); hasSchemaChanged = db.loadSetIntoMemory(domainSchemaTriples, filters, previousExecutionStep); + System.out.println("domain end"); + // db not close } if (rangeSchemaTriples == null) { + System.out.println("rangeSchemaTriples begin: " + hasSchemaChanged + previousExecutionStep); rangeSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY); - + hasSchemaChanged |= db.loadSetIntoMemory(rangeSchemaTriples, filters, previousExecutionStep); - db.CassandraDBClose(); + System.out.println("rangeSchemaTriples end: " + hasSchemaChanged); + db.CassandraDBClose(); } }catch(TTransportException tte){ tte.printStackTrace(); @@ -121,34 +131,16 @@ protected void setup(Context context) throws IOException { e.printStackTrace(); } + // Some debug codes + System.out.println("In mapper setup, peviousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); + System.out.println("Input split: " + context.getInputSplit()); try { - CassandraDB db = new CassandraDB(); - db.Index(); - db.CassandraDBClose(); - } catch (Exception e) { - // TODO: handle exception + System.out.println("Input split length: " + 
context.getInputSplit().getLength()); + } catch (InterruptedException e) { + e.printStackTrace(); } - - // Some debug codes -// System.out.println("In mapper setup, peviousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); -// System.out.println("Input split: " + context.getInputSplit()); -// try { -// System.out.println("Input split length: " + context.getInputSplit().getLength()); -// } catch (InterruptedException e) { -// e.printStackTrace(); -// } } - - protected void cleanup(Context context) throws IOException, InterruptedException{ - try { - CassandraDB db = new CassandraDB(); - db.UnIndex(); - db.CassandraDBClose(); - } catch (Exception e) { - // TODO: handle exception - } - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java index 0f03564..b35abcc 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java @@ -75,7 +75,19 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter protected void setup(Context context) throws IOException { hasSchemaChanged = false; previousExecutionStep = context.getConfiguration().getInt("lastExecution.step", -1); - +// try { // if the index already exists this bails out immediately~~ it must come first --- the db object has already been declared... +// CassandraDB d = new CassandraDB(); +// d.Index(); +// } catch (Exception e) { +// System.out.println("Error in creating Index"); +// } + try { + CassandraDB d = new CassandraDB(); + d.createIndexOnTripleType(); + d.createIndexOnRule(); + } catch (Exception e) { + // TODO: handle exception + } if (subpropSchemaTriples == null) { subpropSchemaTriples = new HashSet(); try { @@ -85,7 +97,9 @@ protected void setup(Context context) throws IOException { hasSchemaChanged = db.loadSetIntoMemory(subpropSchemaTriples, filters, previousExecutionStep); // hasSchemaChanged = FilesTriplesReader.loadSetIntoMemory(subpropSchemaTriples, context, // "FILTER_ONLY_SUBPROP_SCHEMA", previousExecutionStep); - +// System.out.println("AAA"); +// db.createIndexOnInferredSteps(); +// System.out.println("create on inferredsteps"); db.CassandraDBClose(); } catch (TException e) { e.printStackTrace(); @@ -93,23 +107,8 @@ protected void setup(Context context) throws IOException { } else { log.debug("Subprop schema triples already loaded in memory"); } - - try { - CassandraDB db = new CassandraDB(); - db.Index(); - db.CassandraDBClose(); - } catch (Exception e) { - // TODO: handle exception - } + + } - protected void cleanup(Context context) throws IOException, InterruptedException{ - try { - CassandraDB db = new CassandraDB(); - db.UnIndex(); - db.CassandraDBClose(); - } catch (Exception e) { - // TODO: handle exception - } - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java index 1274ea5..56bd6cb 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java @@ -41,22 +41,16 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } protected void setup(Context context) throws IOException, InterruptedException{ - try { - CassandraDB db = new CassandraDB(); - db.Index(); - db.CassandraDBClose(); - } catch (Exception e) { - // TODO: handle exception - } - } - protected void cleanup(Context context)
throws IOException, InterruptedException{ - try { - CassandraDB db = new CassandraDB(); - db.UnIndex(); - db.CassandraDBClose(); - } catch (Exception e) { - // TODO: handle exception - } + } +// protected void cleanup(Context context) throws IOException, InterruptedException{ +// try { +// CassandraDB db = new CassandraDB(); +// db.UnIndex(); +// db.CassandraDBClose(); +// } catch (Exception e) { +// // TODO: handle exception +// } +// } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java index 64f43f2..ed523f0 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java @@ -55,21 +55,21 @@ private void recursiveScanSuperclasses(long value, Set set) { @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { -// System.out.println("Entering RDFSSubclasReducer"); +// System.out.println("Entering RDFSSubclasReducer"); existingURIs.clear(); Iterator itr = values.iterator(); while (itr.hasNext()) { long value = itr.next().get(); - existingURIs.add(value); // all the existing values + existingURIs.add(value); // all the existing values } Iterator oTypes = existingURIs.iterator(); subclasURIs.clear(); while (oTypes.hasNext()) { long existingURI = oTypes.next(); - recursiveScanSuperclasses(existingURI, subclasURIs); // subclasURIs holds all the scanned superclasses + recursiveScanSuperclasses(existingURI, subclasURIs); // subclasURIs holds all the scanned superclasses } subclasURIs.removeAll(existingURIs); @@ -103,7 +103,7 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setObject(oType); for (long obj : existingURIs) { oTriple.setRobject(obj); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3"); // context.write(source, oTriple); } } @@ -112,7 +112,7 @@ public void reduce(BytesWritable key, Iterable values, Context con /* Check special rules */ if ((subclasURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) || existingURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)) - && !memberProperties.contains(oTriple.getSubject())) { // Rule 12, see RDFSSpecialPropsReducer + && !memberProperties.contains(oTriple.getSubject())) { // Rule 12, see RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBPROPERTY); oTriple.setObject(TriplesUtils.RDFS_MEMBER); // Added by WuGang, 2010-08-26 @@ -121,7 +121,7 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3"); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); } @@ -130,7 +130,7 @@ public void reduce(BytesWritable key, Iterable values, Context con || existingURIs.contains(TriplesUtils.RDFS_DATATYPE)) { specialSuperclasses.clear(); recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule 13, see RDFSSpecialPropsReducer + if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule 13, see RDFSSpecialPropsReducer
oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_LITERAL); // Added by WuGang, 2010-08-26 @@ -139,7 +139,7 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3"); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of Literal").increment(1); } @@ -149,7 +149,7 @@ public void reduce(BytesWritable key, Iterable values, Context con || existingURIs.contains(TriplesUtils.RDFS_CLASS)) { specialSuperclasses.clear(); recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule 8, see RDFSSpecialPropsReducer + if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule 8, see RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_RESOURCE); // Added by WuGang, 2010-08-26 @@ -158,7 +158,7 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3"); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); } @@ -227,5 +227,6 @@ public void setup(Context context) throws IOException { source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index 18fa915..7d21597 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -4,20 +4,32 @@ import java.nio.ByteBuffer; import java.util.AbstractMap; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.cassandra.thrift.Column; +import org.apache.cassandra.thrift.ColumnOrSuperColumn; import org.apache.cassandra.thrift.InvalidRequestException; +import org.apache.cassandra.thrift.Mutation; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; import org.apache.cassandra.thrift.UnavailableException; +import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -30,7 +42,7
@@ import cn.edu.neu.mitt.mrj.utils.TriplesUtils; //public class RDFSSubpropDomRangeReducer extends Reducer { -public class RDFSSubpropDomRangeReducer extends Reducer, List> { +public class RDFSSubpropDomRangeReducer extends Reducer, List> { protected static Logger log = LoggerFactory.getLogger(RDFSSubpropDomRangeReducer.class); @@ -42,86 +54,116 @@ public class RDFSSubpropDomRangeReducer extends Reducer values, Context context) throws IOException, InterruptedException { - byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26 -// long uri = key.get(); // for a domain property this is s; for a range property, o - long uri = NumberUtils.decodeLong(bKey, 0); // for a domain property this is s; for a range property, o - long uri_opposite = NumberUtils.decodeLong(bKey, 8); // for a domain property this is o; for a range property, s - - derivedProps.clear(); // clear the derived set - - //Logger logger = LoggerFactory.getLogger(CassandraDB.class); - //long time = System.currentTimeMillis(); - - //Get the predicates with a range or domain associated to this URIs - propURIs.clear(); - Iterator itr = values.iterator(); - while (itr.hasNext()) - propURIs.add(itr.next().get()); // all the predicates - - - //logger.info("add " + (System.currentTimeMillis() - time)); - - Iterator itrProp = propURIs.iterator(); - while (itrProp.hasNext()) { - Collection objects = null; - long propURI = itrProp.next(); - if ((propURI & 0x1) == 1) { - objects = rangeSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "range matches").increment(1); - } else { - objects = domainSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "domain matches").increment(1); - } - - if (objects != null) { - Iterator itr3 = objects.iterator(); - while (itr3.hasNext()) -// derivedProps.add(itr3.next()); - derivedProps.add(new AbstractMap.SimpleEntry(itr3.next(), propURI)); // Modified by WuGang, 2010-08-26 - } + byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26 + // long uri = key.get(); // for a domain property this is s; for a range property, o + long uri = NumberUtils.decodeLong(bKey, 0); // for a domain property this is s; for a range property, o + long uri_opposite = NumberUtils.decodeLong(bKey, 8); // for a domain property this is o; for a range property, s + + Configuration conf = context.getConfiguration(); + derivedProps.clear(); // clear the derived set + + Logger logger = LoggerFactory.getLogger(CassandraDB.class); + long time = System.currentTimeMillis(); + + // Get the predicates with a range or domain associated to this URIs + propURIs.clear(); + Iterator itr = values.iterator(); + while (itr.hasNext()) + propURIs.add(itr.next().get()); // all the predicates + +// logger.info("while1 " + (System.currentTimeMillis() - time)); + System.out.println("while1 " + (System.currentTimeMillis() - time)); + + Iterator itrProp = propURIs.iterator(); + while (itrProp.hasNext()) { + Collection objects = null; + long propURI = itrProp.next(); + if ((propURI & 0x1) == 1) { + objects = rangeSchemaTriples.get(propURI >> 1); + context.getCounter("derivation", "range matches").increment(1); + } else { + objects = domainSchemaTriples.get(propURI >> 1); + context.getCounter("derivation", "domain matches").increment(1); } - - //logger.info("loop " + (System.currentTimeMillis() - time)); - - //Derive the new statements -// Iterator itr2 = derivedProps.iterator(); - Iterator> itr2 = derivedProps.iterator(); // Modified by WuGang, 2010-08-26 - oTriple.setSubject(uri); - oTriple.setPredicate(TriplesUtils.RDF_TYPE); - oTriple.setObjectLiteral(false); - while (itr2.hasNext()) { -// oTriple.setObject(itr2.next()); - Entry entry = itr2.next(); - oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26 - //
Added by WuGang, 2010-08-26 - long propURI = entry.getValue(); - oTriple.setRpredicate(propURI >> 1); // Modified by WuGang 2010-12-03: the predicate was bit-shifted in RDFSSubPropDomRangeMapper, decode it back here - if ((propURI & 0x1) == 1) { // Rule 3, for range - oTriple.setType(TriplesUtils.RDFS_3); - oTriple.setRsubject(uri_opposite); - oTriple.setRobject(uri); - }else{ // Rule 2, for domain - oTriple.setType(TriplesUtils.RDFS_2); - oTriple.setRsubject(uri); - oTriple.setRobject(uri_opposite); - } - - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); - //context.write(source, oTriple); + + if (objects != null) { + Iterator itr3 = objects.iterator(); + while (itr3.hasNext()) + // derivedProps.add(itr3.next()); + derivedProps.add(new AbstractMap.SimpleEntry( + itr3.next(), propURI)); // Modified by WuGang, + // 2010-08-26 } - //logger.info(" " + (System.currentTimeMillis() - time)); - - context.getCounter("RDFS derived triples", "subprop range and domain rule").increment(derivedProps.size()); - //logger.info("finish " + (System.currentTimeMillis() - time)); - } + } + +// logger.info("while2 " + (System.currentTimeMillis() - time)); + time = System.currentTimeMillis(); + System.out.println("while2 " + (System.currentTimeMillis() - time)); + + // Derive the new statements + // Iterator itr2 = derivedProps.iterator(); + Iterator> itr2 = derivedProps.iterator(); // Modified + // by + // WuGang, + // 2010-08-26 + oTriple.setSubject(uri); + oTriple.setPredicate(TriplesUtils.RDF_TYPE); + oTriple.setObjectLiteral(false); + while (itr2.hasNext()) { + // oTriple.setObject(itr2.next()); + Entry entry = itr2.next(); + oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26 + // Added by WuGang, 2010-08-26 + long propURI = entry.getValue(); + oTriple.setRpredicate(propURI >> 1); // Modified by WuGang + // 2010-12-03: the predicate was bit-shifted in RDFSSubPropDomRangeMapper, decode it back here + if ((propURI & 0x1) == 1) { // Rule 3, for range + oTriple.setType(TriplesUtils.RDFS_3); + oTriple.setRsubject(uri_opposite); + oTriple.setRobject(uri); + } else { // Rule 2, for domain + oTriple.setType(TriplesUtils.RDFS_2); + oTriple.setRsubject(uri); + oTriple.setRobject(uri_opposite); + } + + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, + context, "step2"); +// logger.info("write " + (System.currentTimeMillis() - time)); + time = System.currentTimeMillis(); + System.out.println("finish " + (System.currentTimeMillis() - time)); + // CassandraDB.writealltripleToMapReduceContext(oTriple, source, + // context); + // context.write(source, oTriple); + + // _output.write(conf.get(CassandraDB.COLUMNFAMILY_ALLTRIPLES), + // ByteBufferUtil.bytes(key.toString()), + // Collections.singletonList(m)); + // Reporter reporter = null ; + // _output.getCollector(CassandraDB.COLUMNFAMILY_ALLTRIPLES, + // reporter).collect(key, arg1);; + } + + // logger.info(" " + (System.currentTimeMillis() - time)); + context.getCounter("RDFS derived triples", + "subprop range and domain rule").increment(derivedProps.size()); + // logger.info("finish " + (System.currentTimeMillis() - time)); + // Mutation m = new Mutation(); + } + + @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
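A note on the rewritten reduce() above: it relies on a low-bit tag packed into each property URI by the map side, which the reducer unpacks with (propURI & 0x1) and (propURI >> 1). A minimal sketch of that round trip, with the encode side inferred from the reducer's decoding (the class and method names here are illustrative, not part of the patch):

    // Illustrative round trip for the low-bit domain/range tag used between
    // RDFSSubPropDomRangeMapper and RDFSSubpropDomRangeReducer.
    public final class DomainRangeTag {
        // Tag a predicate id: bit 0 set = range match, bit 0 clear = domain match.
        // Assumes predicate ids fit in 63 bits, since one bit is sacrificed.
        static long encode(long predicate, boolean isRange) {
            return (predicate << 1) | (isRange ? 0x1L : 0x0L);
        }

        static boolean isRangeMatch(long tagged) { return (tagged & 0x1L) == 1; } // reducer: (propURI & 0x1) == 1
        static long predicate(long tagged)       { return tagged >> 1; }          // reducer: propURI >> 1

        public static void main(String[] args) {
            long tagged = encode(42L, true);
            System.out.println(isRangeMatch(tagged) + " " + predicate(tagged));   // prints: true 42
        }
    }

This is also why setRpredicate(propURI >> 1) must run before the justification is written: the stored v2 has to be the real predicate, not the tagged value.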
- +// _output = new MultipleOutputs(context); // outputKey = ByteBufferUtil.bytes(context.getConfiguration().get(CassandraDB.COLUMNFAMILY_ALLTRIPLES)); try{ CassandraDB db = new CassandraDB(); if (domainSchemaTriples == null) { @@ -152,5 +194,6 @@ public void setup(Context context) throws IOException { source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java index 6f295a1..db289d2 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java @@ -9,6 +9,8 @@ import java.util.Map; import java.util.Set; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; @@ -92,8 +94,10 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setPredicate(itr3.next()); for (LongWritable pre : values) { oTriple.setRpredicate(pre.get()); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + System.out.println("before wj"); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step1"); + System.out.println("after wj"); + // context.write(source, oTriple); } } @@ -124,12 +128,14 @@ public void reduce(BytesWritable key, Iterable values, Context con // oTriple.setObject(itr4.next()); // context.write(source, oTriple); // } - // Modified by WuGang, 2010-08-26 + // Modified by WuGang, 2010-08-26 while (itr4.hasNext()) { oTriple.setObject(itr4.next()); for(LongWritable obj:values){ oTriple.setRobject(obj.get()); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + System.out.println("before wj"); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step1"); + System.out.println("after wj"); // context.write(source, oTriple); } } @@ -145,6 +151,9 @@ public void reduce(BytesWritable key, Iterable values, Context con @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
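Both hunks above hardcode the per-step table name ("step1" here, "step2"/"step3"/"step4" in the other reducers) at every writeJustificationToMapReduceContext call site. Since each job already stores its step counter under "reasoner.step", the name could be derived once from the configuration; a hypothetical helper, not part of the patch:

    import org.apache.hadoop.conf.Configuration;

    // Hypothetical helper: derive the justification table name from the
    // "reasoner.step" value each job sets via setInt("reasoner.step", ++step).
    public final class StepTables {
        private StepTables() {}

        public static String stepTable(Configuration conf) {
            return "step" + conf.getInt("reasoner.step", 0);
        }
    }

Centralizing the name would keep the reducers in sync with the per-step schemas registered through CqlBulkOutputFormat.setColumnFamilySchema.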
+// System.out.println("reduce setup"); +// CqlBulkOutputFormat.setColumnFamilySchema(context.getConfiguration(), CassandraDB.KEYSPACE + ".step1", CassandraDB.getStepsSchema(1)); +// System.out.println(CqlBulkOutputFormat.getColumnFamilySchema(context.getConfiguration(), CassandraDB.COLUMNFAMILY_ALLTRIPLES + "step1")); if (subpropSchemaTriples == null) { CassandraDB db; @@ -154,7 +163,6 @@ public void setup(Context context) throws IOException { filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); subpropSchemaTriples = db.loadMapIntoMemory(filters); // subpropSchemaTriples = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_SUBPROP_SCHEMA", context); - db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); @@ -177,6 +185,8 @@ public void setup(Context context) throws IOException { source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); oTriple2.setPredicate(TriplesUtils.RDF_TYPE); - oTriple2.setObjectLiteral(false); + oTriple2.setObjectLiteral(false); + } + } From b24c8b41ba8364f612166c1282e4981fb0a580ea Mon Sep 17 00:00:00 2001 From: Joe Date: Sat, 12 Dec 2015 10:08:00 +0800 Subject: [PATCH 10/16] changed by wu Signed-off-by: Joe --- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 190 +++++++++++++++--- .../neu/mitt/mrj/io/dbs/MrjMultioutput.java | 28 +++ .../reasoner/MapReduceReasonerJobConfig.java | 10 +- .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java | 23 +-- .../rdfs/RDFSSubpropDomRangeReducer.java | 33 +-- 5 files changed, 224 insertions(+), 60 deletions(-) diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index e0e59b8..ddc44de 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -22,11 +22,8 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.UUID; import org.apache.cassandra.exceptions.RequestExecutionException; -import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; -import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; import org.apache.cassandra.thrift.Cassandra; import org.apache.cassandra.thrift.Column; import org.apache.cassandra.thrift.Compression; @@ -42,6 +39,7 @@ import org.apache.cassandra.thrift.UnavailableException; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.mapreduce.Reducer.Context; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.protocol.TProtocol; @@ -52,12 +50,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import ch.qos.logback.classic.db.DBAppender; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Cluster.Builder; import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; @@ -66,13 +64,8 @@ import com.datastax.driver.core.Statement; //modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; -import com.datastax.driver.core.Cluster.Builder; -import com.datastax.driver.core.querybuilder.Delete.Where; -import com.datastax.driver.core.querybuilder.Insert; import com.datastax.driver.core.querybuilder.QueryBuilder; //modified -import com.datastax.driver.core.querybuilder.Select; -import com.datastax.driver.core.utils.UUIDs; /** 
@@ -196,26 +189,58 @@ public static String getAlltripleSchema(){ COLUMN_INFERRED_STEPS + " int, " + "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + ")) )"; - return (ALLTRIPLE_SCHEMA); + return ALLTRIPLE_SCHEMA; } public static String getStepsSchema(Integer step){ - String query = "CREATE TABLE " + CassandraDB.KEYSPACE + ".step" + step + + String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + ".step" + step + " ( " + - "sub" + " bigint, " + - "pre" + " bigint, " + - "obj" + " bigint, " + - "rule int, " + - "v1" + " bigint, " + - "v2" + " bigint, " + - "v3" + " bigint, " + - "transitivelevel int" + - ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"; + COLUMN_SUB + " bigint, " + + COLUMN_PRE + " bigint, " + + COLUMN_OBJ + " bigint, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + "))"; + return STEPS_SCHEMA; + } + + public static String getStepsSchema(String cfName){ + String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + "." + cfName + + " ( " + + COLUMN_SUB + " bigint, " + + COLUMN_PRE + " bigint, " + + COLUMN_OBJ + " bigint, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + "))"; + return STEPS_SCHEMA; + } + + public static String getStepsStatement(int step){ + String query = "INSERT INTO " + CassandraDB.KEYSPACE + ".step" + step + + " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; + return query; + } + + public static String getStepsStatement(String cfName){ + String query = "INSERT INTO " + CassandraDB.KEYSPACE + "." + cfName + + " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; return query; } public static String getAlltripleStatement(){ - return ("INSERT INTO mrjks.alltriples () VALUES(?, ?)"); + return ("INSERT INTO " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + " (sub, pre, obj, isliteral, tripletype, inferredsteps) VALUES(?, ?, ?, ?, ?, ?)"); } private static void setupTables(Cassandra.Iface client) @@ -560,6 +585,90 @@ public static int readStepFromMapReduceRow(Row row){ return step; } + public static void writeJustificationToMapReduceMultipleOutputs( + Triple triple, + TripleSource source, + MultipleOutputs output, + String stepname) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + Map allkeys = new LinkedHashMap(); + List allvariables = new ArrayList(); + long time = System.currentTimeMillis(); + + byte one = 1; + byte zero = 0; + // Prepare composite key (sub, pre, obj) + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + // the length of boolean type in cassandra is one byte!!!!!!!! 
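A note on the value lists built in this method: as the comments in this patch state, CqlBulkRecordWriter.write() ignores the key argument and binds the emitted ByteBuffers positionally against the insert statement registered for the column family, so the order must mirror getStepsStatement()'s column list exactly. A compact sketch of that pairing, reusing only accessors that appear in this patch (Triple and TripleSource are the project's own types):

    import java.nio.ByteBuffer;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.cassandra.utils.ByteBufferUtil;

    // Values for: INSERT INTO <keyspace>.stepN
    //   (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)
    // One ByteBuffer per placeholder, in statement order.
    final class StepsRow {
        static List<ByteBuffer> build(Triple t, TripleSource src) {
            List<ByteBuffer> row = new ArrayList<ByteBuffer>(8);
            row.add(ByteBufferUtil.bytes(t.getSubject()));                 // sub  bigint
            row.add(ByteBufferUtil.bytes(t.getPredicate()));               // pre  bigint
            row.add(ByteBufferUtil.bytes(t.getObject()));                  // obj  bigint
            row.add(ByteBufferUtil.bytes((int) t.getType()));              // rule int
            row.add(ByteBufferUtil.bytes(t.getRsubject()));                // v1   bigint
            row.add(ByteBufferUtil.bytes(t.getRpredicate()));              // v2   bigint
            row.add(ByteBufferUtil.bytes(t.getRobject()));                 // v3   bigint
            row.add(ByteBufferUtil.bytes((int) src.getTransitiveLevel())); // transitivelevel int
            return row;
        }
    }

A mismatch between this order and the registered INSERT is silent at compile time and only fails (or corrupts rows) at write time, which is why the schema and statement are registered together per column family.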
+ keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + + allvariables.add(ByteBufferUtil.bytes(source.getStep())); + allvariables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allvariables.add(ByteBufferUtil.bytes((int)triple.getType())); + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero + //variables.add(ByteBuffer.wrap(new byte[]{zero})); + + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); + + + + // Keys are not used for + // CqlBulkRecordWriter.write(Object key, List values), + // so it can be set to null. + // Only values are used there where the value correspond to + // the insert statement set in CqlBulkOutputFormat.setColumnFamilyInsertStatement() + // All triples columnfamily: + // sub, pre, obj, isliteral, tripletype, inferredsteps + // Steps columnfamily: + // sub, pre, obj, rule, v1, v2, v3, transitivelevel + + List allTValues = new ArrayList(); + allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); + allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + allTValues.add(ByteBufferUtil.bytes(triple.getObject())); + allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allTValues.add(ByteBufferUtil.bytes( + TriplesUtils.getTripleType( + source, triple.getSubject(), + triple.getPredicate(), + triple.getObject()))); + allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); + + List stepsValues = new ArrayList(); + stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); + stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); + + time = System.currentTimeMillis(); +// _output.write(stepname, keys, variables); + output.write(stepname, null, stepsValues); + System.out.println("wrote steps" + (System.currentTimeMillis() - time)); + time = System.currentTimeMillis(); + output.write("alltriples", null, allTValues); +// System.out.println("wrote all " + (System.currentTimeMillis() - time)); + System.out.println("write all " + (System.currentTimeMillis() - time)); + + } + public static void writeJustificationToMapReduceContext( Triple triple, TripleSource source, @@ -603,12 +712,45 @@ public static void writeJustificationToMapReduceContext( + // Keys are not used for + // CqlBulkRecordWriter.write(Object key, List values), + // so it can be set to null. 
+ // Only values are used there where the value correspond to + // the insert statement set in CqlBulkOutputFormat.setColumnFamilyInsertStatement() + // All triples columnfamily: + // sub, pre, obj, isliteral, tripletype, inferredsteps + // Steps columnfamily: + // sub, pre, obj, rule, v1, v2, v3, transitivelevel + + List allTValues = new ArrayList(); + allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); + allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + allTValues.add(ByteBufferUtil.bytes(triple.getObject())); + allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allTValues.add(ByteBufferUtil.bytes( + TriplesUtils.getTripleType( + source, triple.getSubject(), + triple.getPredicate(), + triple.getObject()))); + allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); + + List stepsValues = new ArrayList(); + stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); + stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); time = System.currentTimeMillis(); - _output.write(stepname, keys, variables); - System.out.println("write step" + (System.currentTimeMillis() - time)); +// _output.write(stepname, keys, variables); + _output.write(stepname, null, stepsValues); + System.out.println("wrote steps" + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); - _output.write("alltriples", allkeys, allvariables); + _output.write("alltriples", null, allTValues); +// System.out.println("wrote all " + (System.currentTimeMillis() - time)); System.out.println("write all " + (System.currentTimeMillis() - time)); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java index 5202fc7..a9eda58 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java @@ -50,6 +50,18 @@ protected synchronized RecordWriter getRecordWriter( ConfigHelper.setOutputColumnFamily(taskContext.getConfiguration(), columnFamilyNameName); CqlConfigHelper.setOutputCql(taskContext.getConfiguration(), getCql(columnFamilyNameName)); + CqlBulkOutputFormat.setColumnFamilySchema( + taskContext.getConfiguration(), + columnFamilyNameName, + getSchema(columnFamilyNameName)); + + CqlBulkOutputFormat.setColumnFamilyInsertStatement( + taskContext.getConfiguration(), + columnFamilyNameName, + getInsertStatement(columnFamilyNameName)); + + + try { System.out.println(taskContext.getOutputFormatClass()); writer = ((OutputFormat) ReflectionUtils.newInstance( @@ -83,4 +95,20 @@ String getCql(String columnFamilyNameName){ return("UPDATE " + columnFamilyNameName + " SET transitivelevel =? 
); } + String getSchema(String columnFamilyNameName){ + System.out.println(columnFamilyNameName + " schema"); + if ("alltriples".equals(columnFamilyNameName)) { + return CassandraDB.getAlltripleSchema(); + } + return CassandraDB.getStepsSchema(columnFamilyNameName); + } + + String getInsertStatement(String columnFamilyNameName){ + System.out.println(columnFamilyNameName + " insert statement"); + if ("alltriples".equals(columnFamilyNameName)) { + return CassandraDB.getAlltripleStatement(); + } + return CassandraDB.getStepsStatement(columnFamilyNameName); + } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index a3b4edc..cd93745 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -176,14 +176,16 @@ else if (typeFilters.size() == 1){ // Output to CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraOutput(Job job, Integer step) { + private static void configureCassandraOutput(Job job, int step) { //Set the output job.setOutputKeyClass(Map.class); job.setOutputValueClass(List.class); job.setOutputFormatClass(CqlBulkOutputFormat.class); - CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), CassandraDB.KEYSPACE + ".step1", CassandraDB.getStepsSchema(1)); - System.out.println("Schema : " + CassandraDB.getStepsSchema(1)); + CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), "step" + step, CassandraDB.getStepsSchema(step)); + System.out.println("Schema we set: " + CassandraDB.getStepsSchema(step)); + System.out.println("Schema we get: " + CqlBulkOutputFormat.getColumnFamilySchema(job.getConfiguration(), "step"+step)); + CqlBulkOutputFormat.setColumnFamilyInsertStatement(job.getConfiguration(), "step"+step, CassandraDB.getStepsStatement(step)); // job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); @@ -196,7 +198,7 @@ private static void configureCassandraOutput(Job job, Integer step) { * addMultiNamedOutput * */ - JobConf jobconf = new JobConf(job.getConfiguration()); +// JobConf jobconf = new JobConf(job.getConfiguration()); // MultipleOutputs.addNamedOutput(jobconf, "step" + step, ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); // MultipleOutputs.addNamedOutput(jobconf, CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index 5d784c2..b3c4f20 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -118,23 +118,12 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep // conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); conf.set(outputCF1, "step1"); conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); ConfigHelper.setOutputColumnFamily(conf, "step1"); MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); CqlConfigHelper.setOutputCql(conf, "select * from step1"); -//
CqlBulkOutputFormat.setColumnFamilySchema(conf, "step1", CassandraDB.getStepsSchema(1)); -// CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(1)); -// CqlBulkOutputFormat.setColumnFamilyInsertStatement(conf, "step1", CassandraDB.getAlltripleStatement()); -// CqlBulkOutputFormat.setColumnFamilyInsertStatement(conf, outputCF2, CassandraDB.getAlltripleStatement()); - - - System.out.println("cqlconfig"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - System.out.println("set out put cql"); - // System.out.println(CqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, outputCF1)); // System.out.println(CqlBulkOutputFormat.getColumnFamilySchema(conf, outputCF1)); @@ -167,13 +156,13 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep conf.set(outputCF2, "alltriples"); ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "alltriples"); + ConfigHelper.setOutputColumnFamily(conf, "step2"); - CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); - CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(2)); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlOutputFormat.class, ByteBuffer.class, List.class); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlOutputFormat.class, ByteBuffer.class, List.class); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); +// CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); +// CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(2)); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); + MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); + CqlConfigHelper.setOutputCql(conf, "select * from step2"); // ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); // ConfigHelper.setOutputColumnFamily(conf, "step1"); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index 7d21597..551bf5f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -4,30 +4,20 @@ import java.nio.ByteBuffer; import java.util.AbstractMap; import java.util.Collection; -import java.util.Collections; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; -import org.apache.cassandra.thrift.Column; -import org.apache.cassandra.thrift.ColumnOrSuperColumn; import org.apache.cassandra.thrift.InvalidRequestException; -import org.apache.cassandra.thrift.Mutation; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; import org.apache.cassandra.thrift.UnavailableException; -import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; -import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapred.JobConf; -import 
org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; @@ -38,11 +28,12 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; //public class RDFSSubpropDomRangeReducer extends Reducer { -public class RDFSSubpropDomRangeReducer extends Reducer, List> { +public class RDFSSubpropDomRangeReducer extends Reducer, List> { protected static Logger log = LoggerFactory.getLogger(RDFSSubpropDomRangeReducer.class); @@ -54,7 +45,7 @@ public class RDFSSubpropDomRangeReducer extends Reducer values, Context con oTriple.setRobject(uri_opposite); } - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, - context, "step2"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, + _output, "step2"); // logger.info("write " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); System.out.println("finish " + (System.currentTimeMillis() - time)); @@ -162,7 +153,7 @@ public void reduce(BytesWritable key, Iterable values, Context con @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. -// _output = new MultipleOutputs(context); + _output = new MrjMultioutput, List>(context); // outputKey = ByteBufferUtil.bytes(context.getConfiguration().get(CassandraDB.COLUMNFAMILY_ALLTRIPLES)); try{ CassandraDB db = new CassandraDB(); @@ -196,4 +187,16 @@ public void setup(Context context) throws IOException { source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); } + + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + // TODO Auto-generated method stub + super.cleanup(context); + _output.close(); + } + + } From 5a2c2702d6a1ac14d8078753fd0a96d195d27fbb Mon Sep 17 00:00:00 2001 From: Joe Date: Sun, 13 Dec 2015 19:49:49 +0800 Subject: [PATCH 11/16] shouldn't changed Signed-off-by: li --- mrj-0.1/.classpath | 4 ++-- .../mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath index 62a1d79..b1b26f7 100644 --- a/mrj-0.1/.classpath +++ b/mrj-0.1/.classpath @@ -6,7 +6,7 @@ - - + + diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index 551bf5f..fef05d5 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -193,10 +193,13 @@ public void setup(Context context) throws IOException { protected void cleanup( Reducer, List>.Context context) throws IOException, InterruptedException { - // TODO Auto-generated method stub - super.cleanup(context); + _output.close(); + + super.cleanup(context); } + + } From 9b3e6908b4c62ef1d986f36ddef863b974831b90 Mon Sep 17 00:00:00 2001 From: Joe Date: Tue, 22 Dec 2015 11:22:28 +0800 Subject: [PATCH 12/16] Change the result Signed-off-by: Joe --- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 151 ++++++------------ .../edu/neu/mitt/mrj/io/dbs/CreateTables.java | 66 ++++++++ 
.../neu/mitt/mrj/io/dbs/MrjMultioutput.java | 37 ++--- .../reasoner/MapReduceReasonerJobConfig.java | 40 ++--- .../reasoner/owl/OWLAllSomeValuesReducer.java | 6 +- .../owl/OWLEquivalenceSCSPReducer.java | 14 +- .../mrj/reasoner/owl/OWLHasValueReducer.java | 8 +- .../reasoner/owl/OWLNotRecursiveReducer.java | 16 +- .../mitt/mrj/reasoner/owl/OWLReasoner.java | 92 +---------- .../owl/OWLSameAsReconstructReducer.java | 8 +- .../mrj/reasoner/owl/OWLSameAsReducer.java | 8 +- .../reasoner/owl/OWLTransitivityReducer.java | 6 +- .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java | 68 +------- .../rdfs/RDFSSpecialPropsReducer.java | 20 ++- .../rdfs/RDFSSubPropInheritMapper.java | 18 ++- .../mrj/reasoner/rdfs/RDFSSubclasReducer.java | 141 +++++++++------- .../rdfs/RDFSSubpropDomRangeReducer.java | 6 +- .../rdfs/RDFSSubpropInheritReducer.java | 31 ++-- 18 files changed, 336 insertions(+), 400 deletions(-) create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index ddc44de..03ff52b 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -74,7 +74,7 @@ */ public class CassandraDB { private static final Logger logger = LoggerFactory.getLogger(CassandraDB.class); - public static final String KEYSPACE = "mrjkss"; // mr.j keyspace + public static final String KEYSPACE = "mrjks31"; // mr.j keyspace public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications"; // mr.j keyspace public static final String COLUMNFAMILY_RESOURCES = "resources"; // mr.j keyspace public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace @@ -351,25 +351,6 @@ private static void setupTables(Cassandra.Iface client) logger.error("failed to create table " + KEYSPACE + "." + "AllTriples", e); } - for (int step = 1; step <= 2; step++) { - query = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + ".step" + step + - " ( " + - "sub" + " bigint, " + - "pre" + " bigint, " + - "obj" + " bigint, " + - "rule int, " + - "v1" + " bigint, " + - "v2" + " bigint, " + - "v3" + " bigint, " + - "transitivelevel int" + - ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"; - try { - logger.info("set up table " + "step"); - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - } catch (InvalidRequestException e) { - logger.error("failed to create table " + KEYSPACE + "." + "step", e); - } - } query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); @@ -659,16 +640,16 @@ public static void writeJustificationToMapReduceMultipleOutputs( stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); time = System.currentTimeMillis(); -// _output.write(stepname, keys, variables); - output.write(stepname, null, stepsValues); - System.out.println("wrote steps" + (System.currentTimeMillis() - time)); - time = System.currentTimeMillis(); - output.write("alltriples", null, allTValues); + output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); // System.out.println("wrote all " + (System.currentTimeMillis() - time)); - System.out.println("write all " + (System.currentTimeMillis() - time)); +// System.out.println("write all " + (System.currentTimeMillis() - time));// _output.write(stepname, keys, variables); + time = System.currentTimeMillis(); + output.write(stepname, null, stepsValues); +// System.out.println("wrote steps" + (System.currentTimeMillis() - time)); + } - + public static void writeJustificationToMapReduceContext( Triple triple, TripleSource source, @@ -681,80 +662,37 @@ public static void writeJustificationToMapReduceContext( byte one = 1; byte zero = 0; - MrjMultioutput _output; - _output = new MrjMultioutput, List>(context); + // Prepare composite key (sub, pre, obj) keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); // the length of boolean type in cassandra is one byte!!!!!!!! + keys.put(CassandraDB.COLUMN_IS_LITERAL, + triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + int tripletype = TriplesUtils.DATA_TRIPLE; + if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ + tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table + }else{ + tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); + } + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long - allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); - allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); - allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); - - allvariables.add(ByteBufferUtil.bytes(source.getStep())); - allvariables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - allvariables.add(ByteBufferUtil.bytes((int)triple.getType())); - // Prepare variables List variables = new ArrayList(); // variables.add(ByteBufferUtil.bytes(oValue.getSubject())); // the length of boolean type in cassandra is one byte!!!!!!!! // For column inferred, init it as false i.e. 
zero - //variables.add(ByteBuffer.wrap(new byte[]{zero})); - - variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); - - - - // Keys are not used for - // CqlBulkRecordWriter.write(Object key, List values), - // so it can be set to null. - // Only values are used there where the value correspond to - // the insert statement set in CqlBulkOutputFormat.setColumnFamilyInsertStatement() - // All triples columnfamily: - // sub, pre, obj, isliteral, tripletype, inferredsteps - // Steps columnfamily: - // sub, pre, obj, rule, v1, v2, v3, transitivelevel - - List allTValues = new ArrayList(); - allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); - allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); - allTValues.add(ByteBufferUtil.bytes(triple.getObject())); - allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - allTValues.add(ByteBufferUtil.bytes( - TriplesUtils.getTripleType( - source, triple.getSubject(), - triple.getPredicate(), - triple.getObject()))); - allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); - - List stepsValues = new ArrayList(); - stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); - stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); - stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); - stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); - stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); - - time = System.currentTimeMillis(); -// _output.write(stepname, keys, variables); - _output.write(stepname, null, stepsValues); - System.out.println("wrote steps" + (System.currentTimeMillis() - time)); - time = System.currentTimeMillis(); - _output.write("alltriples", null, allTValues); -// System.out.println("wrote all " + (System.currentTimeMillis() - time)); - System.out.println("write all " + (System.currentTimeMillis() - time)); - +// variables.add(ByteBuffer.wrap(new byte[]{zero})); + variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive + context.write(keys, variables); } - + public static void writealltripleToMapReduceContext( Triple triple, TripleSource source, @@ -764,21 +702,34 @@ public static void writealltripleToMapReduceContext( byte one = 1; byte zero = 0; + // Prepare composite key (sub, pre, obj) keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); - - List variables = new ArrayList(); - variables.add(ByteBufferUtil.bytes(source.getStep())); - variables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + // the length of boolean type in cassandra is one byte!!!!!!!! 
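The triple-type selection used in writeJustificationToMapReduceContext above, and repeated below in writealltripleToMapReduceContext, is the same if/else in both places. A sketch of factoring it out, using only constants and calls already present in the patch (the helper name is illustrative):

    // Hypothetical extraction of the duplicated triple-type logic.
    // Rows copied from the synonyms table keep the special SYNONYMS_TABLE marker
    // so downstream filters can tell them apart from ordinary data triples;
    // everything else is classified by TriplesUtils.getTripleType().
    static int tripleTypeFor(Triple triple, TripleSource source) {
        if (triple.getType() == TriplesUtils.OWL_HORST_SYNONYMS_TABLE) {
            return TriplesUtils.SYNONYMS_TABLE;
        }
        return TriplesUtils.getTripleType(
                source, triple.getSubject(), triple.getPredicate(), triple.getObject());
    }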
+ keys.put(CassandraDB.COLUMN_IS_LITERAL, + triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); int tripletype = TriplesUtils.DATA_TRIPLE; if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table }else{ tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); } - variables.add(ByteBufferUtil.bytes(tripletype)); - context.write(keys,variables); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero +// variables.add(ByteBuffer.wrap(new byte[]{zero})); + variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive + context.write(keys, variables); } public boolean loadSetIntoMemory(Set schemaTriples, Set filters, int previousStep) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { @@ -1024,20 +975,20 @@ public void createIndexOnTripleType() throws InvalidRequestException, Unavailabl client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } - public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - } +// public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ +// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; +// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); +// } // // public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ // String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")"; // client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); // } - - public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")";
-		client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
-	}
+//
+//	public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+//		String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")";
+//		client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+//	}
 
 	/*
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java
new file mode 100644
index 0000000..ebe1f9e
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java
@@ -0,0 +1,66 @@
+package cn.edu.neu.mitt.mrj.io.dbs;
+
+import org.apache.cassandra.transport.SimpleClient;
+
+import com.datastax.driver.core.Cluster;
+import com.datastax.driver.core.Host;
+import com.datastax.driver.core.Metadata;
+import com.datastax.driver.core.Session;
+
+public class CreateTables {
+	private Cluster cluster;
+	private Session session;
+
+	public Session getSession(){
+		return this.session;
+	}
+
+	public void connect(String node){
+		cluster = Cluster.builder()
+				.addContactPoint(node)
+				.build();
+		Metadata metadata = cluster.getMetadata();
+		System.out.printf("Connected to cluster: %s\n",
+				metadata.getClusterName());
+		for(Host host : metadata.getAllHosts()){
+			System.out.printf("Datacenter: %s; Host: %s; Rack: %s\n",
+					host.getDatacenter(), host.getAddress(), host.getRack());
+		}
+		session = cluster.connect();
+	}
+
+	// Uses the DataStax Java driver (2.1-era API) to create one justification
+	// table per reasoning step.
+	public void createSchema(Integer step){
+		session.execute("CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + ".step" + step +
+				" ( " +
+				"sub" + " bigint, " +
+				"pre" + " bigint, " +
+				"obj" + " bigint, " +
+				"rule int, " +
+				"v1" + " bigint, " +
+				"v2" + " bigint, " +
+				"v3" + " bigint, " +
+				"transitivelevel int" +
+				", primary key((sub, pre, obj, rule), v1, v2, v3))");
+	}
+
+	public void close(){
+		session.close();
+		cluster.close();
+	}
+
+	public static void main(String args[]){
+		CreateTables client = new CreateTables();
+		client.connect(CassandraDB.DEFAULT_HOST);
+		for (int i = 1; i < 14; i++) {
+			client.createSchema(i);
+			try {
+				Thread.sleep(1000);
+			} catch (InterruptedException e) {
+				e.printStackTrace();
+			}
+		}
+		client.close();
+	}
+}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java
index a9eda58..8fb04b4 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java
@@ -27,48 +27,45 @@ public MrjMultioutput(TaskInputOutputContext context) {
 
 	@Override
 	protected synchronized RecordWriter getRecordWriter(
-			TaskAttemptContext taskContext, String columnFamilyNameName)
+			TaskAttemptContext taskContext, String columnFamilyName)
 			throws IOException, InterruptedException {
-//		CqlBulkOutputFormat.setColumnFamilySchema(taskContext.getConfiguration(), "step1", CassandraDB.getStepsSchema(1));
-//		CqlBulkOutputFormat.setColumnFamilyInsertStatement(taskContext.getConfiguration(), "step1", CassandraDB.getAlltripleStatement());
-
 		// look for record-writer in the cache
-		RecordWriter writer = recordWriters.get(columnFamilyNameName);
+		RecordWriter writer = recordWriters.get(columnFamilyName);
 
-		System.out.println("get Record
Writer"); +// System.out.println("get Record Writer"); // If not in cache, create a new one if (writer == null) { // get the record writer from context output format // FileOutputFormat.setOutputName(taskContext, baseFileName); - System.out.println("Before ConfigHelper.setOutputColumnFamily"); - System.out.println(ConfigHelper.getOutputColumnFamily(taskContext.getConfiguration())); +// System.out.println("Before ConfigHelper.setOutputColumnFamily"); +// System.out.println(ConfigHelper.getOutputColumnFamily(taskContext.getConfiguration())); - ConfigHelper.setOutputColumnFamily(taskContext.getConfiguration(), columnFamilyNameName); - CqlConfigHelper.setOutputCql(taskContext.getConfiguration(), getCql(columnFamilyNameName)); + ConfigHelper.setOutputColumnFamily(taskContext.getConfiguration(), columnFamilyName); +// CqlConfigHelper.setOutputCql(taskContext.getConfiguration(), getCql(columnFamilyNameName)); CqlBulkOutputFormat.setColumnFamilySchema( taskContext.getConfiguration(), - columnFamilyNameName, - getSchema(columnFamilyNameName)); + columnFamilyName, + getSchema(columnFamilyName)); CqlBulkOutputFormat.setColumnFamilyInsertStatement( taskContext.getConfiguration(), - columnFamilyNameName, - getInsertStatement(columnFamilyNameName)); + columnFamilyName, + getInsertStatement(columnFamilyName)); try { - System.out.println(taskContext.getOutputFormatClass()); +// System.out.println(taskContext.getOutputFormatClass()); writer = ((OutputFormat) ReflectionUtils.newInstance( taskContext.getOutputFormatClass(), taskContext.getConfiguration())) .getRecordWriter(taskContext); - System.out.println(writer.getClass()); +// System.out.println(writer.getClass()); } catch (ClassNotFoundException e) { throw new IOException(e); } @@ -76,11 +73,11 @@ protected synchronized RecordWriter getRecordWriter( // if counters are enabled, wrap the writer with context // to increment counters if (countersEnabled) { - writer = new MultipleOutputs.RecordWriterWithCounter(writer, columnFamilyNameName, context); + writer = new MultipleOutputs.RecordWriterWithCounter(writer, columnFamilyName, context); } // add the record-writer to the cache - recordWriters.put(columnFamilyNameName, writer); + recordWriters.put(columnFamilyName, writer); } return writer; } @@ -96,7 +93,7 @@ String getCql(String columnFamilyNameName){ } String getSchema(String columnFamilyNameName){ - System.out.println(columnFamilyNameName + " schema"); +// System.out.println(columnFamilyNameName + " schema"); if (columnFamilyNameName == "alltriples") { return CassandraDB.getAlltripleSchema(); } @@ -104,7 +101,7 @@ String getSchema(String columnFamilyNameName){ } String getInsertStatement(String columnFamilyNameName){ - System.out.println(columnFamilyNameName + " insert statement"); +// System.out.println(columnFamilyNameName + " insert statement"); if (columnFamilyNameName == "alltriples") { return CassandraDB.getAlltripleStatement(); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index cd93745..659b893 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -11,6 +11,7 @@ import java.io.IOException; +import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.Set; @@ -25,6 +26,7 @@ import org.apache.hadoop.mapreduce.Job; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import 
cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; /** * @author gibeo_000 @@ -183,43 +185,19 @@ private static void configureCassandraOutput(Job job, int step) { job.setOutputFormatClass(CqlBulkOutputFormat.class); CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), "step" + step, CassandraDB.getStepsSchema(step)); - System.out.println("Schema we set: " + CassandraDB.getStepsSchema(step)); - System.out.println("Schema we get: " + CqlBulkOutputFormat.getColumnFamilySchema(job.getConfiguration(), "step"+step)); +// System.out.println("Schema we set: " + CassandraDB.getStepsSchema(step)); +// System.out.println("Schema we get: " + CqlBulkOutputFormat.getColumnFamilySchema(job.getConfiguration(), "step"+step)); CqlBulkOutputFormat.setColumnFamilyInsertStatement(job.getConfiguration(), "step"+step, CassandraDB.getStepsStatement(step)); -// job.setOutputFormatClass(ColumnFamilyOutputFormat.class); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - //ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, "alltriples"); ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE); - - /* - * addMultiNamedOutput - * - */ -// JobConf jobconf = new JobConf(job.getConfiguration()); -// MultipleOutputs.addNamedOutput(jobconf, "step" + step, ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); -// MultipleOutputs.addNamedOutput(jobconf, CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); - -// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), "step" + step); -// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY); -// job.getConfiguration().set(CONF_COLUMN_NAME, "sum"); - -// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.COLUMNFAMILY_ALLTRIPLES); - - // String query = "UPDATE " + CassandraDB.KEYSPACE + ".step" + step + - // " SET " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=? " ; -// "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + -// " SET " + CassandraDB.COLUMN_IS_LITERAL + "=?" + CassandraDB.COLUMN_TRIPLE_TYPE + "=?"; -// String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + -// " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; - // CqlConfigHelper.setOutputCql(job.getConfiguration(), query); -// String querysString = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + -// " SET " + CassandraDB.COLUMN_INFERRED_STEPS + " =? ";// + CassandraDB.COLUMN_IS_LITERAL + "=?, " + CassandraDB.COLUMN_TRIPLE_TYPE + " =? 
"; - //CqlConfigHelper.setOutputCql(job.getConfiguration(), querysString); - - + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), "step" + step); + + MrjMultioutput.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); + MrjMultioutput.addNamedOutput(job, "step" + step, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); +// CqlConfigHelper.setOutputCql(conf, "select * from step1"); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java index 0d8bfdc..ed22aba 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java @@ -9,6 +9,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,6 +18,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class OWLAllSomeValuesReducer extends Reducer, List> { @@ -30,6 +32,7 @@ public class OWLAllSomeValuesReducer extends Reducer others = new LinkedList(); // ��types����һ�� private LinkedList s_a_types = new LinkedList(); // ��types����һ��,���ڴ洢��someValues(0)����allValues(1)���� + private MultipleOutputs _output; @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -95,7 +98,7 @@ public void reduce(BytesWritable key, Iterable values, Context co // System.out.println("Generate an extended triple for OWLAllSomeValues: " + triple); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step12"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step12"); } } } @@ -104,6 +107,7 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
+ _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java index 093fb2c..be5e9c3 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java @@ -16,6 +16,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -24,6 +25,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -33,7 +35,8 @@ public class OWLEquivalenceSCSPReducer extends Reducer> subpropSchemaTriples = null; public static Map> subclassSchemaTriples = null; public static Map> equivalenceClassesSchemaTriples = null; // Added by WuGang @@ -109,7 +112,7 @@ public void reduce(LongWritable key, Iterable values, Context con } // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); } } @@ -146,7 +149,7 @@ public void reduce(LongWritable key, Iterable values, Context con } // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); } } @@ -180,7 +183,7 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRobject(triple.getObject()); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); } } @@ -215,7 +218,7 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRobject(triple.getObject()); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step11"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); } } } @@ -223,6 +226,7 @@ public void reduce(LongWritable key, Iterable values, Context con @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
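+		// Editor's note (sketch, inferred from CreateTables.createSchema() and
+		// the stepsValues list built in CassandraDB earlier in this patch): each
+		// "stepN" table has columns (sub, pre, obj, rule, v1, v2, v3,
+		// transitivelevel), where rule is the applied rule's type code and
+		// (v1, v2, v3) record the antecedent triple justifying the inference,
+		// so bound values must be listed in exactly that order. The insert
+		// statement returned by CassandraDB.getStepsStatement() (not shown in
+		// this hunk) is presumably of this shape:
+		//
+		//     String stepInsert = "INSERT INTO " + CassandraDB.KEYSPACE + ".step11 "
+		//             + "(sub, pre, obj, rule, v1, v2, v3, transitivelevel) "
+		//             + "VALUES (?, ?, ?, ?, ?, ?, ?, ?)";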
+ _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep((byte)context.getConfiguration().getInt("reasoner.step", 0)); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java index 67d5c53..5d53a4f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java @@ -17,6 +17,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -25,6 +26,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -40,6 +42,7 @@ public class OWLHasValueReducer extends Reducer> hasValue2Map = new HashMap>(); private Map> onProperty2Map = new HashMap>(); + private MultipleOutputs _output; public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -71,7 +74,7 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRobject(triple.getObject()); // w // System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step13"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step13"); // context.write(source, triple); } } @@ -101,7 +104,7 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRobject(object); // w // System.out.println("In OWLHasValueReducer for 14a output: "+triple); // Added by Wugang - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step13"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step13"); // context.write(source, triple); } } @@ -112,6 +115,7 @@ public void reduce(LongWritable key, Iterable values, Context con public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
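+		// Editor's note (sketch): the 14a/14b labels in this reducer's reduce()
+		// refer to the OWL Horst (pD*) hasValue rules, roughly:
+		//     rdfp14a:  v owl:hasValue w, v owl:onProperty p, u p w        =>  u rdf:type v
+		//     rdfp14bx: v owl:hasValue w, v owl:onProperty p, u rdf:type v =>  u p w
+		// For example (invented vocabulary), with :Retiree owl:hasValue :retired
+		// and :Retiree owl:onProperty :status, the triple (:john rdf:type :Retiree)
+		// yields (:john :status :retired) under rdfp14bx.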
+ _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java index 33e5df7..b3c656a 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java @@ -16,6 +16,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -24,6 +25,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -37,7 +39,8 @@ public class OWLNotRecursiveReducer extends Reducer set = new HashSet(); protected Map> schemaInverseOfProperties = null; - + private MultipleOutputs _output; + protected void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bytes = key.getBytes(); long rsubject=0, rpredicate=0, robject=0; @@ -97,7 +100,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setObject(object); // System.out.println("Find a derive in functional and inverse functional property!" + triple); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step5"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step5"); outputSize++; } context.getCounter("OWL derived triples", "functional and inverse functional property").increment(outputSize); @@ -122,7 +125,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setPredicate(itr.next().get()); triple.setRpredicate(triple.getPredicate()); // Added by WuGang // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step5"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step5"); context.getCounter("OWL derived triples", "simmetric property").increment(1); } @@ -154,7 +157,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setPredicate(derivedPredicate); // Only one of the inverse, the others will be completed in outputInverseOf() //triple.setPredicate(itrInverse.next()); // Commented by WuGang 2015-01-27 // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step5"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step5"); context.getCounter("OWL derived triples", "inverse of").increment(1); // Moved to here by WuGang, 2015-01-27 @@ -191,7 +194,7 @@ else if (bytes[0] == 1){ //Inverse Functional transitiveSource.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setPredicate(Math.abs(predicate)); // context.write(transitiveSource, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, transitiveSource, context, "step5"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, transitiveSource, _output, "step5"); context.getCounter("OWL derived triples", "transitive 
property input").increment(1); } default: @@ -213,7 +216,7 @@ private void outputInverseOf(long subject, long object, long predicate, Set, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java index 4ae17cc..fd7d0cf 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java @@ -224,17 +224,7 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(OWLNotRecursiveReducer.class); - - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step5"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - + job.waitForCompletion(true); @@ -313,16 +303,6 @@ private long inferTransitivityStatements(String[] args) job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLTransitivityReducer.class); - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step6"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - job.waitForCompletion(true); long stepNotFilteredDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); @@ -368,17 +348,7 @@ private long inferSameAsStatements(String[] args) { job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReducer.class); - - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step8"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - + job.waitForCompletion(true); // System.out.println("In FilesOWLReasoner: " + job.getCounters().findCounter("synonyms", "replacements").getValue()); @@ -431,17 +401,7 @@ private long inferSameAsStatements(String[] args) { SequenceFileOutputFormat.setOutputPath(job, commonResourcesPath); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); - - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step9"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - + 
job.waitForCompletion(true); @@ -490,17 +450,7 @@ private long inferSameAsStatements(String[] args) { job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReconstructReducer.class); - - conf = job.getConfiguration(); - outputCF1 = "stepNO"; - outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step10"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - + job.waitForCompletion(true); FileSystem fs = FileSystem.get(job.getConfiguration()); @@ -552,17 +502,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLEquivalenceSCSPReducer.class); - - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step11"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - + job.waitForCompletion(true); return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); } @@ -599,17 +539,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLAllSomeValuesReducer.class); - - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step12"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - + job.waitForCompletion(true); // Added by Wugang 20150111 @@ -682,16 +612,6 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLHasValueReducer.class); - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step13"); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - job.waitForCompletion(true); // Get inferred count diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java index 4ab483e..f003ea1 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java @@ -8,8 +8,10 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; 
+import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import cn.edu.neu.mitt.mrj.data.Triple; @@ -19,7 +21,8 @@ public class OWLSameAsReconstructReducer extends Reducer values, Context context) throws IOException, InterruptedException { // System.out.println("In OWLSameAsReconstructReducer!!!"); @@ -78,7 +81,7 @@ public void reduce(BytesWritable key, Iterable values, Context co } // System.out.println("Find a complete replacment of triple: " + oValue); - CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context, "step10"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oValue, oKey, _output, "step10"); // context.write(oKey, oValue); } } @@ -86,6 +89,7 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + _output = new MrjMultioutput, List>(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java index 192df4b..1721d7f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java @@ -11,12 +11,14 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class OWLSameAsReducer extends Reducer, List> { @@ -25,7 +27,8 @@ public class OWLSameAsReducer extends Reducer duplicates = new HashSet(); private List storage = new LinkedList(); - + private MultipleOutputs _output; + @Override public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -65,7 +68,7 @@ public void reduce(LongWritable key, Iterable values, Context con long lValue = itr2.next(); if (!duplicates.contains(lValue)) { oValue.setObject(lValue); - CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context, "step8"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oValue, oKey, _output, "step8"); duplicates.add(lValue); } } @@ -80,6 +83,7 @@ public void reduce(LongWritable key, Iterable values, Context con @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
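+	// Editor's note (assumption): this reducer emits one owl:sameAs triple per
+	// distinct synonym in the key's group, using the `duplicates` set to
+	// suppress repeats; assuming the set is cleared once per key group, memory
+	// stays bounded by the largest synonym group. The pattern, in outline:
+	//
+	//     duplicates.clear();            // once per key group
+	//     for (long id : groupMembers) {
+	//         if (duplicates.add(id)) {  // add() returns false for repeats
+	//             oValue.setObject(id);
+	//             // ... write oValue to "step8" and alltriples ...
+	//         }
+	//     }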
+ _output = new MrjMultioutput, List>(context); oValue.setObjectLiteral(false); oValue.setPredicate(TriplesUtils.OWL_SAME_AS); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java index 92dd387..e8aab52 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java @@ -10,6 +10,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,6 +19,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class OWLTransitivityReducer extends Reducer, List> { @@ -30,6 +32,7 @@ public class OWLTransitivityReducer extends Reducer values, Context context) throws IOException, InterruptedException { @@ -95,7 +98,7 @@ public void reduce(BytesWritable key, Iterable values, Context co source.setTransitiveLevel((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context, "step6"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step6"); // System.out.println("In OWLTransitivityReducer: " + triple); } @@ -106,6 +109,7 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
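+		// Editor's note (worked example, values assumed): setup() below stores
+		// baseLevel = reasoning.baseLevel - 1, and reduce() above assigns a
+		// derived triple the level abs(l1) + abs(l2) - baseLevel. With
+		// reasoning.baseLevel = 1 (so baseLevel = 0), joining a length-1 edge
+		// (a p b) with a length-2 chain (b p d) yields (a p d) at transitive
+		// level 1 + 2 - 0 = 3; the level tracks how many original edges the
+		// derived shortcut spans.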
+ _output = new MrjMultioutput, List>(context); baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 1) - 1; level = context.getConfiguration().getInt("reasoning.transitivityLevel", -1); // Modified by WuGang 2015-01-28 diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index b3c4f20..722ea1f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -112,21 +112,6 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep lastExecutionPropInheritance = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-inherit"); - Configuration conf = job.getConfiguration(); - String outputCF1 = "stepNO"; - String outputCF2 = "alltriples"; -// conf.set(outputCF1, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - conf.set(outputCF1, "step1"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputColumnFamily(conf, "step1"); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - - -// System.out.println(CqlBulkOutputFormat.getColumnFamilyInsertStatement(conf, outputCF1)); -// System.out.println(CqlBulkOutputFormat.getColumnFamilySchema(conf, outputCF1)); - job.waitForCompletion(true); long propInheritanceDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); derivation += propInheritanceDerivation; @@ -149,34 +134,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep //job.setPartitionerClass(MyHashPartitioner.class); // Is this ok? 
seems not necessary job.setReducerClass(RDFSSubpropDomRangeReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - System.out.println("set map reduce class finished"); - - conf = job.getConfiguration(); - conf.set(outputCF1, "step2"); - conf.set(outputCF2, "alltriples"); - - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "step2"); - -// CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); -// CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(2)); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlBulkOutputFormat.class, ByteBuffer.class, List.class); - CqlConfigHelper.setOutputCql(conf, "select * from step2"); - -// ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); -// ConfigHelper.setOutputColumnFamily(conf, "step1"); -// MultipleOutputs.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, Map.class, List.class); -// MultipleOutputs.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, Map.class, List.class); -// MultipleOutputs.addNamedOutput(job, conf.get(outputCF1), ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); -// MultipleOutputs.addNamedOutput(job, conf.get(outputCF2), ColumnFamilyOutputFormat.class, ByteBuffer.class, List.class); - - - - //MultipleOutputs.addNamedOutput((JobConf) job.getConfiguration() , CassandraDB.COLUMNFAMILY_ALLTRIPLES, ColumnFamilyOutputFormat.class, Map.class, List.class); - //Job jobs = new Job(getConf()); - //MultipleOutputs.addMultiNamedOutput(conf, namedOutput, outputFormatClass, keyClass, valueClass); job.getConfiguration().setInt("lastExecution.step", lastExecutionDomRange); lastExecutionDomRange = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-domain-range"); @@ -206,17 +164,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSubclasReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - - conf = job.getConfiguration(); - conf.set(outputCF1, "step3"); - conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "alltriples"); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlOutputFormat.class, ByteBuffer.class, List.class); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlOutputFormat.class, ByteBuffer.class, List.class); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); - CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(3)); + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-subclass-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); @@ -250,19 +198,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSpecialPropsReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - - conf = job.getConfiguration(); - conf.set(outputCF1, "step4"); - 
conf.set(outputCF2, "alltriples"); - ConfigHelper.setOutputKeyspace(conf, CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(conf, "alltriples"); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF1), CqlOutputFormat.class, ByteBuffer.class, List.class); - MrjMultioutput.addNamedOutput(job, conf.get(outputCF2), CqlOutputFormat.class, ByteBuffer.class, List.class); - CqlConfigHelper.setOutputCql(conf, "select * from step1"); - - CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF1, CassandraDB.getAlltripleSchema()); - CqlBulkOutputFormat.setColumnFamilySchema(conf, outputCF2, CassandraDB.getStepsSchema(4)); - + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-special-props-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java index 7f595c6..528f648 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java @@ -10,17 +10,20 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class RDFSSpecialPropsReducer extends Reducer, List> { private TripleSource source = new TripleSource(); private Triple oTriple = new Triple(); + private MultipleOutputs _output; @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -55,7 +58,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); break; @@ -70,7 +73,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of literal").increment(1); break; @@ -86,7 +89,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); //context.write(source, 
oTriple); break; case 4: // û�ж�Ӧ��rdfs rule�� @@ -100,7 +103,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) else oTriple.setObjectLiteral(true); context.getCounter("RDFS derived triples", "subproperty inheritance of member").increment(1); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); // context.write(source, oTriple); default: break; @@ -110,9 +113,18 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java index b35abcc..9830e31 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java @@ -57,6 +57,7 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter oKey.set(bKey, 0, 17); oValue.set(pre); + System.out.println("subprop map rule 7 " + pre); context.write(oKey, oValue); } @@ -66,7 +67,8 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter bKey[0] = 5; NumberUtils.encodeLong(bKey, 1, sub); oKey.set(bKey, 0, 9); - oValue.set(obj); + oValue.set(obj); + System.out.println("subprop map rule 5 " + obj); context.write(oKey, oValue); } } @@ -81,13 +83,13 @@ protected void setup(Context context) throws IOException { // } catch (Exception e) { // System.out.println("Error in creating Index"); // } - try { - CassandraDB d = new CassandraDB(); - d.createIndexOnTripleType(); - d.createIndexOnRule(); - } catch (Exception e) { - // TODO: handle exception - } +// try { +// CassandraDB d = new CassandraDB(); +// d.createIndexOnTripleType(); +// d.createIndexOnRule(); +// } catch (Exception e) { +// // TODO: handle exception +// } if (subpropSchemaTriples == null) { subpropSchemaTriples = new HashSet(); try { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java index ed523f0..0f0c8dc 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java @@ -16,6 +16,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -24,13 +25,17 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; -public class RDFSSubclasReducer extends Reducer, List> { - - protected static Logger log = 
LoggerFactory.getLogger(RDFSSubclasReducer.class);
-
+public class RDFSSubclasReducer
+		extends
+		Reducer<BytesWritable, LongWritable, Map<String, ByteBuffer>, List<ByteBuffer>> {
+
+	protected static Logger log = LoggerFactory
+			.getLogger(RDFSSubclasReducer.class);
+
 	public static Map<Long, Collection<Long>> subclassSchemaTriples = null;
 	protected Set<Long> subclasURIs = new HashSet<Long>();
 	protected Set<Long> existingURIs = new HashSet<Long>();
@@ -38,7 +43,8 @@ public class RDFSSubclasReducer
 	protected Set<Long> specialSuperclasses = new HashSet<Long>();
 	private TripleSource source = new TripleSource();
 	private Triple oTriple = new Triple();
-
+	private MultipleOutputs<Map<String, ByteBuffer>, List<ByteBuffer>> _output;
+
 	private void recursiveScanSuperclasses(long value, Set<Long> set) {
 		Collection<Long> subclassValues = subclassSchemaTriples.get(value);
 		if (subclassValues != null) {
@@ -54,65 +60,69 @@ private void recursiveScanSuperclasses(long value, Set<Long> set) {
 	}
 
 	@Override
-	public void reduce(BytesWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
-//		System.out.println("Entering RDFSSubclasReducer");
+	public void reduce(BytesWritable key, Iterable<LongWritable> values,
+			Context context) throws IOException, InterruptedException {
+		// System.out.println("Entering RDFSSubclasReducer");
 		existingURIs.clear();
 		Iterator<LongWritable> itr = values.iterator();
 		while (itr.hasNext()) {
 			long value = itr.next().get();
-			existingURIs.add(value); // every class URI already asserted for this key
+			existingURIs.add(value); // every class URI already asserted for this key
 		}
-
+
 		Iterator<Long> oTypes = existingURIs.iterator();
 		subclasURIs.clear();
 		while (oTypes.hasNext()) {
 			long existingURI = oTypes.next();
-			recursiveScanSuperclasses(existingURI, subclasURIs); // subclasURIs collects all transitive superclasses
+			recursiveScanSuperclasses(existingURI, subclasURIs); // subclasURIs collects all transitive superclasses
 		}
-
+
 		subclasURIs.removeAll(existingURIs);
-
+
 		oTypes = subclasURIs.iterator();
 		byte[] bKey = key.getBytes();
-		long oKey = NumberUtils.decodeLong(bKey,1);
+		long oKey = NumberUtils.decodeLong(bKey, 1);
 		oTriple.setSubject(oKey);
 		boolean typeTriple = bKey[0] == 0;
-		if (!typeTriple) { // It's a subclass triple
-			oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11
+		if (!typeTriple) { // It's a subclass triple
+			oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11
 			// Added by WuGang, 2010-08-26
 			oTriple.setType(TriplesUtils.RDFS_11);
 			oTriple.setRsubject(oTriple.getSubject());
 			oTriple.setRpredicate(TriplesUtils.RDFS_SUBCLASS);
-		} else { // It's a type triple
-			oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9
+		} else { // It's a type triple
+			oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9
 			// Added by WuGang, 2010-08-26
 			oTriple.setType(TriplesUtils.RDFS_9);
 			oTriple.setRsubject(oTriple.getSubject());
 			oTriple.setRpredicate(TriplesUtils.RDF_TYPE);
 		}
 
-//		while (oTypes.hasNext()) {
-//			long oType = oTypes.next();
-//			oTriple.setObject(oType);
-//			context.write(source, oTriple);
-//		}
+		// while (oTypes.hasNext()) {
+		//     long oType = oTypes.next();
+		//     oTriple.setObject(oType);
+		//     context.write(source, oTriple);
+		// }
 		// Modified by WuGang, 2010-08-26
 		while (oTypes.hasNext()) {
 			long oType = oTypes.next();
 			oTriple.setObject(oType);
 			for (long obj : existingURIs) {
 				oTriple.setRobject(obj);
-				CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3");
-//				context.write(source, oTriple);
+				CassandraDB.writeJustificationToMapReduceMultipleOutputs(
+						oTriple, source, _output, "step3");
+				// context.write(source, oTriple);
 			}
-		}
-
+		}
+
 		if (typeTriple) {
 			/* Check special rules */
-			if ((subclasURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)
-					|| existingURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY))
-					&& !memberProperties.contains(oTriple.getSubject())) { // Rule 12, see RDFSSpecialPropsReducer
+			if ((subclasURIs
+					.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) || existingURIs
+					.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY))
+					&& !memberProperties.contains(oTriple.getSubject())) { // Rule 12, see RDFSSpecialPropsReducer
 				oTriple.setPredicate(TriplesUtils.RDFS_SUBPROPERTY);
 				oTriple.setObject(TriplesUtils.RDFS_MEMBER);
 				// Added by WuGang, 2010-08-26
@@ -121,16 +131,20 @@ public void reduce(BytesWritable key, Iterable<LongWritable> values, Context con
 				oTriple.setRpredicate(TriplesUtils.RDF_TYPE);
 				oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY);
 
-				CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3");
-//				context.write(source, oTriple);
-				context.getCounter("RDFS derived triples", "subproperty of member").increment(1);
+				CassandraDB.writeJustificationToMapReduceMultipleOutputs(
+						oTriple, source, _output, "step3");
+				// context.write(source, oTriple);
+				context.getCounter("RDFS derived triples",
+						"subproperty of member").increment(1);
 			}
-
+
 			if (subclasURIs.contains(TriplesUtils.RDFS_DATATYPE)
 					|| existingURIs.contains(TriplesUtils.RDFS_DATATYPE)) {
 				specialSuperclasses.clear();
-				recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses);
-				if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule 13, see RDFSSpecialPropsReducer
+				recursiveScanSuperclasses(oTriple.getSubject(),
+						specialSuperclasses);
+				if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule 13, see RDFSSpecialPropsReducer
 					oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS);
 					oTriple.setObject(TriplesUtils.RDFS_LITERAL);
 					// Added by WuGang, 2010-08-26
@@ -139,17 +153,21 @@ public void reduce(BytesWritable key, Iterable<LongWritable> values, Context con
 					oTriple.setRpredicate(TriplesUtils.RDF_TYPE);
 					oTriple.setRobject(TriplesUtils.RDFS_DATATYPE);
 
-					CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3");
-//					context.write(source, oTriple);
-					context.getCounter("RDFS derived triples", "subclass of Literal").increment(1);
+					CassandraDB.writeJustificationToMapReduceMultipleOutputs(
+							oTriple, source, _output, "step3");
+					// context.write(source, oTriple);
+					context.getCounter("RDFS derived triples",
+							"subclass of Literal").increment(1);
 				}
 			}
-
+
 			if (subclasURIs.contains(TriplesUtils.RDFS_CLASS)
 					|| existingURIs.contains(TriplesUtils.RDFS_CLASS)) {
 				specialSuperclasses.clear();
-				recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses);
-				if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule 8, see RDFSSpecialPropsReducer
+				recursiveScanSuperclasses(oTriple.getSubject(),
+						specialSuperclasses);
+				if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule 8, see RDFSSpecialPropsReducer
 					oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS);
 					oTriple.setObject(TriplesUtils.RDFS_RESOURCE);
 					// Added by WuGang, 2010-08-26
@@ -158,23 +176,29 @@ public void reduce(BytesWritable key, Iterable<LongWritable> values, Context con
 					oTriple.setRpredicate(TriplesUtils.RDF_TYPE);
 					oTriple.setRobject(TriplesUtils.RDFS_CLASS);
 
-					CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step3");
-//					context.write(source, oTriple);
-					context.getCounter("RDFS derived triples", "subclass of resource").increment(1);
+					CassandraDB.writeJustificationToMapReduceMultipleOutputs(
+							oTriple, source, _output, "step3");
+					// context.write(source, oTriple);
+					context.getCounter("RDFS derived triples",
"subclass of resource").increment(1); } } } - - //Update the counters + + // Update the counters if (typeTriple) - context.getCounter("RDFS derived triples", "subclass inheritance rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", + "subclass inheritance rule").increment(subclasURIs.size()); else - context.getCounter("RDFS derived triples", "subclass transitivity rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", + "subclass transitivity rule").increment(subclasURIs.size()); } - + @Override public void setup(Context context) throws IOException { - CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works + // around. + _output = new MrjMultioutput, List>(context); if (subclassSchemaTriples == null) { CassandraDB db; @@ -198,17 +222,17 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } - + if (memberProperties == null) { CassandraDB db; try { db = new CassandraDB(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_MEMBER_SUBPROPERTY); - + memberProperties = new HashSet(); db.loadSetIntoMemory(memberProperties, filters, -1); - + db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); @@ -222,11 +246,20 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } catch (TException e) { e.printStackTrace(); - } + } } source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index fef05d5..c328c69 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -69,7 +69,7 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.add(itr.next().get()); // ���p // logger.info("while1 " + (System.currentTimeMillis() - time)); - System.out.println("while1 " + (System.currentTimeMillis() - time)); +// System.out.println("while1 " + (System.currentTimeMillis() - time)); Iterator itrProp = propURIs.iterator(); while (itrProp.hasNext()) { @@ -95,7 +95,7 @@ public void reduce(BytesWritable key, Iterable values, Context con // logger.info("while2 " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); - System.out.println("while2 " + (System.currentTimeMillis() - time)); +// System.out.println("while2 " + (System.currentTimeMillis() - time)); // Derive the new statements // Iterator itr2 = derivedProps.iterator(); @@ -128,7 +128,7 @@ public void reduce(BytesWritable key, Iterable values, Context con _output, "step2"); // logger.info("write " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); - System.out.println("finish " + (System.currentTimeMillis() - time)); +// System.out.println("finish " + (System.currentTimeMillis() - time)); // CassandraDB.writealltripleToMapReduceContext(oTriple, source, // context); // context.write(source, oTriple); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java 
b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java index db289d2..4760244 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java @@ -18,6 +18,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -26,6 +27,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -40,6 +42,9 @@ public class RDFSSubpropInheritReducer extends Reducer set) { Collection subprops = subpropSchemaTriples.get(value); @@ -94,9 +99,9 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setPredicate(itr3.next()); for (LongWritable pre : values) { oTriple.setRpredicate(pre.get()); - System.out.println("before wj"); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step1"); - System.out.println("after wj"); + System.out.println("before w rule 7"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step1"); + System.out.println("after w rule 7"); // context.write(source, oTriple); } } @@ -133,9 +138,9 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setObject(itr4.next()); for(LongWritable obj:values){ oTriple.setRobject(obj.get()); - System.out.println("before wj"); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context, "step1"); - System.out.println("before wj"); + System.out.println("before w rule 5"); + CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step1"); + System.out.println("after w rule 5"); // context.write(source, oTriple); } } @@ -151,9 +156,7 @@ public void reduce(BytesWritable key, Iterable values, Context con @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
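The hunks above (and the matching ones across the other reducers in this series) swap direct context writes for a MrjMultioutput that is created in setup() and closed in cleanup(). As a minimal sketch of that lifecycle, using Hadoop's stock MultipleOutputs rather than the project's subclass and assuming the named output "step1" was registered on the Job beforehand via MultipleOutputs.addNamedOutput (illustrative only, not project code):

    import java.io.IOException;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

    public class MultipleOutputsLifecycleSketch
            extends Reducer<Text, LongWritable, Text, LongWritable> {
        private MultipleOutputs<Text, LongWritable> output;

        @Override
        protected void setup(Context context) {
            // One instance per reduce task, built from the task context.
            output = new MultipleOutputs<Text, LongWritable>(context);
        }

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            for (LongWritable v : values) {
                // Route each record to a named output instead of context.write().
                output.write("step1", key, v);
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Without close() the named outputs' record writers are never flushed.
            output.close();
            super.cleanup(context);
        }
    }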
-// System.out.println("reduce setup"); -// CqlBulkOutputFormat.setColumnFamilySchema(context.getConfiguration(), CassandraDB.KEYSPACE + ".step1", CassandraDB.getStepsSchema(1)); -// System.out.println(CqlBulkOutputFormat.getColumnFamilySchema(context.getConfiguration(), CassandraDB.COLUMNFAMILY_ALLTRIPLES + "step1")); + _output = new MrjMultioutput, List>(context); if (subpropSchemaTriples == null) { CassandraDB db; @@ -188,5 +191,15 @@ public void setup(Context context) throws IOException { oTriple2.setObjectLiteral(false); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } + + } From 3c36fd5fef0ba1f81c990f5352aa432825b0265f Mon Sep 17 00:00:00 2001 From: Joe Date: Thu, 24 Dec 2015 11:37:49 +0800 Subject: [PATCH 13/16] =?UTF-8?q?step1=20=E5=86=99=E5=85=A5=E5=80=BC=20Sig?= =?UTF-8?q?ned-off-by:=20Joe=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 2 +- .../reasoner/MapReduceReasonerJobConfig.java | 2 +- .../rdfs/RDFSSubPropInheritMapper.java | 19 +++----------- .../rdfs/RDFSSubpropInheritReducer.java | 25 ++++++++++++++++--- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index 03ff52b..1159632 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -74,7 +74,7 @@ */ public class CassandraDB { private static final Logger logger = LoggerFactory.getLogger(CassandraDB.class); - public static final String KEYSPACE = "mrjks31"; // mr.j keyspace + public static final String KEYSPACE = "mrjkss1"; // mr.j keyspace public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications"; // mr.j keyspace public static final String COLUMNFAMILY_RESOURCES = "resources"; // mr.j keyspace public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index 659b893..1662086 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -217,7 +217,7 @@ public static Job createNewJob(Class classJar, String jobName, job.setJarByClass(classJar); job.setNumReduceTasks(numReduceTasks); - job.setNumReduceTasks(16); + job.setNumReduceTasks(8); if (bConfigCassandraInput) configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java index 9830e31..04f66fe 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java @@ -57,10 +57,10 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter oKey.set(bKey, 0, 17); oValue.set(pre); - System.out.println("subprop map rule 7 " + pre); context.write(oKey, oValue); +// System.out.println(" i " + i); } - + //Check suprop transitivity if (pre == TriplesUtils.RDFS_SUBPROPERTY && subpropSchemaTriples.contains(obj)) { //Write the 05 + subject @@ -68,7 +68,6 @@ 
protected void map(Long key, Row row, Context context) throws IOException, Inter NumberUtils.encodeLong(bKey, 1, sub); oKey.set(bKey, 0, 9); oValue.set(obj); - System.out.println("subprop map rule 5 " + obj); context.write(oKey, oValue); } } @@ -77,19 +76,7 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter protected void setup(Context context) throws IOException { hasSchemaChanged = false; previousExecutionStep = context.getConfiguration().getInt("lastExecution.step", -1); -// try { // if one already exists this just breaks out~~ it must come first --- the db object has already been declared... -// CassandraDB d = new CassandraDB(); -// d.Index(); -// } catch (Exception e) { -// System.out.println("Error in creating Index"); -// } -// try { -// CassandraDB d = new CassandraDB(); -// d.createIndexOnTripleType(); -// d.createIndexOnRule(); -// } catch (Exception e) { -// // TODO: handle exception -// } + if (subpropSchemaTriples == null) { subpropSchemaTriples = new HashSet(); try { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java index 4760244..7d2cae9 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java @@ -2,6 +2,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; @@ -24,6 +25,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.sun.corba.se.spi.ior.Writeable; + import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; @@ -64,7 +67,7 @@ private void recursiveScanSubproperties(long value, Set set) { public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bKey = key.getBytes(); - + System.out.println("bkey " + bKey[0]); switch(bKey[0]) { case 2: case 3: // rdfs rule 7 @@ -74,8 +77,13 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.clear(); //filter the properties that are already present Iterator itr = values.iterator(); + /* + * values is emptied once its iterator has been consumed, so use a list to record the values + */ + List list1 = new ArrayList(); while (itr.hasNext()) { long value = itr.next().get(); + list1.add(value); if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } @@ -114,11 +122,15 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.clear(); //filter the properties that are already present Iterator itr2 = values.iterator(); + List list = new ArrayList(); + System.out.println("rule 5 reduce "); while (itr2.hasNext()) { long value = itr2.next().get(); + list.add(value); if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } + System.out.println("itr2 values"); } Iterator itr4 = propURIs.iterator(); @@ -133,16 +145,21 @@ public void reduce(BytesWritable key, Iterable values, Context con // oTriple.setObject(itr4.next()); // context.write(source, oTriple); // } - // Modified by WuGang, 2010-08-26 + // Modified by WuGang, 2010-08-26 + System.out.println("itr4 " + itr4.toString()); + System.out.println("itr4 hasNext " + itr4.hasNext()); while (itr4.hasNext()) { + System.out.println("itr4 " + itr4.toString()); oTriple.setObject(itr4.next()); - for(LongWritable obj:values){ - oTriple.setRobject(obj.get()); + System.out.println("values " +
values.toString()); + for(Long obj:list){ + oTriple.setRobject(obj); System.out.println("before w rule 5"); CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step1"); System.out.println("before w rule 5"); // context.write(source, oTriple); } + } context.getCounter("RDFS derived triples", "subprop transitivity rule").increment(propURIs.size()); From 29a610686ca13e11272ca86439e67f7ba1c68604 Mon Sep 17 00:00:00 2001 From: Joe Date: Sun, 10 Jan 2016 10:12:58 +0800 Subject: [PATCH 14/16] Table structure: the triples used for reasoning and the triples inferred in each step are stored in separate tables. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit final version. Possible issues: GC; Java heap; garbage collection. Signed-off-by: Joe --- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 78 ++++++++++++++++--- .../edu/neu/mitt/mrj/io/dbs/CreateTables.java | 2 +- .../neu/mitt/mrj/io/dbs/MrjMultioutput.java | 36 ++++++++- .../reasoner/MapReduceReasonerJobConfig.java | 3 + .../reasoner/owl/OWLAllSomeValuesReducer.java | 9 +++ .../owl/OWLEquivalenceSCSPReducer.java | 10 ++- .../mrj/reasoner/owl/OWLHasValueReducer.java | 13 +++- .../reasoner/owl/OWLNotRecursiveReducer.java | 15 +++- .../owl/OWLSameAsReconstructReducer.java | 8 ++ .../mrj/reasoner/owl/OWLSameAsReducer.java | 8 ++ .../reasoner/owl/OWLTransitivityReducer.java | 8 ++ .../rdfs/RDFSSpecialPropsReducer.java | 24 ++++-- .../rdfs/RDFSSubPropDomRangeMapper.java | 8 -- .../mrj/reasoner/rdfs/RDFSSubclasReducer.java | 25 +++--- .../rdfs/RDFSSubpropDomRangeReducer.java | 12 ++- .../rdfs/RDFSSubpropInheritReducer.java | 41 +++++----- 16 files changed, 235 insertions(+), 65 deletions(-) diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index 1159632..9188ee7 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -74,7 +74,7 @@ */ public class CassandraDB { private static final Logger logger = LoggerFactory.getLogger(CassandraDB.class); - public static final String KEYSPACE = "mrjkss1"; // mr.j keyspace + public static final String KEYSPACE = "mrjks0"; // mr.j keyspace public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications"; // mr.j keyspace public static final String COLUMNFAMILY_RESOURCES = "resources"; // mr.j keyspace public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace @@ -188,7 +188,7 @@ public static String getAlltripleSchema(){ COLUMN_TRIPLE_TYPE + " int, " + COLUMN_INFERRED_STEPS + " int, " + "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + - ")) )"; + ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; return ALLTRIPLE_SCHEMA; } @@ -205,7 +205,7 @@ public static String getStepsSchema(Integer step){ COLUMN_TRANSITIVE_LEVELS + " int, " + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + - "))"; + ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; return STEPS_SCHEMA; } @@ -222,7 +222,7 @@ public static String getStepsSchema(String cfName){ COLUMN_TRANSITIVE_LEVELS + " int, " + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE
+ ", " + COLUMN_OBJ + ", " + COLUMN_RULE + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + - "))"; + ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; return STEPS_SCHEMA; } @@ -342,7 +342,7 @@ private static void setupTables(Cassandra.Iface client) COLUMN_TRIPLE_TYPE + " int, " + COLUMN_INFERRED_STEPS + " int, " + "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + - ")) )"; + ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; try { logger.info("set up table " + "all triples"); @@ -566,6 +566,62 @@ public static int readStepFromMapReduceRow(Row row){ return step; } + public static void writeJustificationToMapReduceMultipleOutputsLessObjects( + Triple triple, + TripleSource source, + MultipleOutputs output, + Map keys, + Map allkeys, + List stepsValues, + List allTValues, + String stepname) throws IOException, InterruptedException{ + + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + + allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); + allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + allTValues.add(ByteBufferUtil.bytes(triple.getObject())); + //用数字直接替代。 + allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{1}):ByteBuffer.wrap(new byte[]{0})); + allTValues.add(ByteBufferUtil.bytes( + TriplesUtils.getTripleType( + source, triple.getSubject(), + triple.getPredicate(), + triple.getObject()))); + allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); + + stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); + stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); + + output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); + output.write(stepname, null, stepsValues); + + /* + * 必要否?减速否? 
+ */ + keys.clear(); + allkeys.clear(); + allTValues.clear(); + stepsValues.clear(); + + } + public static void writeJustificationToMapReduceMultipleOutputs( Triple triple, TripleSource source, @@ -574,8 +630,8 @@ public static void writeJustificationToMapReduceMultipleOutputs( Map keys = new LinkedHashMap(); Map allkeys = new LinkedHashMap(); List allvariables = new ArrayList(); - long time = System.currentTimeMillis(); - +// long time = System.currentTimeMillis(); + byte one = 1; byte zero = 0; // Prepare composite key (sub, pre, obj) @@ -606,7 +662,7 @@ public static void writeJustificationToMapReduceMultipleOutputs( variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); - + // Keys are not used for // CqlBulkRecordWriter.write(Object key, List values), // so it can be set to null. @@ -637,13 +693,13 @@ public static void writeJustificationToMapReduceMultipleOutputs( stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); - stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); - time = System.currentTimeMillis(); +// time = System.currentTimeMillis(); output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); // System.out.println("wrote all " + (System.currentTimeMillis() - time)); // System.out.println("write all " + (System.currentTimeMillis() - time));// _output.write(stepname, keys, variables); - time = System.currentTimeMillis(); +// time = System.currentTimeMillis(); output.write(stepname, null, stepsValues); // System.out.println("wrote steps" + (System.currentTimeMillis() - time)); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java index ebe1f9e..8280f5f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java @@ -41,7 +41,7 @@ public void createSchema(Integer step){ "v2" + " bigint, " + "v3" + " bigint, " + "transitivelevel int" + - ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"); + ", primary key((sub, pre, obj, rule) ,v1, v2, v3 )) WITH compaction = {'class': 'LeveledCompactionStrategy'}"); } public void close(){ diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java index 8fb04b4..71c422d 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java @@ -4,10 +4,12 @@ package cn.edu.neu.mitt.mrj.io.dbs; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; import org.apache.cassandra.hadoop.ConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; -import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; @@ -21,9 +23,41 @@ */ public class MrjMultioutput extends MultipleOutputs { + private Map taskContexts = new HashMap(); + public MrjMultioutput(TaskInputOutputContext context) { super(context); } + + + + // This is copied from hadoop 0.23.11; + // it may avoid constructing the Job redundantly. + @Override + protected TaskAttemptContext getContext(String nameOutput) + throws IOException {
TaskAttemptContext taskContext = taskContexts.get(nameOutput); + + if (taskContext != null) { + return taskContext; + } + + // The following trick leverages the instantiation of a record writer via + // the job thus supporting arbitrary output formats. + Job job = new Job(context.getConfiguration()); + job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput)); + job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput)); + job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput)); + + taskContext = new TaskAttemptContext( + job.getConfiguration(), context.getTaskAttemptID()); + + taskContexts.put(nameOutput, taskContext); + + return taskContext; + } + + @Override protected synchronized RecordWriter getRecordWriter( diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index 1662086..8de3073 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -188,6 +188,7 @@ private static void configureCassandraOutput(Job job, int step) { // System.out.println("Schema we set: " + CassandraDB.getStepsSchema(step)); // System.out.println("Schema we get: " + CqlBulkOutputFormat.getColumnFamilySchema(job.getConfiguration(), "step"+step)); CqlBulkOutputFormat.setColumnFamilyInsertStatement(job.getConfiguration(), "step"+step, CassandraDB.getStepsStatement(step)); + CqlBulkOutputFormat.setDeleteSourceOnSuccess(job.getConfiguration(), true); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); @@ -211,6 +212,8 @@ public static Job createNewJob(Class classJar, String jobName, Configuration conf = new Configuration(); conf.setInt("maptasks", numMapTasks); conf.set("input.filter", typeFilters.toString()); + + conf.set("mapreduce.output.bulkoutputformat.streamthrottlembits", "400"); Job job = new Job(conf); job.setJobName(jobName); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java index ed22aba..badaa85 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java @@ -114,4 +114,13 @@ public void setup(Context context) { triple.setObjectLiteral(false); triple.setPredicate(TriplesUtils.RDF_TYPE); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java index be5e9c3..3bcb71b 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java @@ -110,7 +110,6 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRpredicate(TriplesUtils.OWL_EQUIVALENT_CLASS); triple.setRobject(triple.getSubject()); } - // context.write(source, triple); CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); } @@ -216,7 +215,6 @@ public void reduce(LongWritable 
key, Iterable values, Context con triple.setRsubject(triple.getSubject()); triple.setRpredicate(TriplesUtils.RDFS_SUBCLASS); triple.setRobject(triple.getObject()); - // context.write(source, triple); CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); } @@ -276,4 +274,12 @@ public void setup(Context context) throws IOException { } } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java index 5d53a4f..424dc83 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java @@ -47,8 +47,10 @@ public class OWLHasValueReducer extends Reducer values, Context context) throws IOException, InterruptedException { Iterator itr = values.iterator(); + System.out.println("step 6"); while (itr.hasNext()) { byte[] v = itr.next().getBytes(); + System.out.println("step6 has values reduce"); if (v.length > 0) { if (v[0] == 0) { //Rule 14b // System.out.println("In OWLHasValueReducer for 14b: "); // Added by Wugang @@ -72,8 +74,7 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRsubject(object); // v triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue triple.setRobject(triple.getObject()); // w -// System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang - + System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step13"); // context.write(source, triple); } @@ -149,4 +150,12 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java index b3c656a..7f06a45 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java @@ -45,7 +45,7 @@ protected void reduce(BytesWritable key, Iterable values, Context byte[] bytes = key.getBytes(); long rsubject=0, rpredicate=0, robject=0; long key1=0, key2=0, value1 = 0; - + switch(bytes[0]) { // case 0: // case 1: //Functional and inverse functional property @@ -58,6 +58,7 @@ protected void reduce(BytesWritable key, Iterable values, Context long minimum = Long.MAX_VALUE; set.clear(); Iterator itr = values.iterator(); + while (itr.hasNext()) { long value = itr.next().get(); value1 = value; // Added by Wugang: keep this value; for Functional it is the object of the original triple, for Inverse Functional it is the subject @@ -119,7 +120,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setRsubject(subject); triple.setRobject(object); triple.setType(TriplesUtils.OWL_HORST_3); - + itr = values.iterator(); while (itr.hasNext()) { triple.setPredicate(itr.next().get()); @@ -147,7 +148,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setRsubject(subject); triple.setRobject(object); triple.setRpredicate(predicate); - + /* I only output the last key of the
inverse */ Collection inverse = schemaInverseOfProperties.get(predicate); if (inverse != null) { @@ -261,4 +262,12 @@ public void setup(Context context) throws IOException { } } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java index f003ea1..0aa5917 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java @@ -92,4 +92,12 @@ public void setup(Context context) throws IOException { _output = new MrjMultioutput, List>(context); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java index 1721d7f..02004c8 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java @@ -95,4 +95,12 @@ public void setup(Context context) { oKey.setDerivation(TripleSource.OWL_DERIVED); oKey.setStep(context.getConfiguration().getInt("reasoner.step", 0)); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java index e8aab52..6e30412 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java @@ -118,4 +118,12 @@ public void setup(Context context) { source.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setObjectLiteral(false); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + _output.close(); + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java index 528f648..97e886b 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java @@ -2,7 +2,9 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -24,11 +26,20 @@ public class RDFSSpecialPropsReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bKey = key.getBytes(); Iterator itr = values.iterator(); + + _output = new MrjMultioutput, List>(context); + + while (itr.hasNext()) { long value = itr.next().get(); if (value == TriplesUtils.RDFS_LITERAL && (bKey[0] == 0 || 
bKey[0] == 2)) @@ -58,7 +69,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); break; @@ -73,7 +84,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of literal").increment(1); break; @@ -89,7 +100,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); //context.write(source, oTriple); break; case 4: // no corresponding RDFS rule @@ -103,17 +114,19 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) else oTriple.setObjectLiteral(true); context.getCounter("RDFS derived triples", "subproperty inheritance of member").increment(1); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step4"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); // context.write(source, oTriple); default: break; } + + _output.close(); + } @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
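The writeJustificationToMapReduceMultipleOutputsLessObjects calls above thread caller-owned maps and lists through the write path so one set of collections is reused for every record instead of being reallocated, which is the GC concern named in this patch's commit message. A sketch of that reuse pattern under hypothetical names (ReusableRowBuffer and RowSink are not project code):

    import java.nio.ByteBuffer;
    import java.util.ArrayList;
    import java.util.List;

    final class ReusableRowBuffer {
        interface RowSink {
            // Must serialize or copy the columns before returning,
            // because the caller clears the list immediately afterwards.
            void write(List<ByteBuffer> columns);
        }

        // A field, not a local: the same list backs every call, so a
        // long-running reducer stops churning short-lived collections.
        private final List<ByteBuffer> row = new ArrayList<ByteBuffer>();

        void writeRow(long sub, long pre, long obj, RowSink sink) {
            // Element buffers are still allocated per call (as with
            // ByteBufferUtil.bytes in the patch); only containers are reused.
            row.add(ByteBuffer.allocate(8).putLong(0, sub));
            row.add(ByteBuffer.allocate(8).putLong(0, pre));
            row.add(ByteBuffer.allocate(8).putLong(0, obj));
            sink.write(row);
            row.clear(); // without this, rows would accumulate across calls
        }
    }

Note the pattern is only safe when the sink consumes the row eagerly; a record writer that kept a reference to the list would later see it cleared.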
- _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); @@ -124,7 +137,6 @@ public void setup(Context context) { protected void cleanup( Reducer, List>.Context context) throws IOException, InterruptedException { - _output.close(); super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java index 1f60787..7ca4151 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java @@ -73,8 +73,6 @@ public void map(Long key, Row row, Context context) throws IOException, Interru } //Check if the predicate has a range - System.out.println("range " + rangeSchemaTriples); - System.out.println("row " + value); if (rangeSchemaTriples.contains(value.getPredicate()) && !value.isObjectLiteral()) { NumberUtils.encodeLong(bKey,0,value.getObject()); // Added by WuGang, 2010-08-26 @@ -94,27 +92,21 @@ protected void setup(Context context) throws IOException { try{ CassandraDB db = new CassandraDB(); - System.out.println("DB "); - System.out.println("domain : " + domainSchemaTriples + " range : " + rangeSchemaTriples); if (domainSchemaTriples == null) { - System.out.println("domain begin" + previousExecutionStep); domainSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_DOMAIN_PROPERTY); hasSchemaChanged = db.loadSetIntoMemory(domainSchemaTriples, filters, previousExecutionStep); - System.out.println("domain end"); // db not close } if (rangeSchemaTriples == null) { - System.out.println("rangeSchemaTriples begin: " + hasSchemaChanged + previousExecutionStep); rangeSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY); hasSchemaChanged |= db.loadSetIntoMemory(rangeSchemaTriples, filters, previousExecutionStep); - System.out.println("rangeSchemaTriples end: " + hasSchemaChanged); db.CassandraDBClose(); } }catch(TTransportException tte){ diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java index 0f0c8dc..f8cd7c2 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java @@ -2,9 +2,11 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -44,7 +46,11 @@ public class RDFSSubclasReducer private TripleSource source = new TripleSource(); private Triple oTriple = new Triple(); private MultipleOutputs _output; - + private Map keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); private void recursiveScanSuperclasses(long value, Set set) { Collection subclassValues = subclassSchemaTriples.get(value); if (subclassValues != null) { @@ -110,8 +116,8 @@ public void reduce(BytesWritable key, Iterable values, oTriple.setObject(oType); for (long obj : existingURIs) { oTriple.setRobject(obj); - 
CassandraDB.writeJustificationToMapReduceMultipleOutputs( - oTriple, source, _output, "step3"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step3"); + // context.write(source, oTriple); } } @@ -131,8 +137,8 @@ public void reduce(BytesWritable key, Iterable values, oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceMultipleOutputs( - oTriple, source, _output, "step3"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step3"); + // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); @@ -153,8 +159,8 @@ public void reduce(BytesWritable key, Iterable values, oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceMultipleOutputs( - oTriple, source, _output, "step3"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step3"); + // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of Literal").increment(1); @@ -176,8 +182,9 @@ public void reduce(BytesWritable key, Iterable values, oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); - CassandraDB.writeJustificationToMapReduceMultipleOutputs( - oTriple, source, _output, "step3"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, + keys, allkeys, stepsValues, allTValues,"step3"); + // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index c328c69..a95a989 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -3,9 +3,11 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.AbstractMap; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -47,7 +49,11 @@ public class RDFSSubpropDomRangeReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -124,8 +130,8 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRobject(uri_opposite); } - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, - _output, "step2"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step2"); + // logger.info("write " + (System.currentTimeMillis() - time)); time = System.currentTimeMillis(); // System.out.println("finish " + (System.currentTimeMillis() - time)); diff --git 
a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java index 7d2cae9..c4315f2 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java @@ -6,6 +6,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -47,7 +48,11 @@ public class RDFSSubpropInheritReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); private void recursiveScanSubproperties(long value, Set set) { Collection subprops = subpropSchemaTriples.get(value); @@ -66,8 +71,8 @@ private void recursiveScanSubproperties(long value, Set set) { @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { + byte[] bKey = key.getBytes(); - System.out.println("bkey " + bKey[0]); switch(bKey[0]) { case 2: case 3: // rdfs rule 7 @@ -87,6 +92,7 @@ public void reduce(BytesWritable key, Iterable values, Context con if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } + } Iterator itr3 = propURIs.iterator(); @@ -105,11 +111,9 @@ public void reduce(BytesWritable key, Iterable values, Context con // Modified by WuGang, 2010-08-26 while (itr3.hasNext()) { oTriple.setPredicate(itr3.next()); - for (LongWritable pre : values) { - oTriple.setRpredicate(pre.get()); - System.out.println("before w rule 7"); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step1"); - System.out.println("after w rule 7"); + for (Long pre : list1) { + oTriple.setRpredicate(pre); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step1"); // context.write(source, oTriple); } } @@ -122,15 +126,13 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.clear(); //filter the properties that are already present Iterator itr2 = values.iterator(); - List list = new ArrayList(); - System.out.println("rule 5 reduce "); + List list2 = new ArrayList(); while (itr2.hasNext()) { long value = itr2.next().get(); - list.add(value); + list2.add(value); if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } - System.out.println("itr2 values"); } Iterator itr4 = propURIs.iterator(); @@ -146,17 +148,12 @@ public void reduce(BytesWritable key, Iterable values, Context con // context.write(source, oTriple); // } // Modified by WuGang, 2010-08-26 - System.out.println("itr4 " + itr4.toString()); - System.out.println("itr4 hasNext " + itr4.hasNext()); + while (itr4.hasNext()) { - System.out.println("itr4 " + itr4.toString()); oTriple.setObject(itr4.next()); - System.out.println("values " + values.toString()); - for(Long obj:list){ + for(Long obj:list2){ oTriple.setRobject(obj); - System.out.println("before w rule 5"); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oTriple, source, _output, "step1"); - System.out.println("before w rule 5"); + CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step1"); // context.write(source, 
oTriple); } @@ -168,6 +165,7 @@ public void reduce(BytesWritable key, Iterable values, Context con default: break; } + } @Override @@ -207,13 +205,18 @@ public void setup(Context context) throws IOException { oTriple2.setPredicate(TriplesUtils.RDF_TYPE); oTriple2.setObjectLiteral(false); + } @Override protected void cleanup( Reducer, List>.Context context) throws IOException, InterruptedException { + /* + * Without calling close(), nothing gets written into the database. + */ _output.close(); + super.cleanup(context); } From 5c26cd8c725a856a58bb4342df1547e18037f30b Mon Sep 17 00:00:00 2001 From: Joe Date: Thu, 14 Jul 2016 09:29:49 +0800 Subject: [PATCH 15/16] Signed-off-by: Joe --- mrj-0.1/.classpath | 5 +- .../org.eclipse.core.resources.prefs | 1 - .../edu/neu/mitt/mrj/data/TripleSource.java | 11 - .../mrj/importtriples/FilesImportTriples.java | 35 +- ...tTriplesReconstructReducerToCassandra.java | 29 +- .../ImportTriplesSampleMapper.java | 2 +- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 489 ++---------------- .../justification/OWLHorstJustification.java | 4 +- .../reasoner/MapReduceReasonerJobConfig.java | 214 ++------ .../mrj/reasoner/ReasonedJustifications.java | 24 +- .../ReasonedJustificationsMapper.java | 13 +- .../ReasonedJustificationsReducer.java | 8 +- .../reasoner/owl/OWLAllSomeValuesMapper.java | 40 +- .../reasoner/owl/OWLAllSomeValuesReducer.java | 31 +- .../owl/OWLEquivalenceSCSPMapper.java | 2 +- .../owl/OWLEquivalenceSCSPReducer.java | 30 +- .../mrj/reasoner/owl/OWLHasValueMapper.java | 6 +- .../mrj/reasoner/owl/OWLHasValueReducer.java | 23 +- .../reasoner/owl/OWLNotRecursiveMapper.java | 2 +- .../reasoner/owl/OWLNotRecursiveReducer.java | 37 +- .../mitt/mrj/reasoner/owl/OWLReasoner.java | 143 ++--- .../owl/OWLSameAsDeconstructMapper.java | 8 +- .../owl/OWLSameAsDeconstructReducer.java | 23 +- .../mrj/reasoner/owl/OWLSameAsMapper.java | 11 +- .../owl/OWLSameAsReconstructMapper.java | 18 +- .../owl/OWLSameAsReconstructReducer.java | 47 +- .../mrj/reasoner/owl/OWLSameAsReducer.java | 20 +- .../owl/OWLSampleResourcesMapper.java | 1 - .../owl/OWLSampleResourcesReducer.java | 4 - .../reasoner/owl/OWLTransitivityMapper.java | 15 +- .../reasoner/owl/OWLTransitivityReducer.java | 28 +- .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java | 48 +- .../reasoner/rdfs/RDFSSpecialPropsMapper.java | 14 +- .../rdfs/RDFSSpecialPropsReducer.java | 40 +- .../rdfs/RDFSSubPropDomRangeMapper.java | 37 +- .../rdfs/RDFSSubPropInheritMapper.java | 13 +- .../mrj/reasoner/rdfs/RDFSSubclasMapper.java | 17 +- .../mrj/reasoner/rdfs/RDFSSubclasReducer.java | 149 ++---- .../rdfs/RDFSSubpropDomRangeReducer.java | 186 +++---- .../rdfs/RDFSSubpropInheritReducer.java | 63 +-- .../src/prejustification/SelectInferRows.java | 143 +++++ .../prejustification/SelectInferRowsMap.java | 79 +++ .../SelectInferRowsReduce.java | 18 + 43 files changed, 711 insertions(+), 1420 deletions(-) create mode 100644 mrj-0.1/src/prejustification/SelectInferRows.java create mode 100644 mrj-0.1/src/prejustification/SelectInferRowsMap.java create mode 100644 mrj-0.1/src/prejustification/SelectInferRowsReduce.java diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath index b1b26f7..e1c1f9a 100644 --- a/mrj-0.1/.classpath +++ b/mrj-0.1/.classpath @@ -5,8 +5,7 @@ - - - + + diff --git a/mrj-0.1/.settings/org.eclipse.core.resources.prefs b/mrj-0.1/.settings/org.eclipse.core.resources.prefs index 83eb0de..2dcd9cf 100644 --- a/mrj-0.1/.settings/org.eclipse.core.resources.prefs +++ b/mrj-0.1/.settings/org.eclipse.core.resources.prefs @@ -1,3 +1,2 @@ eclipse.preferences.version=1
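The list1/list2 copies that patches 13 and 14 introduce in RDFSSubpropInheritReducer exist because the values iterable handed to a Hadoop reducer is single-pass and recycles one Writable instance, so it cannot drive a nested loop directly. A minimal, self-contained sketch of the copy step (illustrative, not project code):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.io.LongWritable;

    final class MaterializeValuesSketch {
        // Copy the primitives out of the single-pass, object-reusing iterable
        // before traversing them more than once (e.g. in the rule 5/7 loops).
        static List<Long> materialize(Iterable<LongWritable> values) {
            List<Long> copy = new ArrayList<Long>();
            for (LongWritable v : values) {
                copy.add(v.get()); // take the long; the LongWritable is recycled
            }
            return copy;
        }
    }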
-encoding//src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java=UTF-8 encoding//src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java=UTF-8 diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java index afbc721..8d5c320 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java @@ -16,20 +16,17 @@ public class TripleSource implements WritableComparable { byte derivation = 0; int step = 0; - int transitive_level = 0; @Override public void readFields(DataInput in) throws IOException { derivation = in.readByte(); step = in.readInt(); - transitive_level = in.readInt(); } @Override public void write(DataOutput out) throws IOException { out.write(derivation); out.writeInt(step); - out.writeInt(transitive_level); } @Override @@ -50,14 +47,6 @@ public void setStep(int step) { this.step = step; } - public int getTransitiveLevel() { - return transitive_level; - } - - public void setTransitiveLevel(int level) { - this.transitive_level = level; - } - public void setDerivation(byte ruleset) { derivation = ruleset; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java index a647241..7140fbc 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java @@ -101,7 +101,7 @@ public void parseArgs(String[] args) { } public void sampleCommonResources(String[] args) throws Exception { -// System.out.println("in sampleCommonResources"); +// System.out.println("in sampleCommonResources"); Job job = createNewJob("Sample common resources"); //Input @@ -127,7 +127,7 @@ public void sampleCommonResources(String[] args) throws Exception { } public void assignIdsToNodes(String[] args) throws Exception { -// System.out.println("in assignIdsToNodes"); +// System.out.println("in assignIdsToNodes"); Job job = createNewJob("Deconstruct statements"); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1); @@ -156,7 +156,7 @@ public void assignIdsToNodes(String[] args) throws Exception { } private void rewriteTriples(String[] args) throws Exception { -// System.out.println("in rewriteTriples"); +// System.out.println("in rewriteTriples"); Job job = createNewJob("Reconstruct statements"); @@ -184,12 +184,13 @@ private void rewriteTriples(String[] args) throws Exception { job.setOutputValueClass(List.class); job.setOutputFormatClass(CqlOutputFormat.class); - ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_ALLTRIPLES); + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); // is the line below useful? //job.getConfiguration().set(CASSANDRA_PRIMARY_KEY, "(sub, pre, obj)"); - String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + - " SET " + CassandraDB.COLUMN_IS_LITERAL + "=? ,"+ CassandraDB.COLUMN_TRIPLE_TYPE + "=?" + ","+ CassandraDB.COLUMN_INFERRED_STEPS + "=0"; + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?
"; + CqlConfigHelper.setOutputCql(job.getConfiguration(), query); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); @@ -222,21 +223,13 @@ public static void main(String[] args) throws Exception { long time = System.currentTimeMillis(); int res = ToolRunner.run(new Configuration(), new FilesImportTriples(), args); // log.info("Import time: " + (System.currentTimeMillis() - time)); -// -// //Modified by LiYang 2015/4/10 -// CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); -// db.init(); -// // Modified -// db.createIndexOnTripleType(); -// //db.createIndexOnRule(); -// -// /* -// * Add by LiYang -// * 2015.7.19 -// */ -// //db.createIndexOnInferredSteps(); -// //db.createIndexOnTransitiveLevel(); -// db.CassandraDBClose(); + + //Modified by LiYang 2015/4/10 + CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); + db.init(); + db.createIndexOnTripleType(); + db.createIndexOnRule(); + db.CassandraDBClose(); System.out.println("Import time: " + (System.currentTimeMillis() - time)); System.exit(res); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java index 9933365..4b7acc3 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java @@ -2,7 +2,7 @@ * Project Name: mrj-0.1 * File Name: ImportTriplesReconstructReducerToCassandra.java * @author Gang Wu - * 2014锟斤拷10锟斤拷28锟斤拷 锟斤拷锟斤拷10:35:24 + * 2014��10��28�� ����10:35:24 * * Description: * Send reducer output to Cassandra DB by representing triples with ids @@ -16,11 +16,8 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.UUID; -import org.apache.cassandra.cli.CliParser.rowKey_return; import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.UUIDGen; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; import org.slf4j.Logger; @@ -31,7 +28,6 @@ import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; - /** * @author gibeo_000 * @@ -82,7 +78,7 @@ protected void reduce(LongWritable key, Iterable values, Context c } if (counter != 3) { - // Modified by WuGang 2010-12-3, 锟斤拷锟�?锟斤拷3元锟斤拷锟斤拷郑锟斤拷锟斤拷锟揭拷锟斤拷锟斤拷锟� + // Modified by WuGang 2010-12-3, ��������3Ԫ����֣�����Ҫ������ log.error("Found a non-triple when reconstructing. The count num is " + counter + ", and triple is " + oValue); // throw new IOException("Triple is not reconstructed!"); } @@ -93,22 +89,17 @@ protected void reduce(LongWritable key, Iterable values, Context c byte one = 1; byte zero = 0; - /* - keys.put("sub", ByteBufferUtil.bytes(oValue.getSubject())); - keys.put("pre", ByteBufferUtil.bytes(oValue.getPredicate())); - keys.put("obj", ByteBufferUtil.bytes(oValue.getObject())); - // the length of boolean type in cassandra is one byte!!!!!!!! 
- keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); -// keys.put("id", ByteBufferUtil.bytes(UUIDGen.getTimeUUID())); - */ - + // Prepare composite key (sub, pre, obj) keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject())); keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate())); keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject())); + // the length of boolean type in cassandra is one byte!!!!!!!! keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); - + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(0)); // for original triple set 0 int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(0L)); // for original triple set 0 long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(0L)); // for original triple set 0 long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(0L)); // for original triple set 0 long // Prepare variables, here is a boolean value for CassandraDB.COLUMN_IS_LITERAL List variables = new ArrayList(); @@ -116,9 +107,7 @@ protected void reduce(LongWritable key, Iterable values, Context c // the length of boolean type in cassandra is one byte!!!!!!!! // For column inferred, init it as false i.e. zero // variables.add(ByteBuffer.wrap(new byte[]{zero})); - variables.add(oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - variables.add(ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); - + variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple context.write(keys, variables); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java index 8614816..c1153f9 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java @@ -19,7 +19,7 @@ public class ImportTriplesSampleMapper extends Mapper preloadedURIs = TriplesUtils.getInstance().getPreloadedURIs(); protected void map(Text key, Text value, Context context) { - //System.out.println("in ImportTriplesSampleMapper"); + System.out.println("in ImportTriplesSampleMapper"); try { String[] uris = TriplesUtils.parseTriple(value.toString(), key.toString()); for(String uri : uris) { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index 9188ee7..dbfceca 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -39,7 +39,6 @@ import org.apache.cassandra.thrift.UnavailableException; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.mapreduce.Reducer.Context; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import
org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.protocol.TProtocol; @@ -55,7 +54,6 @@ import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import com.datastax.driver.core.Cluster; -import com.datastax.driver.core.Cluster.Builder; import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; @@ -64,8 +62,12 @@ import com.datastax.driver.core.Statement; //modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; +import com.datastax.driver.core.Cluster.Builder; +import com.datastax.driver.core.querybuilder.Delete.Where; +import com.datastax.driver.core.querybuilder.Insert; import com.datastax.driver.core.querybuilder.QueryBuilder; //modified +import com.datastax.driver.core.querybuilder.Select; /** @@ -74,17 +76,14 @@ */ public class CassandraDB { private static final Logger logger = LoggerFactory.getLogger(CassandraDB.class); - public static final String KEYSPACE = "mrjks0"; // mr.j keyspace + public static final String KEYSPACE = "mrjks"; // mr.j keyspace public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications"; // mr.j keyspace public static final String COLUMNFAMILY_RESOURCES = "resources"; // mr.j keyspace public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace - - public static final String COLUMNFAMILY_ALLTRIPLES = "alltriples"; - public static final String COLUMN_SUB = "sub"; // mrjks.justifications.sub public static final String COLUMN_PRE = "pre"; // mrjks.justifications.pre public static final String COLUMN_OBJ = "obj"; // mrjks.justifications.obj - public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype + public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype public static final String COLUMN_IS_LITERAL = "isliteral" ; // mrjks.justifications.isliteral public static final String COLUMN_INFERRED_STEPS = "inferredsteps" ; // mrjks.justifications.inferredsteps public static final String COLUMN_RULE = "rule"; // mrjks.justifications.rule @@ -94,14 +93,14 @@ public class CassandraDB { public static final String COLUMN_ID = "id"; // mrjks.resources.id public static final String COLUMN_LABEL = "label"; // mrjks.resources.label public static final String COLUMN_JUSTIFICATION = "justification"; //mrjks.results.justification - public static final String COLUMN_TRANSITIVE_LEVELS = "transitivelevel"; // mrjks.results.step + public static final String COLUMN_STEP = "step"; // mrjks.results.step public static final String DEFAULT_HOST = cn.edu.neu.mitt.mrj.utils.Cassandraconf.host; public static final String DEFAULT_PORT = "9160"; // in version 2.1.2, cql3 port is 9042 public static final String CQL_PAGE_ROW_SIZE = "10000"; //3 modified by liyang - + // 2014-12-11, Very strange, this works around. public static final String CONFIG_LOCATION = cn.edu.neu.mitt.mrj.utils.Cassandraconf.CassandraConfFile; public static void setConfigLocation(){ @@ -178,79 +177,13 @@ private static void setupKeyspace(Cassandra.Iface client) } } - public static String getAlltripleSchema(){ - String ALLTRIPLE_SCHEMA = "CREATE TABLE " + KEYSPACE + "." 
+ CassandraDB.COLUMNFAMILY_ALLTRIPLES + - " ( " + - COLUMN_SUB + " bigint, " + // partition key - COLUMN_PRE + " bigint, " + // partition key - COLUMN_OBJ + " bigint, " + // partition key - COLUMN_IS_LITERAL + " boolean, " + // partition key - COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + - "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + - ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; - return ALLTRIPLE_SCHEMA; - } - - public static String getStepsSchema(Integer step){ - String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + ".step" + step + - " ( " + - COLUMN_SUB + " bigint, " + - COLUMN_PRE + " bigint, " + - COLUMN_OBJ + " bigint, " + - COLUMN_RULE + " int, " + - COLUMN_V1 + " bigint, " + - COLUMN_V2 + " bigint, " + - COLUMN_V3 + " bigint, " + - COLUMN_TRANSITIVE_LEVELS + " int, " + - "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + - "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + - ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; - return STEPS_SCHEMA; - } - - public static String getStepsSchema(String cfName){ - String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + "." + cfName + - " ( " + - COLUMN_SUB + " bigint, " + - COLUMN_PRE + " bigint, " + - COLUMN_OBJ + " bigint, " + - COLUMN_RULE + " int, " + - COLUMN_V1 + " bigint, " + - COLUMN_V2 + " bigint, " + - COLUMN_V3 + " bigint, " + - COLUMN_TRANSITIVE_LEVELS + " int, " + - "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + - "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + - ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; - return STEPS_SCHEMA; - } - - public static String getStepsStatement(int step){ - String query = "INSERT INTO " + CassandraDB.KEYSPACE + ".step" + step + - " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; - return query; - } - - public static String getStepsStatement(String cfName){ - String query = "INSERT INTO " + CassandraDB.KEYSPACE + "." + cfName + - " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; - return query; - } - - public static String getAlltripleStatement(){ - return ("INSERT INTO " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + - " (sub, pre, obj, isliteral, tripletype, inferredsteps) VALUES(?, ?, ?, ?, ?, ?)"); - } - private static void setupTables(Cassandra.Iface client) throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { - - /* + // Create justifications table String query = "CREATE TABLE " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + " ( " + @@ -264,10 +197,9 @@ private static void setupTables(Cassandra.Iface client) COLUMN_V2 + " bigint, " + COLUMN_V3 + " bigint, " + // COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + // from this line, the fields are non-primary key - COLUMN_TRANSITIVE_LEVELS + " int, " + - " PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE + - ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + + COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + //", " + COLUMN_TRIPLE_TYPE + " ) ) "; @@ -277,10 +209,9 @@ private static void setupTables(Cassandra.Iface client) } catch (InvalidRequestException e) { logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS, e); } - */ - + // Create resources table - String query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES + + query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES + " ( " + COLUMN_ID + " bigint, " + COLUMN_LABEL + " text, " + @@ -294,7 +225,6 @@ private static void setupTables(Cassandra.Iface client) logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES, e); } - /* // Create results table query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESULTS + " ( " + @@ -309,8 +239,6 @@ private static void setupTables(Cassandra.Iface client) catch (InvalidRequestException e) { logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESULTS, e); } - - //Create resultrow table String cquery = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + "." + "resultrows" + " ( " + @@ -330,31 +258,8 @@ private static void setupTables(Cassandra.Iface client) //", " + COLUMN_TRIPLE_TYPE + " ) ) "; client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); - */ - - // Create the table holding all triples - String cquery = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + - " ( " + - COLUMN_SUB + " bigint, " + // partition key - COLUMN_PRE + " bigint, " + // partition key - COLUMN_OBJ + " bigint, " + // partition key - COLUMN_IS_LITERAL + " boolean, " + // partition key - COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + - "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + - ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; - - try { - logger.info("set up table " + "all triples"); - client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); - } catch (InvalidRequestException e) { - logger.error("failed to create table " + KEYSPACE + "." + "AllTriples", e); - } + - - query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - } @@ -369,7 +274,7 @@ public CassandraDB(String host, Integer port) throws TTransportException { } public void CassandraDBClose(){ - //this.close(); + this.close(); } public void init() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ @@ -382,41 +287,6 @@ public Cassandra.Iface getDBClient(){ return client; } - - /** - * Get the row count according to the COLUMN_INFERRED_STEPS. - * @return row count. - */ - - /* - * Need to change - */ - - public long getRowCountAccordingInferredSteps(int level){ - //ALLOW FILTERING - String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + - " WHERE " + COLUMN_INFERRED_STEPS + " = " + level + " ALLOW FILTERING"; - - long num = 0; - try { - CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); - } catch (InvalidRequestException e) { - e.printStackTrace(); - } catch (UnavailableException e) { - e.printStackTrace(); - } catch (TimedOutException e) { - e.printStackTrace(); - } catch (SchemaDisagreementException e) { - e.printStackTrace(); - } catch (TException e) { - e.printStackTrace(); - } - - return num; - } - - //TriplesUtils.SYNONYMS_TABLE //TriplesUtils.TRANSITIVE_TRIPLE //TriplesUtils.DATA_TRIPLE_SAME_AS @@ -426,7 +296,7 @@ public long getRowCountAccordingInferredSteps(int level){ */ public long getRowCountAccordingTripleType(int tripletype){ //ALLOW FILTERING - String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; long num = 0; @@ -448,40 +318,6 @@ public long getRowCountAccordingTripleType(int tripletype){ return num; } - - /** - * Get the row count according to the triple type. - * @return row count. - */ - public long getRowCountAccordingTripleTypeWithLimitation(int tripletype, int limit){ - //ALLOW FILTERING - String query = ""; - if (limit <= 0) - query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; - else - query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " LIMIT " + limit + " ALLOW FILTERING "; - - long num = 0; - try { - CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); - } catch (InvalidRequestException e) { - e.printStackTrace(); - } catch (UnavailableException e) { - e.printStackTrace(); - } catch (TimedOutException e) { - e.printStackTrace(); - } catch (SchemaDisagreementException e) { - e.printStackTrace(); - } catch (TException e) { - e.printStackTrace(); - } - - return num; - } - /** * Get the row count according to the type of rule. 
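Note on the pattern above: the surviving getRowCount* helpers all follow the same Thrift-side sequence: prepare a CQL3 COUNT(*) query with ALLOW FILTERING, bind the filter value, and read back a single bigint cell. A minimal standalone sketch of that sequence, assuming a reachable Cassandra node and the mrjks.justifications table created in setupTables (the class and method names here are illustrative only, not part of this patch):

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Compression;
import org.apache.cassandra.thrift.ConsistencyLevel;
import org.apache.cassandra.thrift.CqlPreparedResult;
import org.apache.cassandra.thrift.CqlResult;
import org.apache.cassandra.utils.ByteBufferUtil;

public class RowCountSketch {
    // Counts justifications rows of one triple type; mirrors getRowCountAccordingTripleType.
    public static long countByTripleType(Cassandra.Iface client, int tripletype) throws Exception {
        // ALLOW FILTERING is needed because tripletype is not part of the partition key.
        String query = "SELECT COUNT(*) FROM mrjks.justifications"
                + " WHERE tripletype = ? ALLOW FILTERING";
        CqlPreparedResult prepared = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
        List<ByteBuffer> args = new ArrayList<ByteBuffer>();
        args.add(ByteBufferUtil.bytes(tripletype));
        CqlResult result = client.execute_prepared_cql3_query(prepared.itemId, args, ConsistencyLevel.ONE);
        // COUNT(*) comes back as a single bigint column in the first row.
        return ByteBufferUtil.toLong(result.getRows().get(0).getColumns().get(0).value);
    }
}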
@@ -523,7 +359,7 @@ public void insertResources(long id, String label) throws InvalidRequestExceptio args.add(ByteBufferUtil.bytes(label)); CqlPreparedResult p_result = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); CqlResult result = client.execute_prepared_cql3_query(p_result.itemId, args, ConsistencyLevel.ANY); - //logger.info("Number of results: " + result.getNum()); + logger.info("Number of results: " + result.getNum()); } // TODO it's wrong!!!!!!!!!! @@ -545,10 +381,10 @@ public static Triple readJustificationFromMapReduceRow(Row row){ long pre = row.getLong(CassandraDB.COLUMN_PRE); long obj = row.getLong(CassandraDB.COLUMN_OBJ); boolean isObjectLiteral = row.getBool(CassandraDB.COLUMN_IS_LITERAL); - long v1 = -1; - long v2 = -2; - long v3 = -3; - int rule = -4; + long v1 = row.getLong(CassandraDB.COLUMN_V1); + long v2 = row.getLong(CassandraDB.COLUMN_V2); + long v3 = row.getLong(CassandraDB.COLUMN_V3); + int rule = row.getInt(CassandraDB.COLUMN_RULE); result.setObject(obj); result.setObjectLiteral(isObjectLiteral); @@ -566,190 +402,7 @@ public static int readStepFromMapReduceRow(Row row){ return step; } - public static void writeJustificationToMapReduceMultipleOutputsLessObjects( - Triple triple, - TripleSource source, - MultipleOutputs output, - Map keys, - Map allkeys, - List stepsValues, - List allTValues, - String stepname) throws IOException, InterruptedException{ - - keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); - keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); - keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); - keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int - keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long - keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long - keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long - - allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); - allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); - allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); - - allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); - allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); - allTValues.add(ByteBufferUtil.bytes(triple.getObject())); - // Use the numeric literals directly here. - allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{1}):ByteBuffer.wrap(new byte[]{0})); - allTValues.add(ByteBufferUtil.bytes( - TriplesUtils.getTripleType( - source, triple.getSubject(), - triple.getPredicate(), - triple.getObject()))); - allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); - - stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); - stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); - stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); - stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); - stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); - - output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); - output.write(stepname, null, stepsValues); - - /* - * Is this necessary? Does it slow things down? 
- */ - keys.clear(); - allkeys.clear(); - allTValues.clear(); - stepsValues.clear(); - - } - - public static void writeJustificationToMapReduceMultipleOutputs( - Triple triple, - TripleSource source, - MultipleOutputs output, - String stepname) throws IOException, InterruptedException{ - Map keys = new LinkedHashMap(); - Map allkeys = new LinkedHashMap(); - List allvariables = new ArrayList(); -// long time = System.currentTimeMillis(); - - byte one = 1; - byte zero = 0; - // Prepare composite key (sub, pre, obj) - keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); - keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); - keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); - // the length of boolean type in cassandra is one byte!!!!!!!! - keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int - keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long - keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long - keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long - - allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); - allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); - allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); - - allvariables.add(ByteBufferUtil.bytes(source.getStep())); - allvariables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - allvariables.add(ByteBufferUtil.bytes((int)triple.getType())); - - // Prepare variables - List variables = new ArrayList(); -// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); - // the length of boolean type in cassandra is one byte!!!!!!!! - // For column inferred, init it as false i.e. zero - //variables.add(ByteBuffer.wrap(new byte[]{zero})); - - variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); - - - - // Keys are not used for - // CqlBulkRecordWriter.write(Object key, List values), - // so it can be set to null. 
- // Only values are used there where the value correspond to - // the insert statement set in CqlBulkOutputFormat.setColumnFamilyInsertStatement() - // All triples columnfamily: - // sub, pre, obj, isliteral, tripletype, inferredsteps - // Steps columnfamily: - // sub, pre, obj, rule, v1, v2, v3, transitivelevel - - List allTValues = new ArrayList(); - allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); - allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); - allTValues.add(ByteBufferUtil.bytes(triple.getObject())); - allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - allTValues.add(ByteBufferUtil.bytes( - TriplesUtils.getTripleType( - source, triple.getSubject(), - triple.getPredicate(), - triple.getObject()))); - allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); - - List stepsValues = new ArrayList(); - stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); - stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); - stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); - stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); - stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); - stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); - -// time = System.currentTimeMillis(); - output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); -// System.out.println("wrote all " + (System.currentTimeMillis() - time)); -// System.out.println("write all " + (System.currentTimeMillis() - time));// _output.write(stepname, keys, variables); -// time = System.currentTimeMillis(); - output.write(stepname, null, stepsValues); -// System.out.println("wrote steps" + (System.currentTimeMillis() - time)); - - - } - public static void writeJustificationToMapReduceContext( - Triple triple, - TripleSource source, - Context context, - String stepname) throws IOException, InterruptedException{ - Map keys = new LinkedHashMap(); - Map allkeys = new LinkedHashMap(); - List allvariables = new ArrayList(); - long time = System.currentTimeMillis(); - - byte one = 1; - byte zero = 0; - - // Prepare composite key (sub, pre, obj) - keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); - keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); - keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); - // the length of boolean type in cassandra is one byte!!!!!!!! 
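For contrast with the bulk-output convention described in the removed comments above, the CqlOutputFormat path that this patch keeps expects each reducer emit to be a Map of primary-key columns plus a List holding one bound ByteBuffer per "?" in the UPDATE statement registered through CqlConfigHelper.setOutputCql. A minimal sketch of one such emit, assuming the justifications schema from setupTables (the helper class itself is illustrative, not part of the patch):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.mapreduce.Reducer.Context;

public class JustificationEmitSketch {
    // Assumes the job's output CQL is: UPDATE mrjks.justifications SET inferredsteps = ?
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static void emit(Context context, long sub, long pre, long obj, boolean isLiteral,
            int tripleType, int rule, long v1, long v2, long v3, int inferredSteps)
            throws IOException, InterruptedException {
        Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
        keys.put("sub", ByteBufferUtil.bytes(sub));
        keys.put("pre", ByteBufferUtil.bytes(pre));
        keys.put("obj", ByteBufferUtil.bytes(obj));
        // a Cassandra boolean is one byte on the wire
        keys.put("isliteral", ByteBuffer.wrap(new byte[] { isLiteral ? (byte) 1 : (byte) 0 }));
        keys.put("tripletype", ByteBufferUtil.bytes(tripleType));
        keys.put("rule", ByteBufferUtil.bytes(rule));
        keys.put("v1", ByteBufferUtil.bytes(v1));
        keys.put("v2", ByteBufferUtil.bytes(v2));
        keys.put("v3", ByteBufferUtil.bytes(v3));
        // one bound value per "?" in the UPDATE statement, in order
        List<ByteBuffer> variables = new ArrayList<ByteBuffer>();
        variables.add(ByteBufferUtil.bytes(inferredSteps));
        context.write(keys, variables);
    }
}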
- keys.put(CassandraDB.COLUMN_IS_LITERAL, - triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); - int tripletype = TriplesUtils.DATA_TRIPLE; - if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ - tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table - }else{ - tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); - } - keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 - keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int - keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long - keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long - keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long - - // Prepare variables - List variables = new ArrayList(); -// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); - // the length of boolean type in cassandra is one byte!!!!!!!! - // For column inferred, init it as false i.e. zero -// variables.add(ByteBuffer.wrap(new byte[]{zero})); - variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple - variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive - context.write(keys, variables); - } - - public static void writealltripleToMapReduceContext( Triple triple, TripleSource source, Context context) throws IOException, InterruptedException{ @@ -784,7 +437,6 @@ public static void writealltripleToMapReduceContext( // For column inferred, init it as false i.e. zero // variables.add(ByteBuffer.wrap(new byte[]{zero})); variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple - variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive context.write(keys, variables); } @@ -916,24 +568,20 @@ public boolean loadSetIntoMemory( * add ALLOW FILTERING * 2015/6/12 */ - - - String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + - " FROM " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + - " WHERE " + COLUMN_TRIPLE_TYPE + " = ? "; - System.out.println(query); + String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + + " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = ? 
" + " ALLOW FILTERING"; +// System.out.println(query); CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); for (int filter : filters){ List list = new ArrayList(); list.add(ByteBufferUtil.bytes(filter)); -// System.out.println("filter " + filter); CqlResult result = client.execute_prepared_cql3_query(preparedResult.itemId, list, ConsistencyLevel.ONE); for(CqlRow row : result.rows){ Iterator columnsIt = row.getColumnsIterator(); Long sub = null, obj = null; - System.out.println("row : " + row); while (columnsIt.hasNext()) { Column column = columnsIt.next(); if (new String(column.getName()).equals(COLUMN_SUB)) @@ -947,11 +595,9 @@ public boolean loadSetIntoMemory( } } if (!inverted) - schemaTriples.add(sub); + schemaTriples.add(sub); else schemaTriples.add(obj); - - System.out.println("schema : " + schemaTriples); } } @@ -980,8 +626,8 @@ public Map> loadMapIntoMemory(Set filters, boole // Require an index created on COLUMN_TRIPLE_TYPE column String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + - " FROM " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + - " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING"; //partitonkey + " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = ? " + " ALLOW FILTERING"; CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); @@ -1019,7 +665,7 @@ public Map> loadMapIntoMemory(Set filters, boole } } - logger.info("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime)); + logger.debug("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime)); return schemaTriples; } @@ -1027,64 +673,17 @@ public Map> loadMapIntoMemory(Set filters, boole // Created index on COLUMN_TRIPLE_TYPE column public void createIndexOnTripleType() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - } - -// public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ -// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; -// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); -// } -// -// public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ -// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")"; -// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); -// } -// -// public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ -// String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")"; -// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); -// } - - /* - - public void Index() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - //createIndexOnInferredSteps(); - createIndexOnRule(); - createIndexOnTransitiveLevel(); - createIndexOnTripleType(); - System.out.println("IndexED"); - } - - public void DropTripleTypeIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "DROP INDEX mrjks.justifications_tripletype_idx"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - } - - public void DropRuleIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "DROP INDEX mrjks.justifications_rule_idx"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); - } - - public void DropInferredStepsIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "DROP INDEX mrjks.justifications_inferredSteps_idx"; + String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRIPLE_TYPE + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } - public void DropTransitiveLevelIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "DROP INDEX mrjks.justifications_transitiveLevel_idx"; + public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } - - public void UnIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - - this.DropInferredStepsIndex(); - this.DropRuleIndex(); - this.DropTransitiveLevelIndex(); - this.DropTripleTypeIndex(); - } - */ + // Added by WuGang 2015-06-08 + public static ResultSet getRows(){ Builder builder = Cluster.builder(); @@ -1100,12 +699,12 @@ public static ResultSet getRows(){ } public static boolean delornot = false; -/* + public static void removeOriginalTriples(){ if (delornot == true) return; delornot = true; - // Execution should not be interrupted. + // Execution should not be interrupted. Builder builder = Cluster.builder(); builder.addContactPoint(DEFAULT_HOST); SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); @@ -1124,8 +723,7 @@ public static void removeOriginalTriples(){ COLUMN_V2 + " bigint, " + COLUMN_V3 + " bigint, " + // COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + // from this line is non-primary key - COLUMN_TRANSITIVE_LEVELS + " int, " + + COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + //", " + COLUMN_TRIPLE_TYPE + " ) ) "; @@ -1166,7 +764,7 @@ public static void removeOriginalTriples(){ session.execute(delete); System.out.println(row); } - */ + // SimpleClientDataStax scds = new SimpleClientDataStax(); // scds.connect(DEFAULT_HOST); // @@ -1202,7 +800,7 @@ public static void removeOriginalTriples(){ // scds.close(); -// } + } //create by LiYang // public static void createReasonTable(){ @@ -1242,16 +840,13 @@ public static void main(String[] args) { try { CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); db.init(); -// db.createIndexOnTripleType(); -// db.createIndexOnRule(); -// db.createIndexOnInferredSteps(); -// db.createIndexOnTransitiveLevel(); + db.createIndexOnTripleType(); + db.createIndexOnRule(); // db.insertResources(100, "Hello World!"); Set schemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); db.loadSetIntoMemory(schemaTriples, filters, 0); - //db.loadMapIntoMemory(filters, inverted) System.out.println(schemaTriples); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java index 369dc7a..5552170 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java @@ -158,8 +158,8 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio parseArgs(args); // Added by WuGang 2015-06-08 -// if (bClearOriginals) -// CassandraDB.removeOriginalTriples(); + if (bClearOriginals) + CassandraDB.removeOriginalTriples(); long total = 0; // Total justifications diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index 8de3073..b0f8c8e 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ 
b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -2,31 +2,26 @@ * Project Name: mrj-0.1 * File Name: MapReduceJobConfig.java * @author Gang Wu - * December 28, 2014, 10:44:16 AM + * December 28, 2014, 10:44:16 AM * * Description: * TODO */ package cn.edu.neu.mitt.mrj.reasoner; - import java.io.IOException; -import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.cassandra.hadoop.ConfigHelper; -import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlInputFormat; import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; /** * @author gibeo_000 * @@ -36,115 +31,72 @@ public class MapReduceReasonerJobConfig { // Input from CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraInput(Job job, Set typeFilters, Set transitiveLevelFilters, int certainStep) { + private static void configureCassandraInput(Job job, Set filters) { //Set the input ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); // Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml //ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_ALLTRIPLES); - if (typeFilters.size() == 0){ - - if (transitiveLevelFilters.size() == 0) - CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + - " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - ") <= ? ALLOW FILTERING"); -// "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + -// " WHERE TOKEN(" + -// CassandraDB.COLUMN_SUB + ", " + -// CassandraDB.COLUMN_PRE + ", " + -// CassandraDB.COLUMN_OBJ + ", " + -// CassandraDB.COLUMN_IS_LITERAL + -// ") > ? AND TOKEN(" + -// CassandraDB.COLUMN_SUB + ", " + -// CassandraDB.COLUMN_PRE + ", " + -// CassandraDB.COLUMN_OBJ + ", " + -// CassandraDB.COLUMN_IS_LITERAL + -// ") <= ? ALLOW FILTERING"); - else{ - Integer max = java.util.Collections.max(transitiveLevelFilters); - Integer min = java.util.Collections.min(transitiveLevelFilters); - - - CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + - " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - ") <= ? 
" + -// CassandraDB.COLUMN_INFERRED_STEPS + " = " + certainStep + " AND " + -// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " >= " + min + " AND " + -// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " <= " + max + - " ALLOW FILTERING"); - } - + ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + if (filters.size() == 0){ + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? ALLOW FILTERING"); } - else if (typeFilters.size() == 1){ - if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property - System.err.println("This is not supported!!!"); - return; - } - + else if (filters.size() == 1){ CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + ") > ? AND TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - ") <= ? "); -// ") <= ? AND " + -// CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] + -// " ALLOW FILTERING"); + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? AND " + + CassandraDB.COLUMN_TRIPLE_TYPE + " = " + filters.toArray()[0] + + " ALLOW FILTERING"); }else{ - if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property - System.err.println("This is not supported!!!"); - return; - } - - // The support of IN clause in cassandra db's SELECT is restricted. // So we have to try to manually cluster the values in the filters. // see http://www.datastax.com/documentation/cql/3.1/cql/cql_reference/select_r.html#reference_ds_d35_v2q_xj__selectIN System.out.println("<<<<<<<>>>>>>>>"); System.out.println("<<<<<<<>>>>>>>>"); - Integer max = java.util.Collections.max(typeFilters); - Integer min = java.util.Collections.min(typeFilters); + Integer max = java.util.Collections.max(filters); + Integer min = java.util.Collections.min(filters); CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + ") > ? AND TOKEN(" + CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - ") <= ? 
"); -// + "AND " + -// CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + -// CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + -// " ALLOW FILTERING"); + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? AND " + + CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + + CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + + " ALLOW FILTERING"); // String strFilter = filters.toString(); // String strInFilterClause = strFilter.substring(1, strFilter.length()-1); // remove "[" and "]" characters of Set.toString() @@ -178,55 +130,39 @@ else if (typeFilters.size() == 1){ // Output to CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraOutput(Job job, int step) { + private static void configureCassandraOutput(Job job) { //Set the output job.setOutputKeyClass(Map.class); job.setOutputValueClass(List.class); - - job.setOutputFormatClass(CqlBulkOutputFormat.class); - CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), "step" + step, CassandraDB.getStepsSchema(step)); -// System.out.println("Schema we set: " + CassandraDB.getStepsSchema(step)); -// System.out.println("Schema we get: " + CqlBulkOutputFormat.getColumnFamilySchema(job.getConfiguration(), "step"+step)); - CqlBulkOutputFormat.setColumnFamilyInsertStatement(job.getConfiguration(), "step"+step, CassandraDB.getStepsStatement(step)); - CqlBulkOutputFormat.setDeleteSourceOnSuccess(job.getConfiguration(), true); - + job.setOutputFormatClass(CqlOutputFormat.class); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE); - ConfigHelper.setOutputColumnFamily(job.getConfiguration(), "step" + step); - - MrjMultioutput.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); - MrjMultioutput.addNamedOutput(job, "step" + step, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); -// CqlConfigHelper.setOutputCql(conf, "select * from step1"); + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? 
"; + CqlConfigHelper.setOutputCql(job.getConfiguration(), query); } - // In each derivation, we may create a set of jobs - // certainStep is optional, if it is specified then we can use it to filter transitiveLevel with non-equal operator - // (see cql specification) + // In each derivation, we may create a set of jobs public static Job createNewJob(Class classJar, String jobName, - Set typeFilters, Set transitiveLevelFilters, int certainStep, int numMapTasks, int numReduceTasks, - boolean bConfigCassandraInput, boolean bConfigCassandraOutput, Integer step) + Set filters, int numMapTasks, int numReduceTasks, + boolean bConfigCassandraInput, boolean bConfigCassandraOutput) throws IOException { Configuration conf = new Configuration(); conf.setInt("maptasks", numMapTasks); - conf.set("input.filter", typeFilters.toString()); - - conf.set("mapreduce.output.bulkoutputformat.streamthrottlembits", "400"); - + conf.set("input.filter", filters.toString()); + Job job = new Job(conf); job.setJobName(jobName); job.setJarByClass(classJar); job.setNumReduceTasks(numReduceTasks); - job.setNumReduceTasks(8); - if (bConfigCassandraInput) - configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep); + configureCassandraInput(job, filters); if (bConfigCassandraOutput) - configureCassandraOutput(job, step); - + configureCassandraOutput(job); // Added by WuGang 2010-05-25 System.out.println("Create a job - " + jobName); @@ -235,44 +171,6 @@ public static Job createNewJob(Class classJar, String jobName, return job; } -/* - public static void CreateTables(String jobname){ - Builder builder = Cluster.builder(); - builder.addContactPoint(CassandraDB.DEFAULT_HOST); - SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); - Cluster clu = builder.build(); - Session session = clu.connect(); - - String query = ""; - if(jobname == "RDFS special properties reasoning"){ - query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." + jobname + - " ( " + - "sub" + " bigint, " + - "pre" + " bigint, " + - "obj" + " bigint, " + - "rule int, " + - "v1" + " bigint, " + - "v2" + " bigint, " + - "v3" + " bigint, " + - "transitiveleves int" + - ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"; - } - else { - query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." 
+ jobname + - " ( " + - "sub" + " bigint, " + - "pre" + " bigint, " + - "obj" + " bigint, " + - "rule int, " + - "v1" + " bigint, " + - "v2" + " bigint, " + - "v3" + " bigint, " + - ", primary key((id, rule) ,v1, v2, v3))"; - } - - session.execute(query); - System.out.println(query); - System.out.println("--------Create Table----------"); - } - */ + + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java index adea7f8..e27c689 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java @@ -1,22 +1,34 @@ package cn.edu.neu.mitt.mrj.reasoner; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; import org.apache.cassandra.hadoop.ConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlInputFormat; import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.SlicePredicate; +import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import prejustification.SelectInferRows; +import prejustification.SelectInferRowsMap; +import prejustification.SelectInferRowsReduce; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.utils.Cassandraconf; public class ReasonedJustifications extends Configured implements Tool{ public int run(String[] args) throws Exception{ @@ -25,7 +37,7 @@ public int run(String[] args) throws Exception{ Job job = new Job(conf); job.setJobName(" Test "); - job.setJarByClass(ReasonedJustifications.class); + job.setJarByClass(SelectInferRows.class); job.setNumReduceTasks(8); ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); @@ -36,13 +48,13 @@ public int run(String[] args) throws Exception{ " WHERE TOKEN(" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - //CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + ") > ? AND TOKEN(" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + - //CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + ") <= ? ALLOW FILTERING"); CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); //Modifide by LiYang @@ -56,7 +68,7 @@ public int run(String[] args) throws Exception{ ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); String query = "UPDATE " + CassandraDB.KEYSPACE + "." + "resultrows" + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? 
"; CqlConfigHelper.setOutputCql(job.getConfiguration(), query); job.setMapperClass(ReasonedJustificationsMapper.class); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java index dcd2230..7e719e1 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java @@ -31,13 +31,12 @@ public void map(Long keys, Row rows, Context context) throws IOException, Interr // ResultSet results = session.execute(statement); Integer inferredsteps; - Integer transitivelevel; // for (Row rows : row){ if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { String conKey; //***** - conKey = rows.getLong(CassandraDB.COLUMN_SUB) //��ʹ��ByteBufferUtil�� + conKey = rows.getLong(CassandraDB.COLUMN_SUB) //��ʹ��ByteBufferUtil�� + "-" + rows.getLong(CassandraDB.COLUMN_PRE) + "-" + rows.getLong(CassandraDB.COLUMN_OBJ) + "-" + rows.getBool(CassandraDB.COLUMN_IS_LITERAL) @@ -45,11 +44,10 @@ public void map(Long keys, Row rows, Context context) throws IOException, Interr + "-" + rows.getInt(CassandraDB.COLUMN_RULE) + "-" + rows.getLong(CassandraDB.COLUMN_V1) + "-" + rows.getLong(CassandraDB.COLUMN_V2) - + "-" + rows.getLong(CassandraDB.COLUMN_V3) - + "-" + rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS); // Modified by WuGang, 2015-07-15 - transitivelevel = rows.getInt(CassandraDB.COLUMN_TRANSITIVE_LEVELS); // Added by WuGang, 2015-07-15 + + "-" + rows.getLong(CassandraDB.COLUMN_V3); + inferredsteps = rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS); - context.write(new Text(conKey), new IntWritable(transitivelevel)); + context.write(new Text(conKey), new IntWritable(inferredsteps)); } //} @@ -72,8 +70,7 @@ public void setup(Context context) throws IOException, InterruptedException{ CassandraDB.COLUMN_V1 + " bigint, " + CassandraDB.COLUMN_V2 + " bigint, " + CassandraDB. 
COLUMN_V3 + " bigint, " + - CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // from this line, fields are non-primary key - CassandraDB.COLUMN_TRANSITIVE_LEVELS + " int, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key " PRIMARY KEY ((" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + "), " + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + " ) ) "; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java index 83d36db..cec9547 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java @@ -12,6 +12,8 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; +import com.sun.org.apache.xpath.internal.operations.Bool; + import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; public class ReasonedJustificationsReducer extends Reducer, List>{ @@ -34,10 +36,8 @@ public void reduce(Text key, Iterable values, Context context) thro //prepare the insert variables collection List variables = new ArrayList(); - int var_inferredsteps = Integer.parseInt(value.toString()); - variables.add(ByteBufferUtil.bytes(var_inferredsteps)); - int var_transitivelevel = Integer.parseInt(splitkeys[9]); - variables.add(ByteBufferUtil.bytes(var_transitivelevel)); + int var = Integer.parseInt(value.toString()); + variables.add(ByteBufferUtil.bytes(var)); context.write(keys, variables); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java index 8cad3d8..5fc2e89 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java @@ -36,7 +36,7 @@ public class OWLAllSomeValuesMapper extends Mapper values = someValues.get(value.getObject()); Iterator itr = values.iterator(); bKey[0] = 2; bValue[0] = 1; - bValue[17] = 0; // marks this as a someValues entry + bValue[17] = 0; // marks this as a someValues entry NumberUtils.encodeLong(bKey, 9, value.getSubject()); - NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write w into the value + NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write w into the value while (itr.hasNext()) { byte[] bytes = itr.next(); System.arraycopy(bytes, 0, bKey, 1, 8); System.arraycopy(bytes, 8, bValue, 1, 8); - context.write(oKey, oValue); // emits ((p,x),v) -> ((p,x),(v,w,0)) + context.write(oKey, oValue); // emits ((p,x),v) -> ((p,x),(v,w,0)) } } - // an extra v needs to be passed along - if (allValues.containsKey(value.getObject())) { // found a triple of the form (w, rdf:type, v), where v satisfies v owl:allValuesFrom u + // an extra v needs to be passed along + if (allValues.containsKey(value.getObject())) { // found a triple of the form (w, rdf:type, v), where v satisfies v owl:allValuesFrom u log.info("I met allValuesFrom: " + value); Collection values = allValues.get(value.getObject()); Iterator itr = values.iterator(); bKey[0] = 1; bValue[0] = 1; - bValue[17] = 1; // marks this as an allValues entry + bValue[17] = 1; // marks this as an allValues entry NumberUtils.encodeLong(bKey, 9, value.getSubject()); - NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write v into the value + 
NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write v into the value while (itr.hasNext()) { byte[] bytes = itr.next(); System.arraycopy(bytes, 0, bKey, 1, 8); System.arraycopy(bytes, 8, bValue, 1, 8); - context.write(oKey, oValue); // emits ((p,w),u) -> ((p,w),(u,v,1)) + context.write(oKey, oValue); // emits ((p,w),u) -> ((p,w),(u,v,1)) } } } else { - // onPropertySome holds the properties set via onProperty, i.e. the w in triples of the form v owl:someValuesFrom w - if (onPropertySome.contains(value.getPredicate())) {// the p of some triple (u p x) is a property set in onPropertySome + // onPropertySome holds the properties set via onProperty, i.e. the w in triples of the form v owl:someValuesFrom w + if (onPropertySome.contains(value.getPredicate())) {// the p of some triple (u p x) is a property set in onPropertySome //Rule 15 - someValuesFrom log.info("I met onPropertySome: " + value); bKey[0] = 2; @@ -103,19 +103,19 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup NumberUtils.encodeLong(bKey, 1, value.getPredicate()); NumberUtils.encodeLong(bKey, 9, value.getObject()); NumberUtils.encodeLong(bValue, 1, value.getSubject()); - context.write(oKey, oValue); // emits ((p,x),(u,,)); the last fields of the value are left empty, unassigned + context.write(oKey, oValue); // emits ((p,x),(u,,)); the last fields of the value are left empty, unassigned } - // onPropertyAll holds the properties set via onProperty, i.e. the u in triples of the form v owl:allValuesFrom u - if (onPropertyAll.contains(value.getPredicate())) {// the p of some triple (w p x) is a property set in onPropertyAll + // onPropertyAll holds the properties set via onProperty, i.e. the u in triples of the form v owl:allValuesFrom u + if (onPropertyAll.contains(value.getPredicate())) {// the p of some triple (w p x) is a property set in onPropertyAll //Rule 16 - allValuesFrom log.info("I met onPropertyAll: " + value); bKey[0] = 1; - bValue[0] = 0; // Added by WuGang, this line was missing originally; with multiple matching triples it caused errors in the reduce phase + bValue[0] = 0; // Added by WuGang, this line was missing originally; with multiple matching triples it caused errors in the reduce phase NumberUtils.encodeLong(bKey, 1, value.getPredicate()); NumberUtils.encodeLong(bKey, 9, value.getSubject()); NumberUtils.encodeLong(bValue, 1, value.getObject()); - context.write(oKey, oValue); // emits ((p,w),(x,,)); the last fields of the value are left empty, unassigned + context.write(oKey, oValue); // emits ((p,w),(x,,)); the last fields of the value are left empty, unassigned } } } @@ -123,7 +123,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup @Override public void setup(Context context) throws IOException { previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1); - + // List filesProperty = MultiFilesReader.recursiveListStatus(context, "FILTER_ONLY_OWL_ON_PROPERTY"); // Map> allValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_ALL_VALUES", context); // Map> someValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_SOME_VALUES", context); @@ -212,7 +212,7 @@ protected void makeJoin(Map> onPropertyTmp, Context contex } if (allValuesTmp.containsKey(sub)) { - // col holds the objects for this subject, where subject and object satisfy (subject, owl:allValuesFrom, object) + // col holds the objects for this subject, where subject and object satisfy (subject, owl:allValuesFrom, object) Collection col = allValuesTmp.get(sub); if (col != null) { Iterator itr = col.iterator(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java index badaa85..e8bad41 100644 --- 
a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java @@ -9,7 +9,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -18,7 +17,6 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class OWLAllSomeValuesReducer extends Reducer, List> { @@ -30,9 +28,8 @@ public class OWLAllSomeValuesReducer extends Reducer resources = new LinkedList(); // Added by WuGang - private LinkedList others = new LinkedList(); // paired with types - private LinkedList s_a_types = new LinkedList(); // paired with types; stores whether the entry is of someValues (0) or allValues (1) type - private MultipleOutputs _output; + private LinkedList others = new LinkedList(); // paired with types + private LinkedList s_a_types = new LinkedList(); // paired with types; stores whether the entry is of someValues (0) or allValues (1) type @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -42,7 +39,7 @@ public void reduce(BytesWritable key, Iterable values, Context co resources.clear(); byte[] bKey = key.getBytes(); - long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject is the second long in the key, starting at offset 9 (there is one byte at the head) + long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject is the second long in the key, starting at offset 9 (there is one byte at the head) long predicate = NumberUtils.decodeLong(bKey, 1); // Added by WuGang 2010-07-14 Iterator itr = values.iterator(); @@ -51,7 +48,7 @@ public void reduce(BytesWritable key, Iterable values, Context co byte[] bValue = value.getBytes(); if (bValue[0] == 1) { //Type triple types.add(NumberUtils.decodeLong(bValue, 1)); - others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, along with types an extra long and an extra byte are passed + others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, along with types an extra long and an extra byte are passed s_a_types.add(bValue[17]); } else { //Resource triple resources.add(NumberUtils.decodeLong(bValue, 1)); @@ -69,7 +66,7 @@ public void reduce(BytesWritable key, Iterable values, Context co while (itrResource.hasNext()) { long resource = itrResource.next(); triple.setSubject(resource); - // for Type values: ((p,x),(v,w)) in the someValues case, ((p,w),(u,v)) in the allValues case + // for Type values: ((p,x),(v,w)) in the someValues case, ((p,w),(u,v)) in the allValues case Iterator itrTypes = types.listIterator(); Iterator itrOthers = others.listIterator(); Iterator itrSATypes = s_a_types.listIterator(); @@ -77,14 +74,14 @@ public void reduce(BytesWritable key, Iterable values, Context co long type = itrTypes.next(); triple.setObject(type); - // Added by WuGang, assign values to the triple + // Added by WuGang, assign values to the triple long other = itrOthers.next(); byte s_a_type = itrSATypes.next(); - triple.setRsubject(rSubject); // x for someValues, w for allValues + triple.setRsubject(rSubject); // x for someValues, w for allValues // Modified by WuGang 2010-07-14 // triple.setRpredicate(TriplesUtils.RDF_TYPE); //rdf:type triple.setRpredicate(predicate); - triple.setRobject(other); // w for someValues, v for allValues + triple.setRobject(other); // w for someValues, v for allValues switch (s_a_type) { case 0: triple.setType(TriplesUtils.OWL_HORST_15); @@ -98,7 +95,7 @@ public void 
reduce(BytesWritable key, Iterable values, Context co // System.out.println("Generate an extended triple for OWLAllSomeValues: " + triple); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step12"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } } @@ -107,20 +104,10 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. - _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); triple.setObjectLiteral(false); triple.setPredicate(TriplesUtils.RDF_TYPE); } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - _output.close(); - super.cleanup(context); - } - } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java index 3323bd6..ab4cfc0 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java @@ -85,7 +85,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup @Override public void setup(Context context) throws IOException { - + CassandraDB db; try { db = new CassandraDB(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java index 3bcb71b..c755300 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java @@ -16,7 +16,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -25,7 +24,6 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -35,8 +33,7 @@ public class OWLEquivalenceSCSPReducer extends Reducer> subpropSchemaTriples = null; public static Map> subclassSchemaTriples = null; public static Map> equivalenceClassesSchemaTriples = null; // Added by WuGang @@ -93,7 +90,7 @@ public void reduce(LongWritable key, Iterable values, Context con } } - if (!found) { // a newly derived result + if (!found) { // a newly derived result triple.setObject(resource); triple.setSubject(key.get()); triple.setPredicate(TriplesUtils.RDFS_SUBCLASS); @@ -110,8 +107,9 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRpredicate(TriplesUtils.OWL_EQUIVALENT_CLASS); triple.setRobject(triple.getSubject()); } + // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } @@ -148,12 +146,12 @@ public void reduce(LongWritable key, Iterable values, Context con } // context.write(source, 
triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } //Subproperties - // Modified by WuGang, it seems this should be superProperties + // Modified by WuGang, it seems this should be superProperties // itr2 = equivalenceProperties.iterator(); itr2 = superProperties.iterator(); while (itr2.hasNext()) { @@ -182,12 +180,12 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRobject(triple.getObject()); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } //Subclasses - // Modified by WuGang, it seems this should be superClasses + // Modified by WuGang, it seems this should be superClasses // itr2 = equivalenceClasses.iterator(); itr2 = superClasses.iterator(); while (itr2.hasNext()) { @@ -215,8 +213,9 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRsubject(triple.getSubject()); triple.setRpredicate(TriplesUtils.RDFS_SUBCLASS); triple.setRobject(triple.getObject()); + // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step11"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } } @@ -224,7 +223,6 @@ public void reduce(LongWritable key, Iterable values, Context con @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. - _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep((byte)context.getConfiguration().getInt("reasoner.step", 0)); @@ -274,12 +272,4 @@ public void setup(Context context) throws IOException { } } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - _output.close(); - super.cleanup(context); - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java index 2ca8a07..3cd6514 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java @@ -43,7 +43,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } //TODO: check whether also the schema is modified oKey.set(value.getSubject()); - if (value.getPredicate() == TriplesUtils.RDF_TYPE && // for rule 14b: value has the form (u rdf:type v); the goal is to match (u p w) in the reducer, but we must still check 14b (v owl:hasValue w) + if (value.getPredicate() == TriplesUtils.RDF_TYPE && // for rule 14b: value has the form (u rdf:type v); the goal is to match (u p w) in the reducer, but we must still check 14b (v owl:hasValue w) hasValue.contains(value.getObject()) && onProperty.contains(value.getObject())) { // System.out.println("In OWLHasValueMapper for 14b: " + value); // Added by Wugang oValue.set(values, 0, 9); context.write(oKey, oValue); - } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // for rule 14a: value has the form (u p w); the goal is to match (u rdf:type v) in the reducer, but we must still check 14a (v owl:hasValue w) + } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // for rule 14a: value has the form (u p w); the goal is to match (u rdf:type v) in the reducer, but we must still check 14a (v owl:hasValue w) && hasValueInverted.contains(value.getObject()) &&
onPropertyInverted.contains(value.getPredicate())) { // System.out.println("In OWLHasValueMapper for 14a: " + value); // Added by Wugang @@ -71,7 +71,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup public void setup(Context context) throws IOException { previousStep = context.getConfiguration().getInt("reasoner.previousStep", -1); - + try{ CassandraDB db = new CassandraDB(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java index 424dc83..8a6a562 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java @@ -17,7 +17,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -26,7 +25,6 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -42,15 +40,12 @@ public class OWLHasValueReducer extends Reducer> hasValue2Map = new HashMap>(); private Map> onProperty2Map = new HashMap>(); - private MultipleOutputs _output; public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { Iterator itr = values.iterator(); - System.out.println("step 6"); while (itr.hasNext()) { byte[] v = itr.next().getBytes(); - System.out.println("step6 has values reduce"); if (v.length > 0) { if (v[0] == 0) { //Rule 14b // System.out.println("In OWLHasValueReducer for 14b: "); // Added by Wugang @@ -74,8 +69,9 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRsubject(object); // v triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue triple.setRobject(triple.getObject()); // w - System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step13"); +// System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang + + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); // context.write(source, triple); } } @@ -101,11 +97,11 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setType(TriplesUtils.OWL_HORST_14a); triple.setRsubject(triple.getObject()); // v // triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue - triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26, this information was later restored + triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26, this information was later restored triple.setRobject(object); // w // System.out.println("In OWLHasValueReducer for 14a output: "+triple); // Added by Wugang - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step13"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); // context.write(source, triple); } } @@ -116,7 +112,6 @@ public void reduce(LongWritable key, Iterable values, Context con public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange,
this works around. - _output = new MrjMultioutput, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); @@ -150,12 +145,4 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - _output.close(); - super.cleanup(context); - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java index d6bf4a6..ef5bce4 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java @@ -105,7 +105,7 @@ protected void setup(Context context) throws IOException { previousTransDerivation = context.getConfiguration().getInt("reasoner.previosTransitiveDerivation", -1); previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1); hasSchemaChanged = false; - + try{ CassandraDB db = new CassandraDB(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java index 7f06a45..cc08af6 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java @@ -16,7 +16,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -25,7 +24,6 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -39,29 +37,27 @@ public class OWLNotRecursiveReducer extends Reducer set = new HashSet(); protected Map> schemaInverseOfProperties = null; - private MultipleOutputs _output; - + protected void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bytes = key.getBytes(); long rsubject=0, rpredicate=0, robject=0; long key1=0, key2=0, value1 = 0; - + switch(bytes[0]) { // case 0: // case 1: //Functional and inverse functional property case 0: // Modified by WuGang, Functional case 1: // Modified by WuGang, Inverse Functional // System.out.println("Processing Functional & Inverse Functional Property."); - key1 = NumberUtils.decodeLong(bytes, 1); // the subject for Functional, the object for Inverse Functional + key1 = NumberUtils.decodeLong(bytes, 1); // the subject for Functional, the object for Inverse Functional key2 = NumberUtils.decodeLong(bytes, 9); // predicate long minimum = Long.MAX_VALUE; set.clear(); Iterator itr = values.iterator(); - while (itr.hasNext()) { long value = itr.next().get(); - value1 = value; // Added by Wugang, this value is the original triple's object for Functional and its subject for Inverse Functional + value1 = value; // Added by Wugang, this value is the original triple's object for Functional and its subject for Inverse Functional if (value < minimum) { if (minimum != Long.MAX_VALUE) set.add(minimum); @@ -101,7 +97,7 @@ else if
(bytes[0] == 1){ //Inverse Functional triple.setObject(object); // System.out.println("Find a derive in functional and inverse functional property!" + triple); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step5"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); outputSize++; } context.getCounter("OWL derived triples", "functional and inverse functional property").increment(outputSize); @@ -120,13 +116,13 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setRsubject(subject); triple.setRobject(object); triple.setType(TriplesUtils.OWL_HORST_3); - + itr = values.iterator(); while (itr.hasNext()) { triple.setPredicate(itr.next().get()); triple.setRpredicate(triple.getPredicate()); // Added by WuGang // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step5"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); context.getCounter("OWL derived triples", "simmetric property").increment(1); } @@ -148,7 +144,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setRsubject(subject); triple.setRobject(object); triple.setRpredicate(predicate); - + /* I only output the last key of the inverse */ Collection inverse = schemaInverseOfProperties.get(predicate); if (inverse != null) { @@ -158,7 +154,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setPredicate(derivedPredicate); // Only one of the inverse, the others will be completed in outputInverseOf() //triple.setPredicate(itrInverse.next()); // Commented by WuGang 2015-01-27 // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step5"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); context.getCounter("OWL derived triples", "inverse of").increment(1); // Moved to here by WuGang, 2015-01-27 @@ -175,7 +171,7 @@ else if (bytes[0] == 1){ //Inverse Functional break; case 4: case 5: - // Has this part already been handled in inferTransitivityStatements? It is left untouched here. //Transitive property.
I copy to a temporary directory setting a special triple source subject = NumberUtils.decodeLong(bytes, 1); object = NumberUtils.decodeLong(bytes, 9); @@ -195,7 +191,7 @@ else if (bytes[0] == 1){ //Inverse Functional transitiveSource.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setPredicate(Math.abs(predicate)); // context.write(transitiveSource, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, transitiveSource, _output, "step5"); + CassandraDB.writeJustificationToMapReduceContext(triple, transitiveSource, context); context.getCounter("OWL derived triples", "transitive property input").increment(1); } default: @@ -217,7 +213,7 @@ private void outputInverseOf(long subject, long object, long predicate, Set, List>(context); source.setDerivation(TripleSource.OWL_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); @@ -262,12 +257,4 @@ public void setup(Context context) throws IOException { } } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - _output.close(); - super.cleanup(context); - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java index fd7d0cf..85cb0d9 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java @@ -1,14 +1,9 @@ package cn.edu.neu.mitt.mrj.reasoner.owl; import java.io.IOException; -import java.nio.ByteBuffer; import java.util.HashSet; -import java.util.List; import java.util.Set; -import org.apache.cassandra.hadoop.ConfigHelper; -import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; -import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; @@ -22,12 +17,10 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.io.files.readers.FilesTriplesReader; import cn.edu.neu.mitt.mrj.partitioners.MyHashPartitioner; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; @@ -109,7 +102,7 @@ public static void main(String[] args) { try { OWLReasoner owlreasoner = new OWLReasoner(); owlreasoner.db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); -// owlreasoner.db.init(); + owlreasoner.db.init(); ToolRunner.run(new Configuration(), owlreasoner, args); } catch (Exception e) { @@ -211,10 +204,8 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter OWLReasoner.class, "OWL reasoner: infer properties inherited statements (not recursive), step " + step, new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - new HashSet(), // not supported - step, // not used here numMapTasks, - numReduceTasks, true, true, 5); + numReduceTasks, true, true); job.getConfiguration().setInt("reasoner.step", step); job.getConfiguration().setInt("reasoner.previosTransitiveDerivation", previousTransitiveDerivation); job.getConfiguration().setInt("reasoner.previousDerivation", previousInferPropertiesDerivation); @@ -224,7 +215,7 @@ private long inferPropertiesInheritance(String[] args) throws IOException, 
Inter job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(OWLNotRecursiveReducer.class); - + job.waitForCompletion(true); @@ -258,42 +249,19 @@ private long inferTransitivityStatements(String[] args) int level = 0; //modified 2015/5/19 - long beforeInferCount = db.getRowCountAccordingTripleTypeWithLimitation(TriplesUtils.TRANSITIVE_TRIPLE, 1); + long beforeInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) { // System.out.println("Start searching in the while loop of inferTransitivityStatements."); level++; - Set levels = new HashSet(); - levels.add(new Integer(level-1)); - if (level > 1) - levels.add(new Integer(level-2)); //Configure input. Take only the directories that are two levels below - Job job = null; - - // for the first two levels, we use the whole data in the database - if (level <= 2) - job = MapReduceReasonerJobConfig.createNewJob( - OWLReasoner.class, - "OWL reasoner: transitivity rule. Level " + level, - new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - new HashSet(), - 0, - numMapTasks, - numReduceTasks, true, true, 6); - // for levels beyond two, we only consider the data derived in the last two levels of the current step - if (level > 2) - job = MapReduceReasonerJobConfig.createNewJob( - OWLReasoner.class, - "OWL reasoner: transitivity rule. Level " + level, - new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - levels, - step, - numMapTasks, - numReduceTasks, true, true ,7); - - + Job job = MapReduceReasonerJobConfig.createNewJob( + OWLReasoner.class, + "OWL reasoner: transitivity rule. Level " + level, + new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + numMapTasks, + numReduceTasks, true, true); job.getConfiguration().setInt("reasoning.baseLevel", step); job.getConfiguration().setInt("reasoning.transitivityLevel", level); job.getConfiguration().setInt("maptasks", Math.max(numMapTasks / 10, 1)); @@ -304,14 +272,15 @@ private long inferTransitivityStatements(String[] args) job.setReducerClass(OWLTransitivityReducer.class); job.waitForCompletion(true); - long stepNotFilteredDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); - - long stepDerivation = 0; - if (stepNotFilteredDerivation > 0) { - stepDerivation = db.getRowCountAccordingInferredSteps(level); - } - derivation += stepDerivation; - derivedNewStatements = stepDerivation > 0; + + // About duplication: we will modify checkTransitivity to return transitive triple counts and then do the subtraction.
+ + long afterInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); + derivation = afterInferCount - beforeInferCount; + derivedNewStatements = (derivation > 0); + beforeInferCount = afterInferCount; // Update beforeInferCount + //System.out.println(" loop "); } previousTransitiveDerivation = step; @@ -327,7 +296,7 @@ private long inferSameAsStatements(String[] args) { try { boolean derivedSynonyms = true; int derivationStep = 1; -// long previousStepDerived = 0; // Added by WuGang 2015-01-30 + long previousStepDerived = 0; // Added by WuGang 2015-01-30 while (derivedSynonyms) { if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0) // We need not to infer on SameAs @@ -339,26 +308,23 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: build the synonyms table from same as triples - step " + derivationStep++, filters, // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - new HashSet(), // Added by WuGang, 2015-07-12 - step, // not used here numMapTasks, - numReduceTasks, true, true, 8); + numReduceTasks, true, true); job.setMapperClass(OWLSameAsMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReducer.class); - + job.waitForCompletion(true); // System.out.println("In FilesOWLReasoner: " + job.getCounters().findCounter("synonyms", "replacements").getValue()); Counter cDerivedSynonyms = job.getCounters().findCounter("synonyms","replacements"); long currentStepDerived = cDerivedSynonyms.getValue(); // Added by WuGang 2015-01-30 - derivedSynonyms = currentStepDerived > 0; // Added by WuGang 2015-07-12 -// derivedTriples += currentStepDerived; -// derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 + derivedTriples += currentStepDerived; + derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 //derivedSynonyms = currentStepDerived > 0; -// previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 + previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 } //Filter the table. 
@@ -378,17 +344,11 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: sampling more common resources", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - new HashSet(), // Added by WuGang, 2015-07-12 - step, // not used here numMapTasks, - numReduceTasks, true, false, 9); // input from cassandra, but output to hdfs + numReduceTasks, true, false); // input from cassandra, but output to hdfs job.getConfiguration().setInt("reasoner.samplingPercentage", sampling); //Sampling at 10% job.getConfiguration().setInt("reasoner.threshold", resourceThreshold); //Threshold resources - /* - * output to hdfs - */ - job.setMapperClass(OWLSampleResourcesMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(LongWritable.class); @@ -401,7 +361,7 @@ private long inferSameAsStatements(String[] args) { SequenceFileOutputFormat.setOutputPath(job, commonResourcesPath); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); - + job.waitForCompletion(true); @@ -438,10 +398,8 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: replace triples using the sameAs synonyms: reconstruct triples", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - new HashSet(), // Added by WuGang, 2015-07-12 - step, // not used here numMapTasks, - numReduceTasks, false, true, 10); // input from hdfs, but output to cassandra + numReduceTasks, false, true); // input from hdfs, but output to cassandra SequenceFileInputFormat.addInputPath(job, tmpPath); job.setInputFormatClass(SequenceFileInputFormat.class); @@ -450,7 +408,6 @@ private long inferSameAsStatements(String[] args) { job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReconstructReducer.class); - job.waitForCompletion(true); FileSystem fs = FileSystem.get(job.getConfiguration()); @@ -491,10 +448,8 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter OWLReasoner.class, "OWL reasoner: infer equivalence from subclass and subprop. step " + step, filters, - new HashSet(), // Added by WuGang, 20150712 - step, // not used here numMapTasks, - numReduceTasks, true, true, 11); + numReduceTasks, true, true); job.getConfiguration().setInt("maptasks", Math.max(job.getConfiguration().getInt("maptasks", 0) / 10, 1)); job.getConfiguration().setInt("reasoner.step", step); @@ -502,7 +457,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLEquivalenceSCSPReducer.class); - + job.waitForCompletion(true); return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); } @@ -514,7 +469,6 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, boolean derivedNewStatements = true; long totalDerivation = 0; int previousSomeAllValuesDerivation = -1; - boolean firstCycle = true; // Added by Wugang 20150111 //long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15); // see OWLAllSomeValuesReducer @@ -522,15 +476,12 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, while (derivedNewStatements) { step++; - Job job = MapReduceReasonerJobConfig.createNewJob( OWLReasoner.class, "OWL reasoner: some and all values rule. 
step " + step, new HashSet(), - new HashSet(), - step, // not used here numMapTasks, - numReduceTasks, true, true, 12); + numReduceTasks, true, true); job.getConfiguration().setInt("reasoner.step", step); job.getConfiguration().setInt("reasoner.previousDerivation", previousSomeAllValuesDerivation); previousSomeAllValuesDerivation = step; @@ -539,7 +490,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLAllSomeValuesReducer.class); - + job.waitForCompletion(true); // Added by Wugang 20150111 @@ -547,28 +498,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, // countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer // totalDerivation = countRule15 + countRule16; - - Counter derivedTriples = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS"); - long notFilteredDerivation = derivedTriples.getValue(); - long stepDerivation = 0; - if (firstCycle) - notFilteredDerivation -= previousSomeAllValuesCycleDerivation; - if (notFilteredDerivation > 0) { - previousSomeAllValuesCycleDerivation += notFilteredDerivation; - //Modified by LiYang 2015/9/21 -// try { -// db.createIndexOnInferredSteps(); -// } catch (TException e) { -// // TODO Auto-generated catch block -// e.printStackTrace(); -// } - stepDerivation = db.getRowCountAccordingInferredSteps(step - 1); - totalDerivation += stepDerivation; - derivedNewStatements = stepDerivation > 0; - } else { - derivedNewStatements = false; - } - firstCycle = false; + derivedNewStatements = (totalDerivation > 0); } // Added by Wugang 20150111 @@ -594,10 +524,8 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup OWLReasoner.class, "OWL reasoner: hasValue rule. 
step " + step, new HashSet(), - new HashSet(), - step, // not used here numMapTasks, - numReduceTasks, true, true, 13); + numReduceTasks, true, true); long schemaOnPropertySize = db.getRowCountAccordingTripleType(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY); if (schemaOnPropertySize == 0) @@ -619,8 +547,7 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup // countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer // countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer // return(countRule14a + countRule14b); - long stepDerivation = db.getRowCountAccordingInferredSteps(step - 1); - return stepDerivation; + return 0; } else { return 0; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java index 0462b42..5b02e6f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java @@ -17,9 +17,9 @@ import cn.edu.neu.mitt.mrj.utils.FileUtils; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; + import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; -import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; public class OWLSameAsDeconstructMapper extends Mapper { @@ -82,8 +82,8 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept context.write(oKey, oValue); - //�����oKey��һ����Դ����ν�����п��ܣ�����sameas���͵Ļ��϶���һ����������Ҫ�ֱ���Ϊ��ν�����Σ� - //�����oVlaue��owl:sameas��Ԫ������������tripleId+key.getStep()+key.getDerivation()��ǰ�滹��һ��byte + //�����oKey��һ����Դ����ν�����п��ܣ�����sameas���͵Ļ��϶���һ����������Ҫ�ֱ���Ϊ��ν�����Σ� + //�����oVlaue��owl:sameas��Ԫ������������tripleId+key.getStep()+key.getDerivation()��ǰ�滹��һ��byte ++tripleId; } @@ -92,7 +92,7 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept @Override public void setup(Context context) { oValue = new BytesWritable(bValue); - + try { String taskId = context.getConfiguration().get("mapred.task.id").substring(context.getConfiguration().get("mapred.task.id").indexOf("_m_") + 3); taskId = taskId.replaceAll("_", ""); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java index 8d1a1a5..fa3135e 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java @@ -10,7 +10,6 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.NumberUtils; public class OWLSameAsDeconstructReducer extends Reducer { @@ -18,7 +17,7 @@ public class OWLSameAsDeconstructReducer extends Reducer storage = new LinkedList(); @@ -39,9 +38,9 @@ public void reduce(LongWritable key, Iterable values, Context con byte[] bValue = iValue.getBytes(); // System.out.println("In processing things before storage, size of iValue is: " + iValue.getLength()); // System.out.println("In processing things before storage, size of bValue is: " + bValue.length); - // ���ڲ�һ���ں�ʱ��������bValue[0]=4���Ǹ�key��value�� - // 
the resources awaiting replacement are first buffered in storage; once that value has been seen, the replacement can be applied directly (on each value), - // and finally (after leaving the while loop) the values saved in storage are replaced as well. + // Since we cannot know in advance when the value with bValue[0]==4 will arrive for this key, + // the resources awaiting replacement are first buffered in storage; once that value has been seen, the replacement can be applied directly (on each value), + // and finally (after leaving the while loop) the values saved in storage are replaced as well. if (bValue[0] == 4) {//Same as long resource = NumberUtils.decodeLong(bValue, 1); replacement = true; @@ -55,14 +54,14 @@ public void reduce(LongWritable key, Iterable values, Context con byte[] bTempValue = new byte[15+8]; // Added by WuGang System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang - iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, also write out the pre-replacement resource here + iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, also write out the pre-replacement resource here context.write(oKey, iValue); countOutput++; context.getCounter("reasoner", "substitutions").increment(1); } } - Iterator itr2 = storage.iterator(); //if storage is empty, this resource contains nothing that can be replaced via sameAs + Iterator itr2 = storage.iterator(); //if storage is empty, this resource contains nothing that can be replaced via sameAs while (itr2.hasNext()) { byte[] bValue = itr2.next(); oValue.set(bValue, 0, bValue.length); @@ -71,19 +70,15 @@ public void reduce(LongWritable key, Iterable values, Context con // System.out.println("In processing things in storage, size of bValue is: " + bValue.length); System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang - oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, also write out the pre-replacement resource here + oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, also write out the pre-replacement resource here context.write(oKey, oValue); } - //the oKey emitted here is a resource (its contents have been substituted through the sameAs handling; it is in fact the post-replacement result) - //the oValue emitted here, for an owl:sameAs triple, is tripleId+key.getStep()+key.getDerivation(), preceded by one extra byte + //the oKey emitted here is a resource (its contents have been substituted through the sameAs handling; it is in fact the post-replacement result) + //the oValue emitted here, for an owl:sameAs triple, is tripleId+key.getStep()+key.getDerivation(), preceded by one extra byte if (replacement) { //Increment counter of replacements context.getCounter("reasoner", "substitutions").increment(countOutput + storage.size()); } } - public void setup(Context context) throws IOException, InterruptedException{ - CassandraDB.setConfigLocation(); - - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java index ed4b73f..a526c85 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java @@ -29,7 +29,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup /* Source triple: s owl:sameAs o */ long olKey = 0; long olValue = 0; - if (value.getSubject() > value.getObject()) { //the key is always the larger value, the value the smaller + if (value.getSubject() > value.getObject()) { //the key is always the larger value, the value the smaller olKey = value.getSubject(); olValue = value.getObject(); } else { @@ -37,21 +37,18 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup olValue = value.getSubject(); } - // use the smaller of the two values to identify each group + // use the smaller of the two values to identify each group oKey.set(olKey); bValue[0] = 0; NumberUtils.encodeLong(bValue, 1, olValue); oValue.set(bValue, 0, bValue.length); - context.write(oKey, oValue);
//here the key is the larger value and the value the smaller; this tells us which group each resource belongs to + context.write(oKey, oValue); //here the key is the larger value and the value the smaller; this tells us which group each resource belongs to oKey.set(olValue); bValue[0] = 1; NumberUtils.encodeLong(bValue, 1, olKey); oValue.set(bValue, 0, bValue.length); - context.write(oKey, oValue); //here the key is the smaller value and the value the larger; this tells us which resources each group contains - } - public void setup(Context context) throws IOException{ - + context.write(oKey, oValue); //here the key is the smaller value and the value the larger; this tells us which resources each group contains } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java index 887503b..827e360 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java @@ -8,7 +8,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.NumberUtils; public class OWLSameAsReconstructMapper extends Mapper { @@ -18,26 +17,25 @@ public class OWLSameAsReconstructMapper extends Mapper values, Context context) throws IOException, InterruptedException { // System.out.println("In OWLSameAsReconstructReducer!!!"); @@ -31,31 +28,31 @@ public void reduce(BytesWritable key, Iterable values, Context co oKey.setDerivation(bKey[12]); int elements = 0; - Iterator itr = values.iterator(); ////from byte 1 onward comes the owl:sameAs group id; byte 0 marks which position is being replaced, with possible values 0,1,2,3,4: 4 means the predicate is owl:sameAs (the whole triple is an owl:sameAs triple), 0 the subject, 1 the predicate, 2 and 3 the object + Iterator itr = values.iterator(); ////from byte 1 onward comes the owl:sameAs group id; byte 0 marks which position is being replaced, with possible values 0,1,2,3,4: 4 means the predicate is owl:sameAs (the whole triple is an owl:sameAs triple), 0 the subject, 1 the predicate, 2 and 3 the object while (itr.hasNext()) { elements++; byte[] bValue = itr.next().getBytes(); - long resource = NumberUtils.decodeLong(bValue, 1); //this is the owl:sameAs group id that will be substituted in below - long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, the pre-replacement resource + long resource = NumberUtils.decodeLong(bValue, 1); //this is the owl:sameAs group id that will be substituted in below + long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, the pre-replacement resource switch (bValue[0]) { case 0: - oValue.setSubject(resource); //replace the subject - oValue.setRsubject(originalResource); // Added by Wugang, the original subject + oValue.setSubject(resource); //replace the subject + oValue.setRsubject(originalResource); // Added by Wugang, the original subject // System.out.println("Replacing subject: " + resource); break; case 1: - oValue.setPredicate(resource); //replace the predicate - oValue.setRpredicate(originalResource); // Added by Wugang, the original predicate + oValue.setPredicate(resource); //replace the predicate + oValue.setRpredicate(originalResource); // Added by Wugang, the original predicate // System.out.println("Replacing predicate: " + resource); break; - case 2: //replace the object - case 3: //replace the object + case 2: //replace the object + case 3: //replace the object if (bValue[0] == 2) oValue.setObjectLiteral(false); else oValue.setObjectLiteral(true); oValue.setObject(resource); - oValue.setRobject(originalResource); // Added by Wugang, the original object + oValue.setRobject(originalResource); // Added by Wugang, the original object // System.out.println("Replacing object: " + resource); break; default: @@ -64,24 +61,24 @@ public void reduce(BytesWritable key, Iterable values, Context co } if (elements == 3){ - // Added by WuGang, for rule 11 + // Added by WuGang, for rule 11 // oValue.setRsubject(rsubject) if ((oValue.getSubject() == oValue.getRsubject()) &&
(oValue.getPredicate() == oValue.getRpredicate()) && (oValue.getObject() == oValue.getRobject())) - oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this does not count as a sameAs rule + oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this does not count as a sameAs rule else { if ((oValue.getPredicate() == TriplesUtils.OWL_SAME_AS) && (oValue.getRpredicate() == TriplesUtils.OWL_SAME_AS)) - oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7 + oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7 else - oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL - // Horst rule 11 + oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL + // Horst rule 11 } // System.out.println("Find a complete replacment of triple: " + oValue); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(oValue, oKey, _output, "step10"); + CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); // context.write(oKey, oValue); } } @@ -89,15 +86,5 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. - _output = new MrjMultioutput, List>(context); - - } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - _output.close(); - super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java index 02004c8..a7988da 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java @@ -11,14 +11,12 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class OWLSameAsReducer extends Reducer, List> { @@ -27,8 +25,7 @@ public class OWLSameAsReducer extends Reducer duplicates = new HashSet(); private List storage = new LinkedList(); - private MultipleOutputs _output; - + @Override public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -47,12 +44,12 @@ public void reduce(LongWritable key, Iterable values, Context con BytesWritable value = itr.next(); long lValue = NumberUtils.decodeLong(value.getBytes(), 1); // System.out.println("processing " + lValue + " with the first byte is: " + value.getBytes()[0]); - if (value.getBytes()[0] != 0) { // 1: each value is a member of this group + if (value.getBytes()[0] != 0) { // 1: each value is a member of this group //Store in-memory storage.add(lValue); // System.out.println("Storage size is: " + storage.size()); //} - } else { // 0: records which group (the value) this resource belongs to + } else { // 0: records which group (the value) this resource belongs to // System.out.println("Prepare to repalce: lValue is " + lValue + " and oValue.getSubject() is " + oValue.getSubject()); if (lValue < oValue.getSubject()) { // System.out.println("Hahahahah, I'm here!"); @@ -68,7 +65,7 @@ public void reduce(LongWritable key, Iterable values, Context con long lValue = itr2.next(); if (!duplicates.contains(lValue)) { oValue.setObject(lValue); -
CassandraDB.writeJustificationToMapReduceMultipleOutputs(oValue, oKey, _output, "step8"); + CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); duplicates.add(lValue); } } @@ -83,7 +80,6 @@ public void reduce(LongWritable key, Iterable values, Context con @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. - _output = new MrjMultioutput, List>(context); oValue.setObjectLiteral(false); oValue.setPredicate(TriplesUtils.OWL_SAME_AS); @@ -95,12 +91,4 @@ public void setup(Context context) { oKey.setDerivation(TripleSource.OWL_DERIVED); oKey.setStep(context.getConfiguration().getInt("reasoner.step", 0)); } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - _output.close(); - super.cleanup(context); - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java index 2c8aa57..ace1796 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java @@ -45,7 +45,6 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } public void setup(Context context) { - threshold = context.getConfiguration().getInt("reasoner.samplingPercentage", 0); } } \ No newline at end of file diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java index 50dfe04..d2c658e 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java @@ -6,8 +6,6 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; - public class OWLSampleResourcesReducer extends Reducer { //private static Logger log = LoggerFactory.getLogger(OWLSampleResourcesReducer.class); @@ -36,8 +34,6 @@ public void reduce(LongWritable key, Iterable values, Context cont @Override public void setup(Context context) { - CassandraDB.setConfigLocation(); - threshold = context.getConfiguration().getInt("reasoner.threshold", 0); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java index 09232eb..b2b04bd 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java @@ -28,9 +26,7 @@ public class OWLTransitivityMapper extends Mapper minLevel) { + if (step > minLevel) { NumberUtils.encodeLong(keys,0,value.getPredicate()); NumberUtils.encodeLong(keys,8,value.getSubject()); oKey.set(keys, 0, 16); @@ -65,20 +63,19 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup values[0] = 3; else values[0] = 2; - NumberUtils.encodeLong(values, 1, level); + NumberUtils.encodeLong(values, 1, step); NumberUtils.encodeLong(values, 9, value.getObject()); oValue.set(values, 0, 17); context.write(oKey, oValue); } - //for u p w and w p v, the key emitted here is (p, w) and the value emitted is (value[0], key.getStep(), value.getObject) + //for u p w and w p v, the key emitted here is (p, w) and the value emitted is (value[0], key.getStep(), value.getObject) } } @Override public void setup(Context context) { - level =
context.getConfiguration().getInt("reasoning.transitivityLevel", 0); baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 0) - 1; minLevel = Math.max(1, (int)Math.pow(2,level - 2)) + baseLevel; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java index 6e30412..7ad71eb 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java @@ -10,7 +10,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -19,7 +18,6 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class OWLTransitivityReducer extends Reducer, List> { @@ -32,7 +30,6 @@ public class OWLTransitivityReducer extends Reducer values, Context context) throws IOException, InterruptedException { @@ -72,9 +69,9 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setPredicate(NumberUtils.decodeLong(key.getBytes(),0)); - // Added by WuGang, this extended triple has the form u p w, where w is a key resource used to reconstruct the original rule premises + // Added by WuGang, this extended triple has the form u p w, where w is a key resource used to reconstruct the original rule premises triple.setType(TriplesUtils.OWL_HORST_4); -// triple.setRsubject(rsubject); // this is computed further down, see the code below +// triple.setRsubject(rsubject); // this is computed further down, see the code below triple.setRpredicate(NumberUtils.decodeLong(key.getBytes(),0)); triple.setRobject(NumberUtils.decodeLong(key.getBytes(), 8)); @@ -90,15 +87,13 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setSubject(entry.getKey()); triple.setObject(entry2.getKey()); - // Added by Wugang, for this extended triple it arguably does not matter whether rsubject is set, but set it just to be safe - triple.setRsubject(triple.getSubject()); // since u p w is chosen as this triple, the u here comes from the subject iterated in the outer loop + // Added by Wugang, for this extended triple it arguably does not matter whether rsubject is set, but set it just to be safe + triple.setRsubject(triple.getSubject()); // since u p w is chosen as this triple, the u here comes from the subject iterated in the outer loop - // Modified by WuGang, 2015-07-15 - //source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); - source.setTransitiveLevel((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); + source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceMultipleOutputs(triple, source, _output, "step6"); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); // System.out.println("In OWLTransitivityReducer: " + triple); } @@ -109,21 +104,12 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
- _output = new MrjMultioutput, List>(context); + baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 1) - 1; level = context.getConfiguration().getInt("reasoning.transitivityLevel", -1); // Modified by WuGang 2015-01-28 //source.setDerivation(TripleSource.OWL_DERIVED); - source.setStep(baseLevel + 1); // Added by WuGang, 2015-07-15 source.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setObjectLiteral(false); } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - _output.close(); - super.cleanup(context); - } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index 722ea1f..b112445 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -2,20 +2,9 @@ import java.io.IOException; -import java.nio.ByteBuffer; import java.util.HashSet; -import java.util.List; import java.util.Set; -import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat; -import org.apache.cassandra.hadoop.ConfigHelper; -import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; -import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; -import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; -import org.apache.cassandra.thrift.InvalidRequestException; -import org.apache.cassandra.thrift.SchemaDisagreementException; -import org.apache.cassandra.thrift.TimedOutException; -import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.BytesWritable; @@ -24,15 +13,11 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; -//import org.apache.hadoop.mapred.lib.MultipleOutputs; public class RDFSReasoner extends Configured implements Tool { @@ -41,7 +26,7 @@ public class RDFSReasoner extends Configured implements Tool { private int numReduceTasks = -1; public static int step = 0; private int lastExecutionPropInheritance = -1; - private int lastExecutionDomRange = -1; + private int lastExecutionDomRange = -1; private void parseArgs(String[] args) { @@ -84,25 +69,22 @@ public static void main(String[] args) { // The derivation will be launched in run() - public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { + public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException { long time = System.currentTimeMillis(); + parseArgs(args); Job job = null; long derivation = 0; - - + // RDFS subproperty inheritance reasoning // job = createNewJob("RDFS subproperty inheritance reasoning", "FILTER_ONLY_HIDDEN"); job = MapReduceReasonerJobConfig.createNewJob( RDFSReasoner.class, "RDFS subproperty inheritance reasoning", new HashSet(), - new HashSet(), // Added by WuGang, 2015-07-13 - step, // not used here numMapTasks, - numReduceTasks, true, true, 1); - + numReduceTasks, true, true); 
job.setMapperClass(RDFSSubPropInheritMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); @@ -111,12 +93,10 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep job.getConfiguration().setInt("lastExecution.step", lastExecutionPropInheritance); lastExecutionPropInheritance = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-inherit"); - job.waitForCompletion(true); long propInheritanceDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); derivation += propInheritanceDerivation; - // RDFS subproperty domain and range reasoning // job = createNewJob("RDFS subproperty domain and range reasoning", "FILTER_ONLY_HIDDEN"); @@ -124,17 +104,14 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subproperty domain and range reasoning", new HashSet(), - new HashSet(), // Added by WuGang, 2015-07-13 - step, // not used here numMapTasks, - numReduceTasks, true, true, 2); + numReduceTasks, true, true); job.setMapperClass(RDFSSubPropDomRangeMapper.class); job.setMapOutputKeyClass(BytesWritable.class); // Modified by WuGang, 2010-08-26 job.setMapOutputValueClass(LongWritable.class); //job.setPartitionerClass(MyHashPartitioner.class); // Is this ok? seems not necessary job.setReducerClass(RDFSSubpropDomRangeReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - job.getConfiguration().setInt("lastExecution.step", lastExecutionDomRange); lastExecutionDomRange = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-domain-range"); @@ -145,7 +122,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep // RDFS cleaning up subprop duplicates // We remove it for simplicity. 
That means we will not support stop and restart from breakpoints - + //RDFS subclass reasoning // job = createNewJob("RDFS subclass reasoning", "FILTER_ONLY_TYPE_SUBCLASS"); @@ -155,16 +132,13 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subclass reasoning", filters, - new HashSet(), // Added by WuGang, 2015-07-13 - step, // not used here numMapTasks, - numReduceTasks, true, true, 3); + numReduceTasks, true, true); job.setMapperClass(RDFSSubclasMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSubclasReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - // configureOutputJob(job, args[0], "dir-rdfs-output/dir-subclass-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); @@ -189,16 +163,14 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS special properties reasoning", filters, - new HashSet(), // Added by WuGang, 2015-07-13 - step, // not used here numMapTasks, - numReduceTasks, true, true, 4); + numReduceTasks, true, true); job.setMapperClass(RDFSSpecialPropsMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSpecialPropsReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-special-props-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java index 9a8e1b4..f915446 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java @@ -87,7 +87,8 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } @Override - public void setup(Context context) throws IOException { + public void setup(Context context) throws IOException { + try{ CassandraDB db = new CassandraDB(); if (memberProperties == null) { @@ -126,16 +127,5 @@ public void setup(Context context) throws IOException { } catch (TException e) { e.printStackTrace(); } - } - -// protected void cleanup(Context context) throws IOException, InterruptedException{ -// try { -// CassandraDB db = new CassandraDB(); -// db.UnIndex(); -// db.CassandraDBClose(); -// } catch (Exception e) { -// // TODO: handle exception -// } -// } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java index 97e886b..fc5ea85 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java @@ -2,44 +2,29 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import org.apache.cassandra.thrift.Cassandra; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import 
org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; public class RDFSSpecialPropsReducer extends Reducer, List> { private TripleSource source = new TripleSource(); private Triple oTriple = new Triple(); - private MultipleOutputs _output; - private Map keys = new LinkedHashMap(); - private Map allkeys = new LinkedHashMap(); - private List allvariables = new ArrayList(); - private List allTValues = new ArrayList(); - private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bKey = key.getBytes(); Iterator itr = values.iterator(); - - _output = new MrjMultioutput, List>(context); - - while (itr.hasNext()) { long value = itr.next().get(); if (value == TriplesUtils.RDFS_LITERAL && (bKey[0] == 0 || bKey[0] == 2)) @@ -69,7 +54,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); break; @@ -84,7 +69,7 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of literal").increment(1); break; @@ -100,11 +85,11 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); //context.write(source, oTriple); break; - case 4: // no corresponding RDFS rule here - case 5: // no corresponding RDFS rule here oTriple.setSubject(NumberUtils.decodeLong(bKey, 1)); oTriple.setPredicate(TriplesUtils.RDFS_MEMBER); // oTriple.setPredicate(NumberUtils.decodeLong(bKey, 9)); @@ -114,29 +99,18 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) else oTriple.setObjectLiteral(true); context.getCounter("RDFS derived triples", "subproperty inheritance of member").increment(1); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step4"); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); // context.write(source, oTriple); default:
break; } - - _output.close(); - } @Override public void setup(Context context) { - CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); - - } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java index 7ca4151..9821e66 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java @@ -4,7 +4,6 @@ import java.util.HashSet; import java.util.Set; -import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; @@ -12,7 +11,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -62,14 +60,15 @@ public void map(Long key, Row row, Context context) throws IOException, Interru return; Triple value = CassandraDB.readJustificationFromMapReduceRow(row); + //Check if the predicate has a domain if (domainSchemaTriples.contains(value.getPredicate())) { NumberUtils.encodeLong(bKey,0,value.getSubject()); // Added by WuGang, 2010-08-26 NumberUtils.encodeLong(bKey,8,value.getObject()); // Added by WuGang, 2010-08-26 // oKey.set(value.getSubject()); oKey.set(bKey, 0, 16); // Modified by WuGang, 2010-08-26 - oValue.set(value.getPredicate() << 1); // the low bit of oValue is 0, marking this as a domain candidate - context.write(oKey, oValue); // emit <(s,o), p> for rule 2 + oValue.set(value.getPredicate() << 1); // the low bit of oValue is 0, marking this as a domain candidate + context.write(oKey, oValue); // emit <(s,o), p> for rule 2 } //Check if the predicate has a range @@ -79,8 +78,8 @@ public void map(Long key, Row row, Context context) throws IOException, Interru NumberUtils.encodeLong(bKey,8,value.getSubject()); // Added by WuGang, 2010-08-26 // oKey.set(value.getObject()); oKey.set(bKey, 0, 16); // Modified by WuGang, 2010-08-26 - oValue.set((value.getPredicate() << 1) | 1); // the low bit of oValue is 1, marking this as a range candidate - context.write(oKey, oValue); // emit <(o,s), p> for rule 3 + oValue.set((value.getPredicate() << 1) | 1); // the low bit of oValue is 1, marking this as a range candidate + context.write(oKey, oValue); // emit <(o,s), p> for rule 3 } } @@ -89,25 +88,23 @@ public void map(Long key, Row row, Context context) throws IOException, Interru protected void setup(Context context) throws IOException { hasSchemaChanged = false; previousExecutionStep = context.getConfiguration().getInt("lastExecution.step", -1); - - try{ - CassandraDB db = new CassandraDB(); + try{ + CassandraDB db = new CassandraDB(); if (domainSchemaTriples == null) { domainSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_DOMAIN_PROPERTY); hasSchemaChanged =
db.loadSetIntoMemory(domainSchemaTriples, filters, previousExecutionStep); - // db not close } if (rangeSchemaTriples == null) { rangeSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY); - + hasSchemaChanged |= db.loadSetIntoMemory(rangeSchemaTriples, filters, previousExecutionStep); - db.CassandraDBClose(); + db.CassandraDBClose(); } }catch(TTransportException tte){ tte.printStackTrace(); @@ -124,15 +121,13 @@ protected void setup(Context context) throws IOException { } // Some debug code - System.out.println("In mapper setup, previousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); - System.out.println("Input split: " + context.getInputSplit()); - try { - System.out.println("Input split length: " + context.getInputSplit().getLength()); - } catch (InterruptedException e) { - e.printStackTrace(); - } +// System.out.println("In mapper setup, previousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); +// System.out.println("Input split: " + context.getInputSplit()); +// try { +// System.out.println("Input split length: " + context.getInputSplit().getLength()); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } } - - } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java index 04f66fe..8347faf 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java @@ -7,7 +7,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -58,16 +57,15 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter oKey.set(bKey, 0, 17); oValue.set(pre); context.write(oKey, oValue); -// System.out.println(" i " + i); } - + //Check suprop transitivity if (pre == TriplesUtils.RDFS_SUBPROPERTY && subpropSchemaTriples.contains(obj)) { //Write the 05 + subject bKey[0] = 5; NumberUtils.encodeLong(bKey, 1, sub); oKey.set(bKey, 0, 9); - oValue.set(obj); + oValue.set(obj); context.write(oKey, oValue); } } @@ -86,9 +84,7 @@ protected void setup(Context context) throws IOException { hasSchemaChanged = db.loadSetIntoMemory(subpropSchemaTriples, filters, previousExecutionStep); // hasSchemaChanged = FilesTriplesReader.loadSetIntoMemory(subpropSchemaTriples, context, // "FILTER_ONLY_SUBPROP_SCHEMA", previousExecutionStep); -// System.out.println("AAA"); -// db.createIndexOnInferredSteps(); -// System.out.println("create on inferredsteps"); + db.CassandraDBClose(); } catch (TException e) { e.printStackTrace(); @@ -96,8 +92,5 @@ protected void setup(Context context) throws IOException { } else { log.debug("Subprop schema triples already loaded in memory"); } - - } - } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java index 56bd6cb..67ffb1f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java @@ -5,7 +5,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Mapper.Context; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,20 +36,6 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup oKey.set(bKey, 0, 9); context.write(oKey, oValue); -// System.out.println("About to write in RDFSSubclasMapper-"+value); +// System.out.println("About to write in RDFSSubclasMapper-"+value); } - - protected void setup(Context context) throws IOException, InterruptedException{ - - } -// protected void cleanup(Context context) throws IOException, InterruptedException{ -// try { -// CassandraDB db = new CassandraDB(); -// db.UnIndex(); -// db.CassandraDBClose(); -// } catch (Exception e) { -// // TODO: handle exception -// } -// } - } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java index f8cd7c2..64f43f2 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java @@ -2,11 +2,9 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -18,7 +16,6 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -27,17 +24,13 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; -public class RDFSSubclasReducer - extends - Reducer, List> { - - protected static Logger log = LoggerFactory - .getLogger(RDFSSubclasReducer.class); - +public class RDFSSubclasReducer extends Reducer, List> { + + protected static Logger log = LoggerFactory.getLogger(RDFSSubclasReducer.class); + public static Map> subclassSchemaTriples = null; protected Set subclasURIs = new HashSet(); protected Set existingURIs = new HashSet(); @@ -45,12 +38,7 @@ public class RDFSSubclasReducer protected Set specialSuperclasses = new HashSet(); private TripleSource source = new TripleSource(); private Triple oTriple = new Triple(); - private MultipleOutputs _output; - private Map keys = new LinkedHashMap(); - private Map allkeys = new LinkedHashMap(); - private List allvariables = new ArrayList(); - private List allTValues = new ArrayList(); - private List stepsValues = new ArrayList(); + private void recursiveScanSuperclasses(long value, Set set) { Collection subclassValues = subclassSchemaTriples.get(value); if (subclassValues != null) { @@ -66,69 +54,65 @@ private void recursiveScanSuperclasses(long value, Set set) { } @Override - public void reduce(BytesWritable key, Iterable values, - Context context) throws IOException, InterruptedException { - // System.out.println("Entering RDFSSubclasReducer-"); + public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { +// System.out.println("Entering RDFSSubclasReducer-"); existingURIs.clear(); Iterator itr = values.iterator(); while
(itr.hasNext()) { long value = itr.next().get(); - existingURIs.add(value); // all the existing entries + existingURIs.add(value); //all the existing entries } - + Iterator oTypes = existingURIs.iterator(); subclasURIs.clear(); while (oTypes.hasNext()) { long existingURI = oTypes.next(); - recursiveScanSuperclasses(existingURI, subclasURIs); // subclasURIs collects all the superclasses + recursiveScanSuperclasses(existingURI, subclasURIs); //subclasURIs collects all the superclasses } - + subclasURIs.removeAll(existingURIs); - + oTypes = subclasURIs.iterator(); byte[] bKey = key.getBytes(); - long oKey = NumberUtils.decodeLong(bKey, 1); + long oKey = NumberUtils.decodeLong(bKey,1); oTriple.setSubject(oKey); boolean typeTriple = bKey[0] == 0; - if (!typeTriple) { // It's a subclass triple - oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11 + if (!typeTriple) { //It's a subclass triple + oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11 // Added by WuGang, 2010-08-26 oTriple.setType(TriplesUtils.RDFS_11); oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDFS_SUBCLASS); - } else { // It's a type triple - oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9 + } else { //It's a type triple + oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9 // Added by WuGang, 2010-08-26 oTriple.setType(TriplesUtils.RDFS_9); oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); } - // while (oTypes.hasNext()) { - // long oType = oTypes.next(); - // oTriple.setObject(oType); - // context.write(source, oTriple); - // } +// while (oTypes.hasNext()) { +// long oType = oTypes.next(); +// oTriple.setObject(oType); +// context.write(source, oTriple); +// } // Modified by WuGang, 2010-08-26 while (oTypes.hasNext()) { long oType = oTypes.next(); oTriple.setObject(oType); for (long obj : existingURIs) { oTriple.setRobject(obj); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step3"); - - // context.write(source, oTriple); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); +// context.write(source, oTriple); } - } - + } + if (typeTriple) { /* Check special rules */ - if ((subclasURIs - .contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) || existingURIs - .contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)) - && !memberProperties.contains(oTriple.getSubject())) { // Rule - // 12, see RDFSSpecialPropsReducer + if ((subclasURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) || existingURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)) && !memberProperties.contains(oTriple.getSubject())) { // Rule 12, see RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBPROPERTY); oTriple.setObject(TriplesUtils.RDFS_MEMBER); // Added by WuGang, 2010-08-26 @@ -137,20 +121,16 @@ public void reduce(BytesWritable key, Iterable values, oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step3"); - - // context.write(source, oTriple); - context.getCounter("RDFS derived triples", - "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); +// context.write(source, oTriple); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); } - + if
(subclasURIs.contains(TriplesUtils.RDFS_DATATYPE) || existingURIs.contains(TriplesUtils.RDFS_DATATYPE)) { specialSuperclasses.clear(); - recursiveScanSuperclasses(oTriple.getSubject(), - specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule - // 13, see RDFSSpecialPropsReducer + recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); + if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule 13, see RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_LITERAL); // Added by WuGang, 2010-08-26 @@ -159,21 +139,17 @@ public void reduce(BytesWritable key, Iterable values, oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step3"); - - // context.write(source, oTriple); - context.getCounter("RDFS derived triples", - "subclass of Literal").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); +// context.write(source, oTriple); + context.getCounter("RDFS derived triples", "subclass of Literal").increment(1); } } - + if (subclasURIs.contains(TriplesUtils.RDFS_CLASS) || existingURIs.contains(TriplesUtils.RDFS_CLASS)) { specialSuperclasses.clear(); - recursiveScanSuperclasses(oTriple.getSubject(), - specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule - // 8, see RDFSSpecialPropsReducer + recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); + if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule 8, see RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_RESOURCE); // Added by WuGang, 2010-08-26 @@ -182,30 +158,23 @@ public void reduce(BytesWritable key, Iterable values, oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, - keys, allkeys, stepsValues, allTValues,"step3"); - - // context.write(source, oTriple); - context.getCounter("RDFS derived triples", - "subclass of resource").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); +// context.write(source, oTriple); + context.getCounter("RDFS derived triples", "subclass of resource").increment(1); } } } - - // Update the counters + + //Update the counters if (typeTriple) - context.getCounter("RDFS derived triples", - "subclass inheritance rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", "subclass inheritance rule").increment(subclasURIs.size()); else - context.getCounter("RDFS derived triples", - "subclass transitivity rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", "subclass transitivity rule").increment(subclasURIs.size()); } - + @Override public void setup(Context context) throws IOException { - CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works - // around. - _output = new MrjMultioutput, List>(context); + CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
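// [Editor's note] The recursiveScanSuperclasses(...) walk used throughout this reducer is
// what drives rules 9/11 and the special rules above: it collects the transitive superclass
// closure of a class from the in-memory schema map. A minimal, self-contained sketch of
// that walk, assuming a schema map shaped like subclassSchemaTriples (class id -> ids of
// its direct superclasses); names here are illustrative, not from the patch:

import java.util.Collection;
import java.util.Map;
import java.util.Set;

final class SuperclassScan {
    // Collects every (direct or indirect) superclass of cls into out.
    static void scan(Map<Long, Collection<Long>> schema, long cls, Set<Long> out) {
        Collection<Long> supers = schema.get(cls);
        if (supers == null) return;          // cls has no recorded superclasses
        for (long s : supers)
            if (out.add(s))                  // add() is false on revisit, so cycles terminate
                scan(schema, s, out);
    }
}

// The caller then removes the classes it already had (out.removeAll(existing)), which is
// exactly what subclasURIs.removeAll(existingURIs) does above, so only new facts are derived.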
if (subclassSchemaTriples == null) { CassandraDB db; @@ -229,17 +198,17 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } - + if (memberProperties == null) { CassandraDB db; try { db = new CassandraDB(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_MEMBER_SUBPROPERTY); - + memberProperties = new HashSet(); db.loadSetIntoMemory(memberProperties, filters, -1); - + db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); @@ -253,20 +222,10 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } catch (TException e) { e.printStackTrace(); - } + } } source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); - - } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - - _output.close(); - super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index a95a989..d774a6d 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -3,11 +3,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.AbstractMap; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -17,11 +15,9 @@ import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; import org.apache.cassandra.thrift.UnavailableException; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -30,7 +26,6 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -47,120 +42,75 @@ public class RDFSSubpropDomRangeReducer extends Reducer keys = new LinkedHashMap(); - private Map allkeys = new LinkedHashMap(); - private List allvariables = new ArrayList(); - private List allTValues = new ArrayList(); - private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { - byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26 - // long uri = key.get(); //for a domain property this is s, for a range property this is o - long uri = NumberUtils.decodeLong(bKey, 0); // for domain this is s, for range this is o - long uri_opposite = NumberUtils.decodeLong(bKey, 8); // for domain this is o, for range this is s - - Configuration conf = context.getConfiguration(); - derivedProps.clear(); // clear previous results - - Logger logger = LoggerFactory.getLogger(CassandraDB.class); - long time = System.currentTimeMillis(); - - // Get the predicates with a range or domain associated to this URIs - propURIs.clear(); - Iterator itr = values.iterator(); - while (itr.hasNext()) -
propURIs.add(itr.next().get()); // all tagged predicates - -// logger.info("while1 " + (System.currentTimeMillis() - time)); -// System.out.println("while1 " + (System.currentTimeMillis() - time)); - - Iterator itrProp = propURIs.iterator(); - while (itrProp.hasNext()) { - Collection objects = null; - long propURI = itrProp.next(); - if ((propURI & 0x1) == 1) { - objects = rangeSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "range matches").increment(1); - } else { - objects = domainSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "domain matches").increment(1); - } - - if (objects != null) { - Iterator itr3 = objects.iterator(); - while (itr3.hasNext()) - // derivedProps.add(itr3.next()); - derivedProps.add(new AbstractMap.SimpleEntry( - itr3.next(), propURI)); // Modified by WuGang, - // 2010-08-26 + byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26 +// long uri = key.get(); //for a domain property this is s, for a range property this is o + long uri = NumberUtils.decodeLong(bKey, 0); //for domain this is s, for range this is o + long uri_opposite = NumberUtils.decodeLong(bKey, 8); //for domain this is o, for range this is s + + derivedProps.clear(); //clear previous results + + //Get the predicates with a range or domain associated to this URIs + propURIs.clear(); + Iterator itr = values.iterator(); + while (itr.hasNext()) + propURIs.add(itr.next().get()); //all tagged predicates + + Iterator itrProp = propURIs.iterator(); + while (itrProp.hasNext()) { + Collection objects = null; + long propURI = itrProp.next(); + if ((propURI & 0x1) == 1) { + objects = rangeSchemaTriples.get(propURI >> 1); + context.getCounter("derivation", "range matches").increment(1); + } else { + objects = domainSchemaTriples.get(propURI >> 1); + context.getCounter("derivation", "domain matches").increment(1); + } + + if (objects != null) { + Iterator itr3 = objects.iterator(); + while (itr3.hasNext()) +// derivedProps.add(itr3.next()); + derivedProps.add(new AbstractMap.SimpleEntry(itr3.next(), propURI)); // Modified by WuGang, 2010-08-26 + } } - } - -// logger.info("while2 " + (System.currentTimeMillis() - time)); - time = System.currentTimeMillis(); -// System.out.println("while2 " + (System.currentTimeMillis() - time)); - - // Derive the new statements - // Iterator itr2 = derivedProps.iterator(); - Iterator> itr2 = derivedProps.iterator(); // Modified - // by - // WuGang, - // 2010-08-26 - oTriple.setSubject(uri); - oTriple.setPredicate(TriplesUtils.RDF_TYPE); - oTriple.setObjectLiteral(false); - while (itr2.hasNext()) { - // oTriple.setObject(itr2.next()); - Entry entry = itr2.next(); - oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26 - // Added by WuGang, 2010-08-26 - long propURI = entry.getValue(); - oTriple.setRpredicate(propURI >> 1); // Modified by WuGang - // 2010-12-03: shifted left in RDFSSubPropDomRangeMapper, so it must be shifted back here - if ((propURI & 0x1) == 1) { // Rule 3, for range - oTriple.setType(TriplesUtils.RDFS_3); - oTriple.setRsubject(uri_opposite); - oTriple.setRobject(uri); - } else { // Rule 2, for domain - oTriple.setType(TriplesUtils.RDFS_2); - oTriple.setRsubject(uri); - oTriple.setRobject(uri_opposite); + + //Derive the new statements +// Iterator itr2 = derivedProps.iterator(); + Iterator> itr2 = derivedProps.iterator(); // Modified by WuGang, 2010-08-26 + oTriple.setSubject(uri); + oTriple.setPredicate(TriplesUtils.RDF_TYPE); + oTriple.setObjectLiteral(false); + while (itr2.hasNext()) { +// oTriple.setObject(itr2.next()); + Entry entry = itr2.next(); + oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26 +
// Added by WuGang, 2010-08-26 + long propURI = entry.getValue(); + oTriple.setRpredicate(propURI >> 1); // Modified by WuGang 2010-12-03: shifted left in RDFSSubPropDomRangeMapper, so it must be shifted back here + if ((propURI & 0x1) == 1) { // Rule 3, for range + oTriple.setType(TriplesUtils.RDFS_3); + oTriple.setRsubject(uri_opposite); + oTriple.setRobject(uri); + }else{ // Rule 2, for domain + oTriple.setType(TriplesUtils.RDFS_2); + oTriple.setRsubject(uri); + oTriple.setRobject(uri_opposite); + } + + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + //context.write(source, oTriple); } - - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step2"); - -// logger.info("write " + (System.currentTimeMillis() - time)); - time = System.currentTimeMillis(); -// System.out.println("finish " + (System.currentTimeMillis() - time)); - // CassandraDB.writealltripleToMapReduceContext(oTriple, source, - // context); - // context.write(source, oTriple); - - // _output.write(conf.get(CassandraDB.COLUMNFAMILY_ALLTRIPLES), - // ByteBufferUtil.bytes(key.toString()), - // Collections.singletonList(m)); - // Reporter reporter = null ; - // _output.getCollector(CassandraDB.COLUMNFAMILY_ALLTRIPLES, - // reporter).collect(key, arg1);; - } - - // logger.info(" " + (System.currentTimeMillis() - time)); - context.getCounter("RDFS derive triples", - "subprop range and domain rule").increment(derivedProps.size()); - // logger.info("finish " + (System.currentTimeMillis() - time)); - // Mutation m = new Mutation(); - + context.getCounter("RDFS derived triples", "subprop range and domain rule").increment(derivedProps.size()); } - - + @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
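// [Editor's note] This mapper/reducer pair shares a one-bit tag packed into the emitted
// LongWritable: predicate << 1 for domain candidates, (predicate << 1) | 1 for range
// candidates, decoded above with (propURI & 0x1) and propURI >> 1. The convention as a
// standalone sketch (helper names are illustrative, not from the patch; assumes the
// non-negative resource ids this codebase uses, so >> is safe):

final class DomRangeTag {
    static long domain(long predicate)  { return predicate << 1; }        // low bit 0 = domain (rule 2)
    static long range(long predicate)   { return (predicate << 1) | 1; }  // low bit 1 = range (rule 3)
    static boolean isRange(long packed) { return (packed & 0x1) == 1; }
    static long predicate(long packed)  { return packed >> 1; }           // undo the shift
}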
- _output = new MrjMultioutput, List>(context); -// outputKey = ByteBufferUtil.bytes(context.getConfiguration().get(CassandraDB.COLUMNFAMILY_ALLTRIPLES)); + try{ CassandraDB db = new CassandraDB(); if (domainSchemaTriples == null) { @@ -191,21 +141,5 @@ public void setup(Context context) throws IOException { source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); - } - - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - - _output.close(); - - super.cleanup(context); - } - - - - } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java index c4315f2..6f295a1 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java @@ -2,17 +2,13 @@ import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; -import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; @@ -20,18 +16,14 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.sun.corba.se.spi.ior.Writeable; - import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.io.dbs.MrjMultioutput; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -46,13 +38,6 @@ public class RDFSSubpropInheritReducer extends Reducer keys = new LinkedHashMap(); - private Map allkeys = new LinkedHashMap(); - private List allvariables = new ArrayList(); - private List allTValues = new ArrayList(); - private List stepsValues = new ArrayList(); private void recursiveScanSubproperties(long value, Set set) { Collection subprops = subpropSchemaTriples.get(value); @@ -71,8 +56,8 @@ private void recursiveScanSubproperties(long value, Set set) { @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { - byte[] bKey = key.getBytes(); + switch(bKey[0]) { case 2: case 3: // rdfs rule 7 @@ -82,17 +67,11 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.clear(); //filter the properties that are already present Iterator itr = values.iterator(); - /* - * values is emptied once its iterator has been consumed, so a list is used to record the values - */ - List list1 = new ArrayList(); while (itr.hasNext()) { long value = itr.next().get(); - list1.add(value); if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } - } Iterator itr3 = propURIs.iterator(); @@ -111,10 +90,10 @@ public void reduce(BytesWritable key, Iterable values, Context con // Modified by WuGang,
2010-08-26 while (itr3.hasNext()) { oTriple.setPredicate(itr3.next()); - for (Long pre : list1) { - oTriple.setRpredicate(pre); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step1"); - // context.write(source, oTriple); + for (LongWritable pre : values) { + oTriple.setRpredicate(pre.get()); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); +// context.write(source, oTriple); } } @@ -126,10 +105,8 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.clear(); //filter the properties that are already present Iterator itr2 = values.iterator(); - List list2 = new ArrayList(); while (itr2.hasNext()) { long value = itr2.next().get(); - list2.add(value); if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } @@ -148,15 +125,13 @@ public void reduce(BytesWritable key, Iterable values, Context con // context.write(source, oTriple); // } // Modified by WuGang, 2010-08-26 - while (itr4.hasNext()) { oTriple.setObject(itr4.next()); - for(Long obj:list2){ - oTriple.setRobject(obj); - CassandraDB.writeJustificationToMapReduceMultipleOutputsLessObjects(oTriple, source, _output, keys, allkeys, stepsValues, allTValues,"step1"); + for(LongWritable obj:values){ + oTriple.setRobject(obj.get()); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); // context.write(source, oTriple); } - } context.getCounter("RDFS derived triples", "subprop transitivity rule").increment(propURIs.size()); @@ -165,13 +140,11 @@ public void reduce(BytesWritable key, Iterable values, Context con default: break; } - } @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
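// [Editor's note] The hunk above replaces the list1/list2 buffers with a second for-each
// directly over `values`. Hadoop's reduce-side Iterable is single-pass and recycles one
// Writable instance, so re-iterating it after values.iterator() has been drained may yield
// nothing; the removed comment in this very file warned about exactly that. The buffering
// pattern the earlier revision used is the safe one; a minimal sketch (illustrative names):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.LongWritable;

final class BufferedValues {
    static List<Long> drain(Iterable<LongWritable> values) {
        List<Long> copy = new ArrayList<>();
        for (LongWritable v : values)
            copy.add(v.get());   // copy the primitive, not the recycled Writable object
        return copy;             // safe to traverse any number of times afterwards
    }
}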
- _output = new MrjMultioutput, List>(context); if (subpropSchemaTriples == null) { CassandraDB db; @@ -181,6 +154,7 @@ public void setup(Context context) throws IOException { filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); subpropSchemaTriples = db.loadMapIntoMemory(filters); // subpropSchemaTriples = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_SUBPROP_SCHEMA", context); + db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); @@ -203,23 +177,6 @@ public void setup(Context context) throws IOException { source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); oTriple2.setPredicate(TriplesUtils.RDF_TYPE); - oTriple2.setObjectLiteral(false); - - + oTriple2.setObjectLiteral(false); } - - @Override - protected void cleanup( - Reducer, List>.Context context) - throws IOException, InterruptedException { - /* - * Without calling close, nothing gets written to the database. - */ - _output.close(); - - super.cleanup(context); - } - - - } diff --git a/mrj-0.1/src/prejustification/SelectInferRows.java b/mrj-0.1/src/prejustification/SelectInferRows.java new file mode 100644 index 0000000..2ecbf7a --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRows.java @@ -0,0 +1,143 @@ +package prejustification; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlInputFormat; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.SlicePredicate; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; + + +public class SelectInferRows extends Configured implements Tool{ + //private static final Logger logger = LoggerFactory.getLogger(); + public static void main(String[] args) throws Exception{ + int exitCode = ToolRunner.run(new Configuration(), new SelectInferRows(), args); + System.exit(exitCode); + } + + public int run(String[] args) throws Exception{ + + //Job job = new Job(getConf()); +// Job job = MapReduceReasonerJobConfig.createNewJob(SelectInferRows.class, "Select Rows", new HashSet(), 16, 16, true, true); + +// ConfigHelper.setInputInitialAddress(getConf(), CassandraDB.DEFAULT_HOST); +// ConfigHelper.setInputColumnFamily(getConf(), CassandraDB.KEYSPACE, CassandraDB.COLUMN_JUSTIFICATION); + + // job.setJobName("Del Rows"); +// job.setJarByClass(SelectInferRows.class); + + /* + * Select(map) + */ + + + Configuration conf = new Configuration(); + + Job job = new Job(conf); + job.setJobName(" Test "); + job.setJarByClass(SelectInferRows.class); + job.setNumReduceTasks(8); + + ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
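// [Editor's note] The setInputCql(...) call just below hands CqlInputFormat a SELECT
// template whose two "?" placeholders are TOKEN bounds; the input format binds them per
// split, so each map task scans only its own token range of the partition key
// (sub, pre, obj, isliteral). The shape of the template, flattened into a plain string
// for readability (illustrative only; the code below builds it from the CassandraDB
// constants):

final class InputCqlTemplate {
    static final String TEMPLATE =
        "SELECT * FROM mrjks.justifications" +
        " WHERE TOKEN(sub, pre, obj, isliteral) > ?" +
        " AND TOKEN(sub, pre, obj, isliteral) <= ? ALLOW FILTERING";
}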
CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? ALLOW FILTERING"); + CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); + //Modified by LiYang + ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); + job.setInputFormatClass(CqlInputFormat.class); + job.setOutputKeyClass(Map.class); + job.setOutputValueClass(List.class); + job.setOutputFormatClass(CqlOutputFormat.class); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; + CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + + job.setMapperClass(SelectInferRowsMap.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(ByteBuffer.class); + job.setReducerClass(SelectInferRowsReduce.class); + + +// job.setInputFormatClass(ColumnFamilyInputFormat.class); +// ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); +// ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); +// ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); +// CqlConfigHelper.setInputCql(job.getConfiguration(), +// "SELECT * FROM " + CassandraDB.KEYSPACE + "."
+ CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " WHERE RULE = 0"); +// CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); +// ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); +// job.setInputFormatClass(CqlInputFormat.class); +// +// +// /* +// * Insert(reduce) +// */ +//// job.setCombinerClass(SelectInferRowsReduce.class); +// job.setOutputKeyClass(Map.class); +// job.setOutputValueClass(List.class); +// //equivalent to specifying the output directory; it must be set, or the job complains that the output directory cannot be found +// job.setOutputFormatClass(CqlOutputFormat.class); +// +// ConfigHelper.setOutputInitialAddress(job.getConfiguration(), CassandraDB.DEFAULT_HOST); +// ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); +// ConfigHelper.setOutputColumnFamily(getConf(), CassandraDB.KEYSPACE, "ruleiszero"); +// ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE);// ** +//// String query = "INSERT INTO mrjks.ruleiszero (" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + ", " + +//// CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 +", " +//// + CassandraDB.COLUMN_INFERRED_STEPS + ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, )"; +// String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; +// CqlConfigHelper.setOutputCql(job.getConfiguration(), query); +// +// +// ConfigHelper.getInputSplitSize(job.getConfiguration()); +// CqlConfigHelper.getInputPageRowSize(job.getConfiguration()); +//// String column_names = CassandraDB.COLUMN_SUB + CassandraDB.COLUMN_PRE + CassandraDB.COLUMN_OBJ + CassandraDB.COLUMN_IS_LITERAL + +//// CassandraDB.COLUMN_TRIPLE_TYPE + CassandraDB.COLUMN_RULE + CassandraDB.COLUMN_V1 + CassandraDB.COLUMN_V2 + CassandraDB.COLUMN_V3; +//// SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(column_names))); +// +// //without this statement the map and reduce stages would not be launched job.waitForCompletion(true); + + System.out.println("Finished"); + return 0; + + } + +} diff --git a/mrj-0.1/src/prejustification/SelectInferRowsMap.java b/mrj-0.1/src/prejustification/SelectInferRowsMap.java new file mode 100644 index 0000000..201eea0 --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRowsMap.java @@ -0,0 +1,79 @@ +package prejustification; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; + +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.mapreduce.Mapper; + + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; + +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SimpleStatement; + + + +public class SelectInferRowsMap extends Mapper, ByteBuffer> { + private Cluster cluster; + private Session session; + + + public void map(ByteBuffer key, Row row, Context context) throws IOException, InterruptedException{ + SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications"); + statement.setFetchSize(100); + ResultSet results = session.execute(statement); + + System.out.println("---------MAP----------"); + Map keys = new HashMap<>(); + ByteBuffer
inferredsteps; + for (Row rows : results){ + if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_SUB))); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_PRE))); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_OBJ))); + keys.put(CassandraDB.COLUMN_IS_LITERAL, rows.getBool(CassandraDB.COLUMN_IS_LITERAL) ? ByteBuffer.wrap(new byte[]{1}) : ByteBuffer.wrap(new byte[]{0})); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_TRIPLE_TYPE))); + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_RULE))); + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V1))); + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V2))); + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V3))); + inferredsteps = ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS)); + context.write(keys, inferredsteps); + } + } + } + + public void setup(Context context) throws IOException, InterruptedException{ + + cluster = Cluster.builder().addContactPoint(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host).build(); + Metadata metadata = cluster.getMetadata(); + System.out.printf("-------Connected to cluster: %s\n", metadata.getClusterName()); + session = cluster.connect(); + + String cquery1 = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "ruleiszero" + + " ( " + + CassandraDB.COLUMN_SUB + " bigint, " + // partition key + CassandraDB.COLUMN_PRE + " bigint, " + // partition key + CassandraDB.COLUMN_OBJ + " bigint, " + // partition key + CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key + CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_RULE + " int, " + + CassandraDB.COLUMN_V1 + " bigint, " + + CassandraDB.COLUMN_V2 + " bigint, " + + CassandraDB.
COLUMN_V3 + " bigint, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + " PRIMARY KEY ((" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + "), " + + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + + " ) ) "; + session.execute(cquery1); + } + + +} diff --git a/mrj-0.1/src/prejustification/SelectInferRowsReduce.java b/mrj-0.1/src/prejustification/SelectInferRowsReduce.java new file mode 100644 index 0000000..adcfa19 --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRowsReduce.java @@ -0,0 +1,18 @@ +package prejustification; + +import org.apache.hadoop.mapreduce.Reducer; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Map; +public class SelectInferRowsReduce extends Reducer, ByteBuffer, Map, ByteBuffer> { + public void reduce(Map key, Iterable value, Context context) throws IOException, InterruptedException{ + + for (ByteBuffer inferredsteps : value) { + System.out.println(key); + context.write(key, inferredsteps); + } + + } + +} From 2efb201ac6f25898fe639fde8f4796c319ada879 Mon Sep 17 00:00:00 2001 From: Joe Date: Thu, 14 Jul 2016 09:35:30 +0800 Subject: [PATCH 16/16] Finish reasoning and selection (Signed-off-by: Joe) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mrj-0.1/.classpath | 5 +- .../org.eclipse.core.resources.prefs | 1 + .../edu/neu/mitt/mrj/data/TripleSource.java | 11 + .../mrj/importtriples/FilesImportTriples.java | 34 +- .../ImportTriplesDeconstructReducer.java | 2 +- ...tTriplesReconstructReducerToCassandra.java | 40 +- .../ImportTriplesSampleMapper.java | 2 +- .../ImportTriplesSampleReducer.java | 2 +- .../edu/neu/mitt/mrj/io/dbs/CassandraDB.java | 713 ++++++++++++++++-- .../neu/mitt/mrj/io/dbs/MrjMultioutput.java | 49 +- .../justification/OWLHorstJustification.java | 92 ++- .../OWLHorstJustificationReducer.java | 56 +- .../neu/mitt/mrj/reasoner/Experiments.java | 90 +++ .../reasoner/MapReduceReasonerJobConfig.java | 213 ++++-- .../mrj/reasoner/ReasonedJustifications.java | 104 +-- .../ReasonedJustificationsMapper.java | 62 +- .../ReasonedJustificationsReducer.java | 10 +- .../reasoner/owl/OWLAllSomeValuesMapper.java | 40 +- .../reasoner/owl/OWLAllSomeValuesReducer.java | 26 +- .../owl/OWLEquivalenceSCSPMapper.java | 2 +- .../owl/OWLEquivalenceSCSPReducer.java | 25 +- .../mrj/reasoner/owl/OWLHasValueMapper.java | 6 +- .../mrj/reasoner/owl/OWLHasValueReducer.java | 18 +- .../reasoner/owl/OWLNotRecursiveMapper.java | 2 +- .../reasoner/owl/OWLNotRecursiveReducer.java | 32 +- .../mitt/mrj/reasoner/owl/OWLReasoner.java | 205 ++++- .../owl/OWLSameAsDeconstructMapper.java | 8 +- .../owl/OWLSameAsDeconstructReducer.java | 23 +- .../mrj/reasoner/owl/OWLSameAsMapper.java | 11 +- .../owl/OWLSameAsReconstructMapper.java | 18 +- .../owl/OWLSameAsReconstructReducer.java | 42 +- .../mrj/reasoner/owl/OWLSameAsReducer.java | 15 +- .../owl/OWLSampleResourcesMapper.java | 1 + .../owl/OWLSampleResourcesReducer.java | 4 + .../reasoner/owl/OWLTransitivityMapper.java | 15 +- .../reasoner/owl/OWLTransitivityReducer.java | 23 +- .../mitt/mrj/reasoner/rdfs/RDFSReasoner.java | 47 +- .../reasoner/rdfs/RDFSSpecialPropsMapper.java | 14 +- .../rdfs/RDFSSpecialPropsReducer.java | 38 +-
.../rdfs/RDFSSubPropDomRangeMapper.java | 37 +- .../rdfs/RDFSSubPropInheritMapper.java | 13 +- .../mrj/reasoner/rdfs/RDFSSubclasMapper.java | 17 +- .../mrj/reasoner/rdfs/RDFSSubclasReducer.java | 142 ++-- .../rdfs/RDFSSubpropDomRangeReducer.java | 181 +++-- .../rdfs/RDFSSubpropInheritReducer.java | 57 +- 45 files changed, 1913 insertions(+), 635 deletions(-) create mode 100644 mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath index e1c1f9a..b1b26f7 100644 --- a/mrj-0.1/.classpath +++ b/mrj-0.1/.classpath @@ -5,7 +5,8 @@ - - + + + diff --git a/mrj-0.1/.settings/org.eclipse.core.resources.prefs b/mrj-0.1/.settings/org.eclipse.core.resources.prefs index 2dcd9cf..83eb0de 100644 --- a/mrj-0.1/.settings/org.eclipse.core.resources.prefs +++ b/mrj-0.1/.settings/org.eclipse.core.resources.prefs @@ -1,2 +1,3 @@ eclipse.preferences.version=1 +encoding//src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java=UTF-8 encoding//src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java=UTF-8 diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java index 8d5c320..afbc721 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java @@ -16,17 +16,20 @@ public class TripleSource implements WritableComparable { byte derivation = 0; int step = 0; + int transitive_level = 0; @Override public void readFields(DataInput in) throws IOException { derivation = in.readByte(); step = in.readInt(); + transitive_level = in.readInt(); } @Override public void write(DataOutput out) throws IOException { out.write(derivation); out.writeInt(step); + out.writeInt(transitive_level); } @Override @@ -47,6 +50,14 @@ public void setStep(int step) { this.step = step; } + public int getTransitiveLevel() { return transitive_level; } + + public void setTransitiveLevel(int level) { this.transitive_level = level; } + public void setDerivation(byte ruleset) { derivation = ruleset; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java index 7140fbc..db2cad6 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java @@ -101,7 +101,7 @@ public void parseArgs(String[] args) { } public void sampleCommonResources(String[] args) throws Exception { -// System.out.println("In sampleCommonResources."); +// System.out.println("In sampleCommonResources."); Job job = createNewJob("Sample common resources"); //Input @@ -127,7 +127,7 @@ public void sampleCommonResources(String[] args) throws Exception { } public void assignIdsToNodes(String[] args) throws Exception { -// System.out.println("In assignIdsToNodes."); +// System.out.println("In assignIdsToNodes."); Job job = createNewJob("Deconstruct statements"); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1); @@ -156,7 +156,7 @@ public void assignIdsToNodes(String[] args) throws Exception { } private void rewriteTriples(String[] args) throws Exception { -// System.out.println("In rewriteTriples."); +// System.out.println("In rewriteTriples."); Job job = createNewJob("Reconstruct statements"); @@ -188,9 +188,11 @@ private void rewriteTriples(String[] args) throws Exception { // is it useful below line?
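// [Editor's note] On the TripleSource change earlier in this patch: with Hadoop Writable
// types, write() and readFields() must serialize the same fields, in the same order, with
// the same widths, or every field after the first mismatch is read as garbage; the new
// transitive_level is correctly appended to the tail of both methods. It also means data
// serialized by the old two-field format can no longer be deserialized by the new class.
// The pattern, as a minimal sketch (illustrative class name, not from the patch):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;

final class TripleSourceLike implements Writable {
    byte derivation; int step; int transitiveLevel;
    public void write(DataOutput out) throws IOException {
        out.write(derivation);          // one byte
        out.writeInt(step);             // four bytes
        out.writeInt(transitiveLevel);  // new field appended last on both sides
    }
    public void readFields(DataInput in) throws IOException {
        derivation = in.readByte();     // must mirror write() exactly, field by field
        step = in.readInt();
        transitiveLevel = in.readInt();
    }
}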
//job.getConfiguration().set(CASSANDRA_PRIMARY_KEY, "(sub, pre, obj)"); + /* + * This is set to 0 here, so the add on the map side must append one column fewer. + */ String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; - + " SET " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "= null" + ","+ CassandraDB.COLUMN_INFERRED_STEPS + "=0"; CqlConfigHelper.setOutputCql(job.getConfiguration(), query); ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); @@ -223,13 +225,21 @@ public static void main(String[] args) throws Exception { long time = System.currentTimeMillis(); int res = ToolRunner.run(new Configuration(), new FilesImportTriples(), args); // log.info("Import time: " + (System.currentTimeMillis() - time)); - - //Modified by LiYang 2015/4/10 - CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); - db.init(); - db.createIndexOnTripleType(); - db.createIndexOnRule(); - db.CassandraDBClose(); +// +// //Modified by LiYang 2015/4/10 +// CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); +// db.init(); +// // Modified +// db.createIndexOnTripleType(); +// //db.createIndexOnRule(); +// +// /* +// * Add by LiYang +// * 2015.7.19 +// */ +// //db.createIndexOnInferredSteps(); +// //db.createIndexOnTransitiveLevel(); +// db.CassandraDBClose(); System.out.println("Import time: " + (System.currentTimeMillis() - time)); System.exit(res); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java index 8dd5dea..6aca38d 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java @@ -68,7 +68,7 @@ public void reduce(Text key, Iterable values, Context context)thr protected void setup(Context context) throws IOException, InterruptedException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
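// [Editor's note] On the output CQL above: with CqlOutputFormat, each reduce emit is a
// (Map<String, ByteBuffer> keys, List<ByteBuffer> variables) pair. The keys map names the
// primary-key columns for the UPDATE's implicit WHERE clause, and the variables list is
// bound, in order, to the "?" placeholders of the SET clause. The new template hardcodes
// "transitivelevel = null, inferredsteps = 0" and contains no "?", so the matching emit
// passes an empty variables list, which is what the translated comment in the hunk points
// out. Illustrative sketch of the pairing (partition-key columns shown; the clustering
// columns follow the same pattern):

import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.cassandra.utils.ByteBufferUtil;

final class CqlOutputShape {
    static Map<String, ByteBuffer> keys(long sub, long pre, long obj, boolean lit) {
        Map<String, ByteBuffer> k = new LinkedHashMap<>();
        k.put("sub", ByteBufferUtil.bytes(sub));
        k.put("pre", ByteBufferUtil.bytes(pre));
        k.put("obj", ByteBufferUtil.bytes(obj));
        k.put("isliteral", ByteBuffer.wrap(new byte[]{ (byte) (lit ? 1 : 0) })); // booleans are one byte
        return k;
    }
    static List<ByteBuffer> boundVariables() {
        return Collections.emptyList();  // no "?" in the SET clause, so nothing to bind
    }
}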
try { - db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); + db = new CassandraDB(); db.init(); } catch (InvalidRequestException e) { e.printStackTrace(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java index 4b7acc3..955693f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java @@ -2,7 +2,7 @@ * Project Name: mrj-0.1 * File Name: ImportTriplesReconstructReducerToCassandra.java * @author Gang Wu - * October 28, 2014, 10:35:24 AM + * October 28, 2014, 10:35:24 AM * * Description: * Send reducer output to Cassandra DB by representing triples with ids @@ -16,8 +16,11 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.UUID; +import org.apache.cassandra.cli.CliParser.rowKey_return; import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.utils.UUIDGen; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; import org.slf4j.Logger; @@ -28,6 +31,7 @@ import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; + /** * @author gibeo_000 * * @@ -78,7 +82,7 @@ protected void reduce(LongWritable key, Iterable values, Context c } if (counter != 3) { - // Modified by WuGang 2010-12-3: if the parts do not form a complete 3-element triple, report an error + // Modified by WuGang 2010-12-3: if the parts do not form a complete 3-element triple, report an error log.error("Found a non-triple when reconstructing. The count num is " + counter + ", and triple is " + oValue); // throw new IOException("Triple is not reconstructed!"); } @@ -89,6 +93,35 @@ protected void reduce(LongWritable key, Iterable values, Context c byte one = 1; byte zero = 0; +// /* +// keys.put("sub", ByteBufferUtil.bytes(oValue.getSubject())); +// keys.put("pre", ByteBufferUtil.bytes(oValue.getPredicate())); +// keys.put("obj", ByteBufferUtil.bytes(oValue.getObject())); +// // the length of boolean type in cassandra is one byte!!!!!!!! +// keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); +//// keys.put("id", ByteBufferUtil.bytes(UUIDGen.getTimeUUID())); +// */ +// +// keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject())); +// keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate())); +// keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject())); +// keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); +// +// +// // Prepare variables, here is a boolean value for CassandraDB.COLUMN_IS_LITERAL +// List variables = new ArrayList(); +//// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); +// // the length of boolean type in cassandra is one byte!!!!!!!! +// // For column inferred, init it as false i.e.
zero +//// variables.add(ByteBuffer.wrap(new byte[]{zero})); +// variables.add(oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// variables.add(ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); +// +// context.write(keys, variables); + + // Prepare composite key (sub, pre, obj) keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject())); keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate())); @@ -107,7 +140,8 @@ protected void reduce(LongWritable key, Iterable values, Context c // the length of boolean type in cassandra is one byte!!!!!!!! // For column inferred, init it as false i.e. zero // variables.add(ByteBuffer.wrap(new byte[]{zero})); - variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple +// variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple +// variables.add(ByteBufferUtil.bytes(0)); // Added by WuGang, 2015-07-15, to support transitive level context.write(keys, variables); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java index c1153f9..8614816 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java @@ -19,7 +19,7 @@ public class ImportTriplesSampleMapper extends Mapper preloadedURIs = TriplesUtils.getInstance().getPreloadedURIs(); protected void map(Text key, Text value, Context context) { - System.out.println("In ImportTriplesSampleMapper."); + //System.out.println("In ImportTriplesSampleMapper."); try { String[] uris = TriplesUtils.parseTriple(value.toString(), key.toString()); for(String uri : uris) { diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java index 2738f80..bb81c8a 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java @@ -55,7 +55,7 @@ public void reduce(Text key, Iterable values, Context context) th protected void setup(Context context) throws IOException, InterruptedException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
try { - db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); + db = new CassandraDB(); db.init(); } catch (InvalidRequestException e) { e.printStackTrace(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index dbfceca..c017711 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -10,6 +10,7 @@ package cn.edu.neu.mitt.mrj.io.dbs; +import java.awt.print.Printable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; @@ -23,6 +24,8 @@ import java.util.Map; import java.util.Set; +import org.apache.cassandra.cli.CliParser.rowKey_return; +import org.apache.cassandra.cql3.statements.MultiColumnRestriction.EQ; import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.thrift.Cassandra; import org.apache.cassandra.thrift.Column; @@ -39,6 +42,7 @@ import org.apache.cassandra.thrift.UnavailableException; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.mapreduce.Reducer.Context; +//import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.protocol.TProtocol; @@ -46,14 +50,18 @@ import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; +import org.hsqldb.ResultBase.ResultIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; +import cn.edu.neu.mitt.mrj.justification.OWLHorstJustification; +import cn.edu.neu.mitt.mrj.reasoner.Experiments; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Cluster.Builder; import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; import com.datastax.driver.core.Session; @@ -62,12 +70,8 @@ import com.datastax.driver.core.Statement; //modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; -import com.datastax.driver.core.Cluster.Builder; -import com.datastax.driver.core.querybuilder.Delete.Where; -import com.datastax.driver.core.querybuilder.Insert; import com.datastax.driver.core.querybuilder.QueryBuilder; //modified -import com.datastax.driver.core.querybuilder.Select; /** @@ -79,11 +83,12 @@ public class CassandraDB { public static final String KEYSPACE = "mrjks"; // mr.j keyspace public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications"; // mr.j keyspace public static final String COLUMNFAMILY_RESOURCES = "resources"; // mr.j keyspace - public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace + public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace +// public static final String COLUMNFAMILY_ALLTRIPLES = "alltriples"; public static final String COLUMN_SUB = "sub"; // mrjks.justifications.sub public static final String COLUMN_PRE = "pre"; // mrjks.justifications.pre public static final String COLUMN_OBJ = "obj"; // mrjks.justifications.obj - public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype + public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype public static final String COLUMN_IS_LITERAL = "isliteral" ; // 
mrjks.justifications.isliteral public static final String COLUMN_INFERRED_STEPS = "inferredsteps" ; // mrjks.justifications.inferredsteps public static final String COLUMN_RULE = "rule"; // mrjks.justifications.rule @@ -93,14 +98,36 @@ public class CassandraDB { public static final String COLUMN_ID = "id"; // mrjks.resources.id public static final String COLUMN_LABEL = "label"; // mrjks.resources.label public static final String COLUMN_JUSTIFICATION = "justification"; //mrjks.results.justification - public static final String COLUMN_STEP = "step"; // mrjks.results.step + public static final String COLUMN_TRANSITIVE_LEVELS = "transitivelevel"; // mrjks.results.step public static final String DEFAULT_HOST = cn.edu.neu.mitt.mrj.utils.Cassandraconf.host; public static final String DEFAULT_PORT = "9160"; // in version 2.1.2, cql3 port is 9042 public static final String CQL_PAGE_ROW_SIZE = "10000"; //3 modified by liyang - + // Added by WuGang 20160203 + public static Set domainSchemaTriples = null; + public static Set rangeSchemaTriples = null; + public static Set memberProperties = null; + public static Set resourceSubclasses = null; + public static Set literalSubclasses = null; + public static Set schemaFunctionalProperties = null; + public static Set schemaInverseFunctionalProperties = null; + public static Set schemaSymmetricProperties = null; + public static Set schemaInverseOfProperties = null; + public static Set schemaTransitiveProperties = null; + public static Set subclassSchemaTriples = null; + public static Set subpropSchemaTriples = null; + public static Set hasValue = null; + public static Set hasValueInverted = null; + public static Set onProperty = null; + public static Set onPropertyInverted = null; + + public static Map> subclassSchemaTriplesMap = null; + public static Map> domainSchemaTriplesMap = null; + public static Map> rangeSchemaTriplesMap = null; + public static Map> subpropSchemaTriplesMap = null; + // 2014-12-11, Very strange, this works around. 
public static final String CONFIG_LOCATION = cn.edu.neu.mitt.mrj.utils.Cassandraconf.CassandraConfFile; public static void setConfigLocation(){ @@ -116,6 +143,7 @@ private static Cassandra.Iface createConnection() throws TTransportException{ if (System.getProperty("cassandra.host") == null || System.getProperty("cassandra.port") == null){ logger.warn("cassandra.host or cassandra.port is not defined, using default"); } + System.out.println("Port : " + System.getProperty("cassandra.port", DEFAULT_PORT)); return createConnection(System.getProperty("cassandra.host", DEFAULT_HOST), Integer.valueOf(System.getProperty("cassandra.port", DEFAULT_PORT))); } @@ -124,19 +152,19 @@ private static Cassandra.Iface createConnection() throws TTransportException{ private static TSocket socket = null; private static TTransport trans = null; - private static Cassandra.Client client1 = null; + private static Cassandra.Client c1 = null; private static Cassandra.Client createConnection(String host, Integer port) throws TTransportException { - socket = new TSocket(host, port); - trans = new TFramedTransport(socket); + if (c1 != null) { + return c1; + } + socket = new TSocket(host, port); + trans = new TFramedTransport(socket); trans.open(); TProtocol protocol = new TBinaryProtocol(trans); - - if (client1 != null){ - return client1; - } - client1 = new Cassandra.Client(protocol); + + c1 = new Cassandra.Client(protocol); //Modified 2015/5/25 - return client1; + return c1; } private static void close(){ @@ -161,12 +189,12 @@ private static void setupKeyspace(Cassandra.Iface client) } catch(NotFoundException e){ logger.info("set up keyspace " + KEYSPACE); String query = "CREATE KEYSPACE " + KEYSPACE + - " WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 1}"; + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 2}"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ANY); String verifyQuery = "select count(*) from system.peers"; - CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(verifyQuery), Compression.NONE, ConsistencyLevel.ONE); + CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(verifyQuery), Compression.NONE, ConsistencyLevel.ANY); long magnitude = ByteBufferUtil.toLong(result.rows.get(0).columns.get(0).value); try { @@ -177,12 +205,110 @@ private static void setupKeyspace(Cassandra.Iface client) } } + public static String getJustificationsSchema(){ + String schemaString = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " ( " + + COLUMN_SUB + " bigint, " + // partition key + COLUMN_PRE + " bigint, " + // partition key + COLUMN_OBJ + " bigint, " + // partition key + COLUMN_IS_LITERAL + " boolean, " + // partition key + COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_INFERRED_STEPS + " int, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE + + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + return schemaString; + } + + /* + * ?? + */ + public static String getJustificationseStatement(){ + return ("INSERT INTO " + CassandraDB.KEYSPACE + "." 
+ CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " (isliteral, rule, sub, tripletype, pre, obj, v1, v2, v3, inferredsteps, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )"); + } + + +// public static String getAlltripleSchema(){ +// String ALLTRIPLE_SCHEMA = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + +// " ( " + +// COLUMN_SUB + " bigint, " + // partition key +// COLUMN_PRE + " bigint, " + // partition key +// COLUMN_OBJ + " bigint, " + // partition key +// COLUMN_IS_LITERAL + " boolean, " + // partition key +// COLUMN_TRIPLE_TYPE + " int, " + +// COLUMN_INFERRED_STEPS + " int, " + +// "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + +// ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; +// return ALLTRIPLE_SCHEMA; +// } + + /* + public static String getStepsSchema(Integer step){ + String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + ".step" + step + + " ( " + + COLUMN_SUB + " bigint, " + + COLUMN_PRE + " bigint, " + + COLUMN_OBJ + " bigint, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; + return STEPS_SCHEMA; + } + + public static String getStepsSchema(String cfName){ + String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + "." + cfName + + " ( " + + COLUMN_SUB + " bigint, " + + COLUMN_PRE + " bigint, " + + COLUMN_OBJ + " bigint, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; + return STEPS_SCHEMA; + } + + public static String getStepsStatement(int step){ + String query = "INSERT INTO " + CassandraDB.KEYSPACE + ".step" + step + + " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; + return query; + } + + public static String getStepsStatement(String cfName){ + String query = "INSERT INTO " + CassandraDB.KEYSPACE + "." + cfName + + " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; + return query; + } + + public static String getAlltripleStatement(){ + return ("INSERT INTO " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + " (sub, pre, obj, isliteral, tripletype, inferredsteps) VALUES(?, ?, ?, ?, ?, ?)"); + } + */ + private static void setupTables(Cassandra.Iface client) throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { + // Create justifications table String query = "CREATE TABLE " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + @@ -197,9 +323,10 @@ private static void setupTables(Cassandra.Iface client) COLUMN_V2 + " bigint, " + COLUMN_V3 + " bigint, " + // COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key - " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + - COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + COLUMN_INFERRED_STEPS + " int, " + // from this line, the fields are non-primary key + COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE + + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + //", " + COLUMN_TRIPLE_TYPE + " ) ) "; @@ -210,6 +337,7 @@ private static void setupTables(Cassandra.Iface client) logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS, e); } + // Create resources table query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES + " ( " + @@ -225,12 +353,13 @@ private static void setupTables(Cassandra.Iface client) logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES, e); } + // Create results table query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESULTS + " ( " + - COLUMN_ID + " uuid, " + + "id" + " int, " + COLUMN_JUSTIFICATION + " set>>, " + - " PRIMARY KEY (" + COLUMN_ID + ") ) "; + " PRIMARY KEY (" + "id" + ") ) "; try { logger.info("set up table " + COLUMNFAMILY_RESULTS); @@ -239,27 +368,67 @@ private static void setupTables(Cassandra.Iface client) catch (InvalidRequestException e) { logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESULTS, e); } + + //Create resultrow table - String cquery = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + "." + "resultrows" + + String cquery = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." 
+ "resultrows" + + " ( " + + CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key + CassandraDB.COLUMN_RULE + " int, " + + CassandraDB.COLUMN_SUB + " bigint, " + // partition key + CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_PRE + " bigint, " + // partition key + CassandraDB.COLUMN_OBJ + " bigint, " + // partition key + CassandraDB.COLUMN_V1 + " bigint, " + + CassandraDB.COLUMN_V2 + " bigint, " + + CassandraDB.COLUMN_V3 + " bigint, " + +// COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + CassandraDB.COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + CassandraDB.COLUMN_IS_LITERAL + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_SUB + "), " + + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); + + /* + * 建立索引可能失败 + */ + +// String indexQuery = "CREATE INDEX on resultrows (sub) ;"; +// CqlPreparedResult indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); +// indexQuery = "CREATE INDEX on resultrows (obj) ;"; +// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); +// indexQuery = "CREATE INDEX on resultrows (pre) ;"; +// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); +// indexQuery = "CREATE INDEX on resultrows (isliteral) ;"; +// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); + + + /* + //创建所有三元组的表 + cquery = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + " ( " + COLUMN_SUB + " bigint, " + // partition key COLUMN_PRE + " bigint, " + // partition key COLUMN_OBJ + " bigint, " + // partition key COLUMN_IS_LITERAL + " boolean, " + // partition key COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_RULE + " int, " + - COLUMN_V1 + " bigint, " + - COLUMN_V2 + " bigint, " + - COLUMN_V3 + " bigint, " + -// COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key - " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + - COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + - //", " + COLUMN_TRIPLE_TYPE + - " ) ) "; - client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); - + COLUMN_INFERRED_STEPS + " int, " + + "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + + ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; + try { + logger.info("set up table " + "all triples"); + client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); + } catch (InvalidRequestException e) { + logger.error("failed to create table " + KEYSPACE + "." + "AllTriples", e); + } + + + query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + */ } @@ -269,10 +438,6 @@ public CassandraDB() throws TTransportException { } - public CassandraDB(String host, Integer port) throws TTransportException { - client = createConnection(host, port); - } - public void CassandraDBClose(){ this.close(); } @@ -281,12 +446,51 @@ public void init() throws InvalidRequestException, UnavailableException, TimedOu setupKeyspace(client); client.set_keyspace(KEYSPACE); setupTables(client); + + createIndexOnTripleType(); + createIndexOnresultrows(); + } public Cassandra.Iface getDBClient(){ return client; } + + /** + * Get the row count according to the COLUMN_INFERRED_STEPS. + * @return row count. + */ + + /* + * Need to change + */ + + public long getRowCountAccordingInferredSteps(int level){ + //ALLOW FILTERING + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_INFERRED_STEPS + " = " + level + " ALLOW FILTERING"; + + long num = 0; + try { + CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); + } catch (InvalidRequestException e) { + e.printStackTrace(); + } catch (UnavailableException e) { + e.printStackTrace(); + } catch (TimedOutException e) { + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + e.printStackTrace(); + } catch (TException e) { + e.printStackTrace(); + } + + return num; + } + + //TriplesUtils.SYNONYMS_TABLE //TriplesUtils.TRANSITIVE_TRIPLE //TriplesUtils.DATA_TRIPLE_SAME_AS @@ -296,9 +500,10 @@ public Cassandra.Iface getDBClient(){ */ public long getRowCountAccordingTripleType(int tripletype){ //ALLOW FILTERING + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; - +// System.out.println(query); long num = 0; try { CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); @@ -318,6 +523,40 @@ public long getRowCountAccordingTripleType(int tripletype){ return num; } + + /** + * Get the row count according to the triple type. + * @return row count. + */ + public long getRowCountAccordingTripleTypeWithLimitation(int tripletype, int limit){ + //ALLOW FILTERING + String query = ""; + if (limit <= 0) + query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; + else + query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " LIMIT " + limit + " ALLOW FILTERING "; + + long num = 0; + try { + CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); + } catch (InvalidRequestException e) { + e.printStackTrace(); + } catch (UnavailableException e) { + e.printStackTrace(); + } catch (TimedOutException e) { + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + e.printStackTrace(); + } catch (TException e) { + e.printStackTrace(); + } + + return num; + } + /** * Get the row count according to the type of rule. 
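
Taken together, the counting helpers can be exercised from a small driver like the sketch below (the class and main are hypothetical; every method and constant is defined in this patch). All three queries rely on ALLOW FILTERING, so without the tripletype secondary index created in init() they degrade to a full scan, and the LIMIT variant caps the returned count rather than the scanned rows:

import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
import cn.edu.neu.mitt.mrj.utils.TriplesUtils;

public final class RowCountSketch {
	public static void main(String[] args) throws Exception {
		CassandraDB.setConfigLocation();
		CassandraDB db = new CassandraDB();
		db.init();
		long originals  = db.getRowCountAccordingInferredSteps(0);	// steps = 0 marks original triples
		long transitive = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE);
		long capped     = db.getRowCountAccordingTripleTypeWithLimitation(TriplesUtils.TRANSITIVE_TRIPLE, 1000);
		System.out.println(originals + " originals, " + transitive + " transitive, " + capped + " (count capped at 1000)");
		db.CassandraDBClose();
	}
}
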
@@ -359,7 +598,7 @@ public void insertResources(long id, String label) throws InvalidRequestExceptio args.add(ByteBufferUtil.bytes(label)); CqlPreparedResult p_result = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); CqlResult result = client.execute_prepared_cql3_query(p_result.itemId, args, ConsistencyLevel.ANY); - logger.info("Number of results: " + result.getNum()); + //logger.info("Number of results: " + result.getNum()); } // TODO it's wrong!!!!!!!!!! @@ -381,10 +620,10 @@ public static Triple readJustificationFromMapReduceRow(Row row){ long pre = row.getLong(CassandraDB.COLUMN_PRE); long obj = row.getLong(CassandraDB.COLUMN_OBJ); boolean isObjectLiteral = row.getBool(CassandraDB.COLUMN_IS_LITERAL); - long v1 = row.getLong(CassandraDB.COLUMN_V1); - long v2 = row.getLong(CassandraDB.COLUMN_V2); - long v3 = row.getLong(CassandraDB.COLUMN_V3); - int rule = row.getInt(CassandraDB.COLUMN_RULE); + long v1 = -1; + long v2 = -2; + long v3 = -3; + int rule = -4; result.setObject(obj); result.setObjectLiteral(isObjectLiteral); @@ -402,7 +641,193 @@ public static int readStepFromMapReduceRow(Row row){ return step; } + /* + public static void writeJustificationToMapReduceMultipleOutputsLessObjects( + Triple triple, + TripleSource source, + MultipleOutputs output, + Map keys, + Map allkeys, + List stepsValues, + List allTValues, + String stepname) throws IOException, InterruptedException{ + + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + + allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); + allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + allTValues.add(ByteBufferUtil.bytes(triple.getObject())); + //用数字直接替代。 + allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{1}):ByteBuffer.wrap(new byte[]{0})); + allTValues.add(ByteBufferUtil.bytes( + TriplesUtils.getTripleType( + source, triple.getSubject(), + triple.getPredicate(), + triple.getObject()))); + allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); + + stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); + stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); + + output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); + output.write(stepname, null, stepsValues); + + + keys.clear(); + allkeys.clear(); + allTValues.clear(); + stepsValues.clear(); + + } + */ + + /* + public static void 
writeJustificationToMapReduceMultipleOutputs( + Triple triple, + TripleSource source, + MultipleOutputs output, + String stepname) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + Map allkeys = new LinkedHashMap(); + List allvariables = new ArrayList(); +// long time = System.currentTimeMillis(); + + byte one = 1; + byte zero = 0; + // Prepare composite key (sub, pre, obj) + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + + allvariables.add(ByteBufferUtil.bytes(source.getStep())); + allvariables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allvariables.add(ByteBufferUtil.bytes((int)triple.getType())); + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero + //variables.add(ByteBuffer.wrap(new byte[]{zero})); + + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); + + + + // Keys are not used for + // CqlBulkRecordWriter.write(Object key, List values), + // so it can be set to null. 
+ // Only values are used there where the value correspond to + // the insert statement set in CqlBulkOutputFormat.setColumnFamilyInsertStatement() + // All triples columnfamily: + // sub, pre, obj, isliteral, tripletype, inferredsteps + // Steps columnfamily: + // sub, pre, obj, rule, v1, v2, v3, transitivelevel + + List allTValues = new ArrayList(); + allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); + allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + allTValues.add(ByteBufferUtil.bytes(triple.getObject())); + allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allTValues.add(ByteBufferUtil.bytes( + TriplesUtils.getTripleType( + source, triple.getSubject(), + triple.getPredicate(), + triple.getObject()))); + allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); + + List stepsValues = new ArrayList(); + stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); + stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); + +// time = System.currentTimeMillis(); + output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); +// System.out.println("wrote all " + (System.currentTimeMillis() - time)); +// System.out.println("write all " + (System.currentTimeMillis() - time));// _output.write(stepname, keys, variables); +// time = System.currentTimeMillis(); + output.write(stepname, null, stepsValues); +// System.out.println("wrote steps" + (System.currentTimeMillis() - time)); + + + } + */ +/* public static void writeJustificationToMapReduceContext( + Triple triple, + TripleSource source, + Context context, + String stepname) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + Map allkeys = new LinkedHashMap(); + List allvariables = new ArrayList(); + long time = System.currentTimeMillis(); + + byte one = 1; + byte zero = 0; + + // Prepare composite key (sub, pre, obj) + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + // the length of boolean type in cassandra is one byte!!!!!!!! 
+ keys.put(CassandraDB.COLUMN_IS_LITERAL, + triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + int tripletype = TriplesUtils.DATA_TRIPLE; + if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ + tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table + }else{ + tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); + } + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero +// variables.add(ByteBuffer.wrap(new byte[]{zero})); + variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive + context.write(keys, variables); + } +*/ + /* + public static void writealltripleToMapReduceContext( Triple triple, TripleSource source, Context context) throws IOException, InterruptedException{ @@ -437,8 +862,74 @@ public static void writeJustificationToMapReduceContext( // For column inferred, init it as false i.e. zero // variables.add(ByteBuffer.wrap(new byte[]{zero})); variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive context.write(keys, variables); } + */ + + public static void writeJustificationToMapReduceContext( + Triple triple, + TripleSource source, + Context context) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + + byte one = 1; + byte zero = 0; + + // Prepare composite key (sub, pre, obj) +// keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); +// keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); +// keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); +// // the length of boolean type in cassandra is one byte!!!!!!!! 
+// keys.put(CassandraDB.COLUMN_IS_LITERAL, +// triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// int tripletype = TriplesUtils.DATA_TRIPLE; +// if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ +// tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table +// }else{ +// tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); +// } +// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 +// keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int +// keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long +// keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long +// keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero +// variables.add(ByteBuffer.wrap(new byte[]{zero})); + + int tripletype = TriplesUtils.DATA_TRIPLE; + if (triple.getType() == TriplesUtils.OWL_HORST_SYNONYMS_TABLE) { + tripletype = TriplesUtils.SYNONYMS_TABLE; + } else { + tripletype = TriplesUtils.getTripleType(source, + triple.getSubject(), triple.getPredicate(), + triple.getObject()); + } + + + variables.add(triple.isObjectLiteral() ? ByteBuffer + .wrap(new byte[] { one }) : ByteBuffer + .wrap(new byte[] { zero })); + variables.add(ByteBufferUtil.bytes((int) triple.getType())); + variables.add(ByteBufferUtil.bytes(triple.getSubject())); + + variables.add(ByteBufferUtil.bytes(tripletype)); + variables.add(ByteBufferUtil.bytes(triple.getPredicate())); + variables.add(ByteBufferUtil.bytes(triple.getObject())); + + variables.add(ByteBufferUtil.bytes(triple.getRsubject())); + variables.add(ByteBufferUtil.bytes(triple.getRpredicate())); + variables.add(ByteBufferUtil.bytes(triple.getRobject())); + + variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); + context.write(null, variables); + } public boolean loadSetIntoMemory(Set schemaTriples, Set filters, int previousStep) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { return loadSetIntoMemory(schemaTriples, filters, previousStep, false); @@ -465,11 +956,12 @@ public static Set> getJustifications() throws InvalidRequestExce //Modified 2015-6-25 //From COLUMNFAMILY_RESULTS to justifications ??\\ - Statement statement = QueryBuilder.select().all().from(KEYSPACE, COLUMNFAMILY_RESULTS); + Statement statement = QueryBuilder.select().all().from(KEYSPACE, "results").where(QueryBuilder.eq("id", OWLHorstJustification.id)); List rows = scds.getSession().execute(statement).all(); for (Row row : rows){ - Set testResult = row.getSet(COLUMN_JUSTIFICATION, TupleValue.class); + //modified + Set testResult = row.getSet("justification", TupleValue.class); Set> toBeDeletedFromResults = new HashSet>(); // Perform delete these from the results boolean beAdded = true; for (Set currentResult : results){ @@ -482,7 +974,7 @@ else if (currentResult.containsAll(testResult)){ 
toBeDeletedFromResults.add(currentResult);
 			}
 		}
-			if (beAdded)	// The testResult is a candidate justification

@@ getTracingEntries(Triple triple) throws InvalidRequestExcepti
 		byte zero = 0;
 		Set tracingEntries = new HashSet();
+		//Fixed 2016/4/13
+		String query = "SELECT * FROM " + KEYSPACE + "." + "resultrows" + " WHERE " +
-				COLUMN_SUB + "=? AND " + COLUMN_PRE + "=? AND " + COLUMN_OBJ + "=? AND " + COLUMN_IS_LITERAL + "=?";
+				COLUMN_SUB + "=? AND " + COLUMN_PRE + "=? AND " + COLUMN_OBJ + "=? AND " + COLUMN_IS_LITERAL + "=? ALLOW FILTERING";
 		CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
 		List list = new ArrayList();
 		list.add(ByteBufferUtil.bytes(triple.getSubject()));
@@ -568,20 +1062,27 @@ public boolean loadSetIntoMemory(
 		 *   add ALLOW FILTERING
 		 *   2015/6/12
 		 */
-		String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
+
+
+		String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
 				" FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
-				" WHERE " + COLUMN_TRIPLE_TYPE + " = ? " + " ALLOW FILTERING";
-//		System.out.println(query);
+				" WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING";
+		System.out.println(query);
 		CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
 		for (int filter : filters){
 			List list = new ArrayList();
 			list.add(ByteBufferUtil.bytes(filter));
+//			System.out.println("filter " + filter);
 			CqlResult result = client.execute_prepared_cql3_query(preparedResult.itemId, list, ConsistencyLevel.ONE);
-			for(CqlRow row : result.rows){
+			Iterator it = result.getRowsIterator();
+			while(it.hasNext()){
+				CqlRow row = it.next();
+//			for(CqlRow row : result.rows){
 				Iterator columnsIt = row.getColumnsIterator();
 				Long sub = null, obj = null;
+//				System.out.println("row : " + row);
 				while (columnsIt.hasNext()) {
 					Column column = columnsIt.next();
 					if (new String(column.getName()).equals(COLUMN_SUB))
@@ -595,9 +1096,11 @@ public boolean loadSetIntoMemory(
 				}
 			}
 			if (!inverted)
-				schemaTriples.add(sub);
+				schemaTriples.add(sub);
 			else
 				schemaTriples.add(obj);
+
+			System.out.println("schema : " + schemaTriples);
 		}
 	}
@@ -627,7 +1130,7 @@ public Map> loadMapIntoMemory(Set filters, boole
 		// Require an index created on COLUMN_TRIPLE_TYPE column
 		String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
 				" FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
-				" WHERE " + COLUMN_TRIPLE_TYPE + " = ? " + " ALLOW FILTERING";
+				" WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING";	// partition key
 		CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
@@ -665,7 +1168,7 @@ public Map> loadMapIntoMemory(Set filters, boole
 			}
 		}
-		logger.debug("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime));
+		logger.info("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime));
 		return schemaTriples;
 	}
@@ -677,13 +1180,75 @@ public void createIndexOnTripleType() throws InvalidRequestException, Unavailabl
 		client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
 	}
-	public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
-		String query = "CREATE INDEX ON " + KEYSPACE + "."
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; + public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } + + public void createIndexOnresultrows() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + + String query = "CREATE INDEX on resultrows (sub) ;"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + query = "CREATE INDEX on resultrows (obj) ;"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + query = "CREATE INDEX on resultrows (pre) ;"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + query = "CREATE INDEX on resultrows (isliteral) ;"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + + } - // Added by WuGang 2015-06-08 + +// public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ +// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; +// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); +// } +// +// +// public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ +// String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")"; +// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); +// } + + /* + public void Index() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + //createIndexOnInferredSteps(); + createIndexOnRule(); + createIndexOnTransitiveLevel(); + createIndexOnTripleType(); + System.out.println("IndexED"); + } + + public void DropTripleTypeIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_tripletype_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropRuleIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_rule_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropInferredStepsIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_inferredSteps_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropTransitiveLevelIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_transitiveLevel_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void UnIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + + this.DropInferredStepsIndex(); + this.DropRuleIndex(); + this.DropTransitiveLevelIndex(); + this.DropTripleTypeIndex(); + } + */ + // Added by WuGang 2015-06-08 public static ResultSet getRows(){ Builder builder = Cluster.builder(); @@ -699,12 +1264,12 @@ public static ResultSet getRows(){ } public static boolean delornot = false; - +/* public static void removeOriginalTriples(){ if (delornot == true) return; delornot = true; - //ִ�в�Ӧ�жϡ� + //ִ�в�Ӧ�жϡ� Builder builder = Cluster.builder(); builder.addContactPoint(DEFAULT_HOST); SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); @@ -723,7 +1288,8 @@ public static void removeOriginalTriples(){ COLUMN_V2 + " bigint, " + COLUMN_V3 + " bigint, " + // COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + COLUMN_INFERRED_STEPS + " int, " + // from this line is non-primary key + COLUMN_TRANSITIVE_LEVELS + " int, " + " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + //", " + COLUMN_TRIPLE_TYPE + @@ -764,7 +1330,7 @@ public static void removeOriginalTriples(){ session.execute(delete); System.out.println(row); } - + */ // SimpleClientDataStax scds = new SimpleClientDataStax(); // scds.connect(DEFAULT_HOST); // @@ -800,7 +1366,7 @@ public static void removeOriginalTriples(){ // scds.close(); - } +// } //create by LiYang // public static void 
createReasonTable(){ @@ -838,16 +1404,19 @@ public static void removeOriginalTriples(){ public static void main(String[] args) { try { - CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); - db.init(); - db.createIndexOnTripleType(); - db.createIndexOnRule(); + CassandraDB db = new CassandraDB(); + db.init(); +// db.createIndexOnTripleType(); +// db.createIndexOnRule(); +// db.createIndexOnInferredSteps(); +// db.createIndexOnTransitiveLevel(); // db.insertResources(100, "Hello World!"); Set schemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); db.loadSetIntoMemory(schemaTriples, filters, 0); - + //db.loadMapIntoMemory(filters, inverted) + System.out.println(schemaTriples); //modified 2015/5/19 diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java index 71c422d..9bf3734 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java @@ -21,6 +21,7 @@ * @author L * */ + public class MrjMultioutput extends MultipleOutputs { private Map taskContexts = new HashMap(); @@ -81,15 +82,15 @@ protected synchronized RecordWriter getRecordWriter( ConfigHelper.setOutputColumnFamily(taskContext.getConfiguration(), columnFamilyName); // CqlConfigHelper.setOutputCql(taskContext.getConfiguration(), getCql(columnFamilyNameName)); - CqlBulkOutputFormat.setColumnFamilySchema( - taskContext.getConfiguration(), - columnFamilyName, - getSchema(columnFamilyName)); - - CqlBulkOutputFormat.setColumnFamilyInsertStatement( - taskContext.getConfiguration(), - columnFamilyName, - getInsertStatement(columnFamilyName)); +// CqlBulkOutputFormat.setColumnFamilySchema( +// taskContext.getConfiguration(), +// columnFamilyName, +// getSchema(columnFamilyName)); +// +// CqlBulkOutputFormat.setColumnFamilyInsertStatement( +// taskContext.getConfiguration(), +// columnFamilyName, +// getInsertStatement(columnFamilyName)); @@ -126,20 +127,20 @@ String getCql(String columnFamilyNameName){ return("UPDATE " + columnFamilyNameName + " SET transitivelevel =? 
"); } - String getSchema(String columnFamilyNameName){ -// System.out.println(columnFamilyNameName + " schema"); - if (columnFamilyNameName == "alltriples") { - return CassandraDB.getAlltripleSchema(); - } - return CassandraDB.getStepsSchema(columnFamilyNameName); - } - - String getInsertStatement(String columnFamilyNameName){ -// System.out.println(columnFamilyNameName + " insert statement"); - if (columnFamilyNameName == "alltriples") { - return CassandraDB.getAlltripleStatement(); - } - return CassandraDB.getStepsStatement(columnFamilyNameName); - } +// String getSchema(String columnFamilyNameName){ +//// System.out.println(columnFamilyNameName + " schema"); +// if (columnFamilyNameName == "alltriples") { +// return CassandraDB.getAlltripleSchema(); +// } +// return CassandraDB.getStepsSchema(columnFamilyNameName); +// } +// +// String getInsertStatement(String columnFamilyNameName){ +//// System.out.println(columnFamilyNameName + " insert statement"); +// if (columnFamilyNameName == "alltriples") { +// return CassandraDB.getAlltripleStatement(); +// } +// return CassandraDB.getStepsStatement(columnFamilyNameName); +// } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java index 5552170..c8a6157 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java @@ -9,10 +9,15 @@ */ package cn.edu.neu.mitt.mrj.justification; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.net.URI; import java.util.Set; +import jdk.internal.dynalink.beans.StaticClass; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; @@ -29,12 +34,22 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.omg.CORBA.PUBLIC_MEMBER; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import ch.qos.logback.core.Context; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.reasoner.Experiments; import cn.edu.neu.mitt.mrj.utils.TripleKeyMapComparator; + + + + + + + //modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; @@ -54,6 +69,10 @@ public class OWLHorstJustification extends Configured implements Tool { public static long obj = -1; public static Path justificationsDirBase = new Path("/justification"); + public static long totaltriples; + private static int tripleamount = 0; + public static int id; //?? 
+ private boolean bClearOriginals = false; /** @@ -81,7 +100,7 @@ public void parseArgs(String[] args) { numMapTasks = Integer.valueOf(args[++i]); if (args[i].equalsIgnoreCase("--reducetasks")) numReduceTasks = Integer.valueOf(args[++i]); - + // Added by WuGang 2015-06-08 if (args[i].equalsIgnoreCase("--clearoriginals")) bClearOriginals = true; @@ -121,6 +140,9 @@ private Job createJustificationJob(int step) throws IOException { // Job Configuration conf = new JobConf(); conf.setInt("maptasks", numMapTasks); + + conf.setInt("id", id); + Job job = new Job(conf); job.setJobName("OWL Horst Justification - Step " + step); job.setJarByClass(OWLHorstJustification.class); @@ -148,8 +170,8 @@ private Job createJustificationJob(int step) throws IOException { job.setOutputKeyClass(Triple.class); // reduce output key (in next loop it will be tried to expanded) job.setOutputValueClass(MapWritable.class); // reduce output value is an explanation job.setOutputFormatClass(SequenceFileOutputFormat.class); - + return job; } @@ -158,8 +180,8 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio parseArgs(args); // Added by WuGang 2015-06-08 - if (bClearOriginals) - CassandraDB.removeOriginalTriples(); +// if (bClearOriginals) +// CassandraDB.removeOriginalTriples(); long total = 0; // Total justifications @@ -167,53 +189,95 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio long startTime = System.currentTimeMillis(); int step = 0; - + id = Experiments.id + 200; + System.out.println("id : " + id); + + prepareInput(sub, pre, obj, false); // Default it is not a literal. + File outputFile = new File("output"); + outputFile.createNewFile(); + BufferedWriter out = new BufferedWriter(new FileWriter(outputFile, true)); + +// out.write("id : " + id + "\r\n"); +// System.out.println(sub + " " + pre + " " + obj); + out.write("id : " + id + "\r\n"); + out.write("sub : " + sub + " pre : " + pre + " obj : " + obj + "\r\n"); + + // find justifications do{ log.info(">>>>>>>>>>>>>>>>>>>> Processing justification in step - " + step + " <<<<<<<<<<<<<<<<<<<<<<<<<"); + + out.write("step : " + step + "\r\n"); + +// out.write("total : " + totaltriples + "\r\n"); + Job job = createJustificationJob(step); - + job.waitForCompletion(true); +// int Retotal = 0; +// Retotal = conf.getInt("id", 111); + //需要在 job.waitForCompletion(true); 之后。 + Long result = job.getCounters().findCounter("Triples", "Triples").getValue(); + out.write("Reduce triples : " + result + "\r\n"); + + newExpanded = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); Counter counterToProcess = job.getCounters().findCounter("OWL Horst Justifications Job", "ExplanationOutputs"); total += counterToProcess.getValue(); + + step++; }while (newExpanded > 0); //modified cassandra java 2.0.5 CassandraDB db = null; + try{ - db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); + db = new CassandraDB(); db.getDBClient().set_keyspace(CassandraDB.KEYSPACE); Set> justifications = db.getJustifications(); int count = 0; + for (Set justification : justifications){ - System.out.println(">>>Justification - " + ++count + ":"); +// int tripleamount = 0; +// System.out.println(">>>Justification - " + ++count + ":"); +// out.write(">>>Justification - " + ++count + ":" + "\r\n"); for(TupleValue triple : justification){ long sub = triple.getLong(0); long pre = triple.getLong(1); long obj = triple.getLong(2); - System.out.println("\t<" + sub + ", " + 
pre + ", " + obj + ">" + - " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">"); +// System.out.println("\t<" + sub + ", " + pre + ", " + obj + ">" + +// " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">"); +// out.write("\t<" + sub + ", " + pre + ", " + obj + ">" + +// " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">" + "\r\n"); + tripleamount++; } +// System.out.println(tripleamount); + out.write("tripleamount : " + tripleamount + "\r\n"); } - + db.CassandraDBClose(); }catch(Exception e){ System.err.println(e.getMessage()); } - - - + System.out.println("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000); System.out.println("Number justifications: " + total); +// out.write("tripleamount : " + tripleamount + "\r\n"); + + out.write("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000 + "\r\n"); + out.write("Number justifications: " + total + "\r\n\r\n"); + out.flush(); + out.close(); + + return total; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java index 73e4d54..b80060a 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java @@ -3,10 +3,17 @@ */ package cn.edu.neu.mitt.mrj.justification; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; +import java.io.OutputStreamWriter; import java.util.HashSet; import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Writable; @@ -15,6 +22,7 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.io.dbs.SimpleClientDataStax; +import cn.edu.neu.mitt.mrj.reasoner.Experiments; import com.datastax.driver.core.DataType; //modified cassandra java 2.0.5 @@ -34,17 +42,53 @@ public class OWLHorstJustificationReducer extends // private static Logger log = LoggerFactory.getLogger(OWLHorstJustificationReducer.class); private static SimpleClientDataStax sClient = null; + private long triplenum = 0; + @Override protected void reduce(MapWritable key, Iterable values, Context context) throws IOException, InterruptedException { long total = 0; + int id = 0; + Configuration reduceconf = context.getConfiguration(); + id = reduceconf.getInt("id", 2); + for (LongWritable count:values){ total += count.get(); } -// System.out.println("Total count is: " + total); + triplenum = total; + System.out.println("Reduce total count is: " + total); //modified cassandra java 2.0.5 +// reduceconf.setInt("id", (int)total); + + + + +// File outputFile = new File("output"); +// outputFile.createNewFile(); +// BufferedWriter out = new BufferedWriter(new FileWriter(outputFile, true)); +// out.write("Total count is: " + total); +// out.flush(); +// out.close(); + +// try{ +// Path pt=new Path("./result"); +// FileSystem fs = FileSystem.get(new Configuration()); +// BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); +// // TO append data to a file, use fs.append(Path f) +// String line; +// line="Total count is: " + total; +// System.out.println(line); +// 
br.write(line); +// br.close(); +// }catch(Exception e){ +// System.out.println("File not found"); +// } + +// System.out.println("Reduce id : " + Experiments.id); //均是0 + + if (total == key.size()){ // Find a candidate justification, output it to the database Set resultJustification = new HashSet(); for(Writable triple : key.keySet()){ @@ -54,15 +98,19 @@ protected void reduce(MapWritable key, Iterable values, Context co theValue.setLong(1, ((Triple)triple).getPredicate()); theValue.setLong(2, ((Triple)triple).getObject()); resultJustification.add(theValue); + System.out.println(" _______ " + ((Triple)triple).getSubject()); } + System.out.println("Write a candidate justification to database=========== "); + System.out.println(resultJustification.toString()); // log.info("Write a candidate justification to database=========== "); // log.info(resultJustification.toString()); + System.out.println(" REDUCE id : " + id); Insert insert = QueryBuilder .insertInto(CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_RESULTS) .value(CassandraDB.COLUMN_JUSTIFICATION, resultJustification) - .value(CassandraDB.COLUMN_ID, UUIDs.timeBased()); + .value("id", id); sClient.getSession().execute(insert); // Added by WuGang 2015-02-14 @@ -74,6 +122,8 @@ protected void reduce(MapWritable key, Iterable values, Context co } } // else do nothing. +// OWLHorstJustification.totaltriples = total; + } @Override @@ -86,6 +136,8 @@ protected void setup(Context context) @Override protected void cleanup(Context context) throws IOException, InterruptedException { + context.getCounter("Triples", "Triples").increment(triplenum); + sClient.close(); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java new file mode 100644 index 0000000..d856550 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java @@ -0,0 +1,90 @@ +package cn.edu.neu.mitt.mrj.reasoner; + +import java.io.IOException; +import java.net.InetAddress; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; + +import jdk.internal.dynalink.beans.StaticClass; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ToolRunner; +import org.omg.CORBA.PUBLIC_MEMBER; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.justification.OWLHorstJustification; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Host; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SocketOptions; +import com.datastax.driver.core.Cluster.Builder; +public class Experiments { + + public static int id; + + public static void main(String[] args){ + Builder builder = Cluster.builder(); + builder.addContactPoint(CassandraDB.DEFAULT_HOST); + SocketOptions socketoptions = new SocketOptions().setKeepAlive(true).setConnectTimeoutMillis(5 * 10000).setReadTimeoutMillis(100000); + builder.withSocketOptions(socketoptions); + Cluster cluster = builder.build(); + Metadata metadata = cluster.getMetadata(); + Session session = cluster.connect(); + +// Random r = new Random(System.currentTimeMillis()) ; +// int random = 0; +// if (r.nextBoolean()) { +// random = r.nextInt(101) ; +// } else { +// random = 
-r.nextInt(101) ;
+// }
+ for (id = 0; id < 10; id++) {
+ long random = ThreadLocalRandom.current().nextLong(-9223372036854775808L, 9223372036854775807L);
+// long startTime = System.currentTimeMillis();
+ ResultSet results = session.execute("SELECT sub ,pre ,obj FROM mrjks.resultrows WHERE TOKEN(isliteral , rule , sub ) > " + random + " LIMIT 1;");
+// System.out.println(results);
+ for (Row row : results){
+ Configuration conf = new Configuration();
+ try {
+ FileSystem hdfs = FileSystem.get(conf);
+ Path deledir = new Path("/justification");
+ boolean isDeleted = hdfs.delete(deledir, true);
+ } catch (IOException e1) {
+ e1.printStackTrace();
+ }
+
+// System.out.println("id : " + id);
+
+ Long sub, pre, obj;
+ sub = row.getLong("sub");
+ pre = row.getLong("pre");
+ obj = row.getLong("obj");
+ System.out.println("sub : " + sub + " pre : " + pre + " obj : " + obj);
+ // note: no extra spaces may be embedded in the option strings
+ String[] argStrings = {"--maptasks" , "8" , "--reducetasks" , "8" , "--subject" , sub.toString() , "--predicate" , pre.toString() , "--object" , obj.toString() ,"--clearoriginals"};
+// OWLHorstJustification OWJ = new OWLHorstJustification();
+ System.out.println(java.util.Arrays.toString(argStrings)); // print the arguments, not the array reference
+ OWLHorstJustification.main(argStrings);
+
+// try {
+// OWJ.launchClosure(argStrings);
+// } catch (ClassNotFoundException | IOException
+// | InterruptedException e) {
+// System.out.println("launchClosure error");
+// e.printStackTrace();
+// }
+
+ }
+// System.out.println("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000);
+ }
+ cluster.close();
+ }
+}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
index b0f8c8e..dcc2125 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
@@ -2,23 +2,27 @@
 * Project Name: mrj-0.1
 * File Name: MapReduceJobConfig.java
 * @author Gang Wu
- * 2014-12-28 10:44:16 AM
+ * 2014-12-28 10:44:16 AM
 *
 * Description:
 * TODO
 */
package cn.edu.neu.mitt.mrj.reasoner;
+
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.cassandra.hadoop.ConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat;
import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
import org.apache.cassandra.hadoop.cql3.CqlInputFormat;
import org.apache.cassandra.hadoop.cql3.CqlOutputFormat;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
@@ -31,72 +35,115 @@ public class MapReduceReasonerJobConfig {
 // Input from CassandraDB.COLUMNFAMILY_JUSTIFICATIONS
- private static void configureCassandraInput(Job job, Set filters) {
+ private static void configureCassandraInput(Job job, Set typeFilters, Set transitiveLevelFilters, int certainStep) {
 //Set the input
 ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
 // Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml
 //ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
 ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
 ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
- if
(filters.size() == 0){ - CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? ALLOW FILTERING"); + if (typeFilters.size() == 0){ + + if (transitiveLevelFilters.size() == 0) + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? ALLOW FILTERING"); +// "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " WHERE TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") > ? AND TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") <= ? ALLOW FILTERING"); + else{ + Integer max = java.util.Collections.max(transitiveLevelFilters); + Integer min = java.util.Collections.min(transitiveLevelFilters); + + + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? " + +// CassandraDB.COLUMN_INFERRED_STEPS + " = " + certainStep + " AND " + +// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " >= " + min + " AND " + +// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " <= " + max + + " ALLOW FILTERING"); + } + } - else if (filters.size() == 1){ + else if (typeFilters.size() == 1){ + if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property + System.err.println("This is not supported!!!"); + return; + } + CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " = " + filters.toArray()[0] + - " ALLOW FILTERING"); + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? "); +// ") <= ? 
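// Sketch of the contract behind the two "?" placeholders in the CQL above: the
// CqlInputFormat splits the token ring and binds each split's start and end token
// to them, so every map task scans one disjoint slice of the new partition key
// (isliteral, rule, sub). Keyspace/table names follow the mrjks.justifications
// naming visible elsewhere in this patch; the method itself is illustrative.
import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
import org.apache.hadoop.mapreduce.Job;

class TokenRangeInputSketch {
    static void configure(Job job) {
        CqlConfigHelper.setInputCql(job.getConfiguration(),
            "SELECT * FROM mrjks.justifications" +
            " WHERE TOKEN(isliteral, rule, sub) > ?" +     // split start token
            " AND TOKEN(isliteral, rule, sub) <= ?" +      // split end token
            " ALLOW FILTERING");
    }
}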
AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] + +// " ALLOW FILTERING"); }else{ + if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property + System.err.println("This is not supported!!!"); + return; + } + + // The support of IN clause in cassandra db's SELECT is restricted. // So we have to try to manually cluster the values in the filters. // see http://www.datastax.com/documentation/cql/3.1/cql/cql_reference/select_r.html#reference_ds_d35_v2q_xj__selectIN System.out.println("<<<<<<<>>>>>>>>"); System.out.println("<<<<<<<>>>>>>>>"); - Integer max = java.util.Collections.max(filters); - Integer min = java.util.Collections.min(filters); + Integer max = java.util.Collections.max(typeFilters); + Integer min = java.util.Collections.min(typeFilters); CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + - " ALLOW FILTERING"); + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? "); +// + "AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + +// " ALLOW FILTERING"); // String strFilter = filters.toString(); // String strInFilterClause = strFilter.substring(1, strFilter.length()-1); // remove "[" and "]" characters of Set.toString() @@ -130,39 +177,53 @@ else if (filters.size() == 1){ // Output to CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraOutput(Job job) { + private static void configureCassandraOutput(Job job, int step) { //Set the output job.setOutputKeyClass(Map.class); job.setOutputValueClass(List.class); - job.setOutputFormatClass(CqlOutputFormat.class); + + job.setOutputFormatClass(CqlBulkOutputFormat.class); + CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS, CassandraDB.getJustificationsSchema()); + CqlBulkOutputFormat.setColumnFamilyInsertStatement(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS, CassandraDB.getJustificationseStatement()); + CqlBulkOutputFormat.setDeleteSourceOnSuccess(job.getConfiguration(), true); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); - ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? 
"; - CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + +// MrjMultioutput.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); +// MrjMultioutput.addNamedOutput(job, "step" + step, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); +// CqlConfigHelper.setOutputCql(conf, "select * from step1"); } - // In each derivation, we may create a set of jobs + // In each derivation, we may create a set of jobs + // certainStep is optional, if it is specified then we can use it to filter transitiveLevel with non-equal operator + // (see cql specification) public static Job createNewJob(Class classJar, String jobName, - Set filters, int numMapTasks, int numReduceTasks, - boolean bConfigCassandraInput, boolean bConfigCassandraOutput) + Set typeFilters, Set transitiveLevelFilters, int certainStep, int numMapTasks, int numReduceTasks, + boolean bConfigCassandraInput, boolean bConfigCassandraOutput, Integer step) throws IOException { Configuration conf = new Configuration(); conf.setInt("maptasks", numMapTasks); - conf.set("input.filter", filters.toString()); - + conf.set("input.filter", typeFilters.toString()); + + conf.set("mapreduce.output.bulkoutputformat.streamthrottlembits", "400"); + Job job = new Job(conf); job.setJobName(jobName); job.setJarByClass(classJar); job.setNumReduceTasks(numReduceTasks); + job.setNumReduceTasks(8); + if (bConfigCassandraInput) - configureCassandraInput(job, filters); + configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep); if (bConfigCassandraOutput) - configureCassandraOutput(job); + configureCassandraOutput(job, step); + // Added by WuGang 2010-05-25 System.out.println("Create a job - " + jobName); @@ -171,6 +232,44 @@ public static Job createNewJob(Class classJar, String jobName, return job; } - - +/* + public static void CreateTables(String jobname){ + Builder builder = Cluster.builder(); + builder.addContactPoint(CassandraDB.DEFAULT_HOST); + SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); + Cluster clu = builder.build(); + Session session = clu.connect(); + + String query = ""; + if(jobname == "RDFS special properties reasoning"){ + query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." + jobname + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + "transitiveleves int" + + ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"; + } + else { + query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." 
+ jobname + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + ", primary key((id, rule) ,v1, v2, v3))"; + } + + session.execute(query); + System.out.println(query); + System.out.println("--------Create Table----------"); + } + */ } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java index e27c689..08fba3e 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java @@ -1,34 +1,22 @@ package cn.edu.neu.mitt.mrj.reasoner; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; -import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; import org.apache.cassandra.hadoop.ConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlInputFormat; import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; -import org.apache.cassandra.thrift.SlicePredicate; -import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import prejustification.SelectInferRows; -import prejustification.SelectInferRowsMap; -import prejustification.SelectInferRowsReduce; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; -import cn.edu.neu.mitt.mrj.utils.Cassandraconf; public class ReasonedJustifications extends Configured implements Tool{ public int run(String[] args) throws Exception{ @@ -37,27 +25,27 @@ public int run(String[] args) throws Exception{ Job job = new Job(conf); job.setJobName(" Test "); - job.setJarByClass(SelectInferRows.class); + job.setJarByClass(ReasonedJustifications.class); job.setNumReduceTasks(8); ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - CqlConfigHelper.setInputCql(job.getConfiguration(), + CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + //CassandraDB.COLUMN_IS_LITERAL + ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + //CassandraDB.COLUMN_IS_LITERAL + ") <= ? 
ALLOW FILTERING"); CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); - //Modifide by LiYang + //Modified by LiYang ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); job.setInputFormatClass(CqlInputFormat.class); job.setOutputKeyClass(Map.class); @@ -68,7 +56,7 @@ public int run(String[] args) throws Exception{ ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); String query = "UPDATE " + CassandraDB.KEYSPACE + "." + "resultrows" + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; CqlConfigHelper.setOutputCql(job.getConfiguration(), query); job.setMapperClass(ReasonedJustificationsMapper.class); @@ -77,74 +65,6 @@ public int run(String[] args) throws Exception{ job.setReducerClass(ReasonedJustificationsReducer.class); -// Configuration conf = getConf(); -// Job job = new Job(conf, "Select Reasoned Rows"); -// job.setJarByClass(ReasonedJustifications.class); -// /* -// //Set the predicate -// List columnNames = new ArrayList(); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_SUB)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_PRE)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_PRE)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_IS_LITERAL)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_TRIPLE_TYPE)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_RULE)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_V1)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_V2)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_V3)); -// columnNames.add(ByteBufferUtil.bytes(CassandraDB.COLUMN_INFERRED_STEPS)); -// SlicePredicate predicate = new SlicePredicate().setColumn_names(columnNames); -// ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate); -// */ -// /* -// * Get Attention conf != job.getConfiguration() -// * thread "main" java.lang.NullPointerException at org.apache.cassandra.utils.FBUtilities.newPartitioner(FBUtilities.java:418) -// */ -// //Input -// -// ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); -// ConfigHelper.setInputPartitioner(job.getConfiguration(), Cassandraconf.partitioner); -// ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMN_JUSTIFICATION); -// CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), "1000"); -// -// -// -// CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM mrjks.justifications WHERE TOKEN(" + -// CassandraDB.COLUMN_SUB + ", " + -// CassandraDB.COLUMN_PRE + ", " + -// CassandraDB.COLUMN_OBJ + ", " + -// CassandraDB.COLUMN_IS_LITERAL + -// ") > ? AND TOKEN(" + -// CassandraDB.COLUMN_SUB + ", " + -// CassandraDB.COLUMN_PRE + ", " + -// CassandraDB.COLUMN_OBJ + ", " + -// CassandraDB.COLUMN_IS_LITERAL + -// ") <= ? 
AND " + -// CassandraDB.COLUMN_TRIPLE_TYPE + " = " + "5" + -// " ALLOW FILTERING;"); -// ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); -// job.setInputFormatClass(ColumnFamilyInputFormat.class); -// -// -// //output -// job.setOutputKeyClass(Text.class); -// job.setOutputValueClass(Text.class); -// job.setOutputFormatClass(CqlOutputFormat.class); -// ConfigHelper.setOutputInitialAddress(job.getConfiguration(), CassandraDB.DEFAULT_HOST); -// ConfigHelper.setOutputPartitioner(job.getConfiguration(), Cassandraconf.partitioner); -// -// ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); -// -// //ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE); //*** -// String query = "UPDATE mrjks.resultrows SET " + CassandraDB.COLUMN_INFERRED_STEPS + "= ?"; -// CqlConfigHelper.setOutputCql(job.getConfiguration(), query); -// -// -// job.setMapperClass(ReasonedJustificationsMapper.class); -// job.setMapOutputKeyClass(Text.class); -// job.setMapOutputValueClass(IntWritable.class); -// job.setReducerClass(ReasonedJustificationsReducer.class); - job.waitForCompletion(true); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java index 7e719e1..e2142fc 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java @@ -5,6 +5,8 @@ import java.util.HashMap; import java.util.Map; +import org.apache.cassandra.thrift.Compression; +import org.apache.cassandra.thrift.CqlPreparedResult; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -25,29 +27,27 @@ public class ReasonedJustificationsMapper extends Mapper, List>{ @@ -21,7 +19,7 @@ public void reduce(Text key, Iterable values, Context context) thro for (IntWritable value : values) { //Prepare the insert keys collection - String[] splitkeys = key.toString().split("-"); + String[] splitkeys = key.toString().split("_"); Map keys = new LinkedHashMap(); keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(Long.parseLong(splitkeys[0]))); keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(Long.parseLong(splitkeys[1]))); @@ -36,8 +34,10 @@ public void reduce(Text key, Iterable values, Context context) thro //prepare the insert variables collection List variables = new ArrayList(); - int var = Integer.parseInt(value.toString()); - variables.add(ByteBufferUtil.bytes(var)); + int var_inferredsteps = Integer.parseInt(value.toString()); + variables.add(ByteBufferUtil.bytes(var_inferredsteps)); + int var_transitivelevel = Integer.parseInt(splitkeys[9]); + variables.add(ByteBufferUtil.bytes(var_transitivelevel)); context.write(keys, variables); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java index 5fc2e89..8cad3d8 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java @@ -36,7 +36,7 @@ public class OWLAllSomeValuesMapper extends Mapper values = someValues.get(value.getObject()); Iterator itr = values.iterator(); bKey[0] = 2; bValue[0] = 1; - bValue[17] = 0; // ��������һ��someValues + bValue[17] = 0; // ��������һ��someValues NumberUtils.encodeLong(bKey, 
9, value.getSubject());
- NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write w into the value
+ NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write w into the value
 while (itr.hasNext()) {
 byte[] bytes = itr.next();
 System.arraycopy(bytes, 0, bKey, 1, 8);
 System.arraycopy(bytes, 8, bValue, 1, 8);
- context.write(oKey, oValue); // emits ((p,x),v) -> ((p,x),(v,w,0))
+ context.write(oKey, oValue); // emits ((p,x),v) -> ((p,x),(v,w,0))
 }
 }
- // an extra v needs to be passed along as well
- if (allValues.containsKey(value.getObject())) { // found a triple (w, rdf:type, v) whose v satisfies v owl:allValuesFrom u
+ // an extra v needs to be passed along as well
+ if (allValues.containsKey(value.getObject())) { // found a triple (w, rdf:type, v) whose v satisfies v owl:allValuesFrom u
 log.info("I met allValuesFrom: " + value);
 Collection values = allValues.get(value.getObject());
 Iterator itr = values.iterator();
 bKey[0] = 1;
 bValue[0] = 1;
- bValue[17] = 1; // marks this as an allValues match
+ bValue[17] = 1; // marks this as an allValues match
 NumberUtils.encodeLong(bKey, 9, value.getSubject());
- NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write v into the value
+ NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write v into the value
 while (itr.hasNext()) {
 byte[] bytes = itr.next();
 System.arraycopy(bytes, 0, bKey, 1, 8);
 System.arraycopy(bytes, 8, bValue, 1, 8);
- context.write(oKey, oValue); // emits ((p,w),u) -> ((p,w),(u,v,1))
+ context.write(oKey, oValue); // emits ((p,w),u) -> ((p,w),(u,v,1))
 }
 }
 } else {
- // onPropertySome holds the properties referenced via owl:onProperty by someValuesFrom restrictions (v owl:someValuesFrom w)
- if (onPropertySome.contains(value.getPredicate())) { // the p of some triple (u p x) is a property referenced by a someValuesFrom restriction
+ // onPropertySome holds the properties referenced via owl:onProperty by someValuesFrom restrictions (v owl:someValuesFrom w)
+ if (onPropertySome.contains(value.getPredicate())) { // the p of some triple (u p x) is a property referenced by a someValuesFrom restriction
 //Rule 15 - someValuesFrom
 log.info("I met onPropertySome: " + value);
 bKey[0] = 2;
@@ -103,19 +103,19 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 NumberUtils.encodeLong(bKey, 1, value.getPredicate());
 NumberUtils.encodeLong(bKey, 9, value.getObject());
 NumberUtils.encodeLong(bValue, 1, value.getSubject());
- context.write(oKey, oValue); // emits ((p,x),(u,,)); the last two fields of the value are left unset
+ context.write(oKey, oValue); // emits ((p,x),(u,,)); the last two fields of the value are left unset
 }
- // onPropertyAll holds the properties referenced via owl:onProperty by allValuesFrom restrictions (v owl:allValuesFrom u)
- if (onPropertyAll.contains(value.getPredicate())) { // the p of some triple (w p x) is a property referenced by an allValuesFrom restriction
+ // onPropertyAll holds the properties referenced via owl:onProperty by allValuesFrom restrictions (v owl:allValuesFrom u)
+ if (onPropertyAll.contains(value.getPredicate())) { // the p of some triple (w p x) is a property referenced by an allValuesFrom restriction
 //Rule 16 - allValuesFrom
 log.info("I met onPropertyAll: " + value);
 bKey[0] = 1;
- bValue[0] = 0; // Added by WuGang: this line was missing originally, which corrupted the reduce phase whenever several matching triples were present
+ bValue[0] = 0; // Added by WuGang: this line was missing originally, which corrupted the reduce phase whenever several matching triples were present
 NumberUtils.encodeLong(bKey, 1, value.getPredicate());
 NumberUtils.encodeLong(bKey, 9, value.getSubject());
 NumberUtils.encodeLong(bValue, 1, value.getObject());
- context.write(oKey, oValue); // emits ((p,w),(x,,)); the last two fields of the value are left unset
+ context.write(oKey, oValue); // emits ((p,w),(x,,)); the last two fields of the value are left unset
 }
 }
 }
@@ -123,7 +123,7 @@ public void map(Long key, Row row,
Context context) throws IOException, Interrup @Override public void setup(Context context) throws IOException { previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1); - + // List filesProperty = MultiFilesReader.recursiveListStatus(context, "FILTER_ONLY_OWL_ON_PROPERTY"); // Map> allValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_ALL_VALUES", context); // Map> someValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_SOME_VALUES", context); @@ -212,7 +212,7 @@ protected void makeJoin(Map> onPropertyTmp, Context contex } if (allValuesTmp.containsKey(sub)) { - // col��Ӧ�������е�subject��Ӧ��object������subject��object����subject, owl:allValuesFrom, object + // col��Ӧ�������е�subject��Ӧ��object������subject��object����subject, owl:allValuesFrom, object Collection col = allValuesTmp.get(sub); if (col != null) { Iterator itr = col.iterator(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java index e8bad41..0161e11 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java @@ -28,8 +28,8 @@ public class OWLAllSomeValuesReducer extends Reducer resources = new LinkedList(); // Added by WuGang - private LinkedList others = new LinkedList(); // ��types����һ�� - private LinkedList s_a_types = new LinkedList(); // ��types����һ��,���ڴ洢��someValues(0)����allValues(1)���� + private LinkedList others = new LinkedList(); // ��types����һ�� + private LinkedList s_a_types = new LinkedList(); // ��types����һ��,���ڴ洢��someValues(0)����allValues(1)���� @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -39,7 +39,7 @@ public void reduce(BytesWritable key, Iterable values, Context co resources.clear(); byte[] bKey = key.getBytes(); - long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject����key�ĵڶ���Long����ʼλ��Ϊ9���ͷ����һ��byte�� + long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject����key�ĵڶ���Long����ʼλ��Ϊ9���ͷ����һ��byte�� long predicate = NumberUtils.decodeLong(bKey, 1); // Added by WuGang 2010-07-14 Iterator itr = values.iterator(); @@ -48,7 +48,7 @@ public void reduce(BytesWritable key, Iterable values, Context co byte[] bValue = value.getBytes(); if (bValue[0] == 1) { //Type triple types.add(NumberUtils.decodeLong(bValue, 1)); - others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, ��types����Ҫ���⴫��һ��long�ͣ���һ��byte + others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, ��types����Ҫ���⴫��һ��long�ͣ���һ��byte s_a_types.add(bValue[17]); } else { //Resource triple resources.add(NumberUtils.decodeLong(bValue, 1)); @@ -66,7 +66,7 @@ public void reduce(BytesWritable key, Iterable values, Context co while (itrResource.hasNext()) { long resource = itrResource.next(); triple.setSubject(resource); - // ����Types���͵�value����someValuesΪ����((p,x),(v,w))����allValuesΪ����((p,w),(u,v)) + // ����Types���͵�value����someValuesΪ����((p,x),(v,w))����allValuesΪ����((p,w),(u,v)) Iterator itrTypes = types.listIterator(); Iterator itrOthers = others.listIterator(); Iterator itrSATypes = s_a_types.listIterator(); @@ -74,14 +74,14 @@ public void reduce(BytesWritable key, Iterable values, Context co long type = itrTypes.next(); triple.setObject(type); - // Added by WuGang����triple��ֵ + // Added by WuGang����triple��ֵ long other 
= itrOthers.next(); byte s_a_type = itrSATypes.next(); - triple.setRsubject(rSubject); // ��someValues������x,��allValues������w + triple.setRsubject(rSubject); // ��someValues������x,��allValues������w // Modified by WuGang 2010-07-14 // triple.setRpredicate(TriplesUtils.RDF_TYPE); //rdf:type triple.setRpredicate(predicate); - triple.setRobject(other); // ��someValues������w,��allValues������v + triple.setRobject(other); // ��someValues������w,��allValues������v switch (s_a_type) { case 0: triple.setType(TriplesUtils.OWL_HORST_15); @@ -95,7 +95,7 @@ public void reduce(BytesWritable key, Iterable values, Context co // System.out.println("Generate an extended triple for OWLAllSomeValues: " + triple); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } } @@ -110,4 +110,12 @@ public void setup(Context context) { triple.setObjectLiteral(false); triple.setPredicate(TriplesUtils.RDF_TYPE); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java index ab4cfc0..3323bd6 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java @@ -85,7 +85,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup @Override public void setup(Context context) throws IOException { - + CassandraDB db; try { db = new CassandraDB(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java index c755300..731fb98 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java @@ -33,7 +33,7 @@ public class OWLEquivalenceSCSPReducer extends Reducer> subpropSchemaTriples = null; public static Map> subclassSchemaTriples = null; public static Map> equivalenceClassesSchemaTriples = null; // Added by WuGang @@ -90,7 +90,7 @@ public void reduce(LongWritable key, Iterable values, Context con } } - if (!found) { // ���������ó��Ľ�� + if (!found) { // ��������ó��Ľ�� triple.setObject(resource); triple.setSubject(key.get()); triple.setPredicate(TriplesUtils.RDFS_SUBCLASS); @@ -107,9 +107,8 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRpredicate(TriplesUtils.OWL_EQUIVALENT_CLASS); triple.setRobject(triple.getSubject()); } - // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } @@ -146,12 +145,12 @@ public void reduce(LongWritable key, Iterable values, Context con } // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } //Subproperties - // Modified by WuGang,����ò��Ӧ����superProperties + // Modified by WuGang,����ò��Ӧ����superProperties // itr2 = equivalenceProperties.iterator(); itr2 = superProperties.iterator(); while (itr2.hasNext()) { @@ -180,12 +179,12 @@ public void 
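// Sketch of the fixed-offset byte layout shared by the mapper and reducer above:
// keys are 17 bytes (1 flag byte plus two longs at offsets 1 and 9) and values
// carry one extra flag at offset 17 distinguishing someValues (0) from allValues (1).
// PackedPair is an invented name; NumberUtils is the project's own helper.
import cn.edu.neu.mitt.mrj.utils.NumberUtils;

class PackedPair {
    static byte[] packKey(byte flag, long first, long second) {
        byte[] key = new byte[17];
        key[0] = flag;                          // e.g. 1 = allValues join, 2 = someValues join
        NumberUtils.encodeLong(key, 1, first);  // first long at offset 1
        NumberUtils.encodeLong(key, 9, second); // second long at offset 9
        return key;
    }
    static long unpackSecond(byte[] key) {
        return NumberUtils.decodeLong(key, 9);  // reducer side, cf. rSubject above
    }
}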
reduce(LongWritable key, Iterable values, Context con triple.setRobject(triple.getObject()); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } //Subclasses - // Modified by WuGang,����ò��Ӧ����superClasses + // Modified by WuGang,����ò��Ӧ����superClasses // itr2 = equivalenceClasses.iterator(); itr2 = superClasses.iterator(); while (itr2.hasNext()) { @@ -213,9 +212,8 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRsubject(triple.getSubject()); triple.setRpredicate(TriplesUtils.RDFS_SUBCLASS); triple.setRobject(triple.getObject()); - // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); } } } @@ -272,4 +270,11 @@ public void setup(Context context) throws IOException { } } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java index 3cd6514..2ca8a07 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java @@ -43,7 +43,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } //TODO: check whether also the schema is modified oKey.set(value.getSubject()); - if (value.getPredicate() == TriplesUtils.RDF_TYPE && // ����14b������value����(u rdf:type v)��Ŀ������reduce������(u p w)�����ǻ�Ҫ����14b(v owl:hasValue w) + if (value.getPredicate() == TriplesUtils.RDF_TYPE && // ����14b������value����(u rdf:type v)��Ŀ������reduce������(u p w)�����ǻ�Ҫ���14b(v owl:hasValue w) hasValue.contains(value.getObject()) && onProperty.contains(value.getObject())) { // System.out.println("In OWLHasValueMapper for 14b: " + value); // Added by Wugang @@ -52,7 +52,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup oValue.set(values, 0, 9); context.write(oKey, oValue); - } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // ����14a������value����(u p w)��Ŀ������reduce������(u rdf:type v)�����ǻ�Ҫ����14a(v owl:hasValue w) + } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // ����14a������value����(u p w)��Ŀ������reduce������(u rdf:type v)�����ǻ�Ҫ���14a(v owl:hasValue w) && hasValueInverted.contains(value.getObject()) && onPropertyInverted.contains(value.getPredicate())) { // System.out.println("In OWLHasValueMapper for 14a: " + value); // Added by Wugang @@ -71,7 +71,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup public void setup(Context context) throws IOException { previousStep = context.getConfiguration().getInt("reasoner.previousStep", -1); - + try{ CassandraDB db = new CassandraDB(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java index 8a6a562..c85b693 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java @@ -44,8 +44,10 @@ public class OWLHasValueReducer extends Reducer values, Context context) throws IOException, InterruptedException { Iterator itr = 
values.iterator(); + System.out.println("step 6"); while (itr.hasNext()) { byte[] v = itr.next().getBytes(); + System.out.println("step6 has values reduce"); if (v.length > 0) { if (v[0] == 0) { //Rule 14b // System.out.println("In OWLHasValueReducer for 14b: "); // Added by Wugang @@ -69,9 +71,8 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setRsubject(object); // v triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue triple.setRobject(triple.getObject()); // w -// System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang - - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); // context.write(source, triple); } } @@ -97,11 +98,11 @@ public void reduce(LongWritable key, Iterable values, Context con triple.setType(TriplesUtils.OWL_HORST_14a); triple.setRsubject(triple.getObject()); // v // triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue - triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26,�����Ϣ�������»ָ��� + triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26,�����Ϣ�������»ָ��� triple.setRobject(object); // w // System.out.println("In OWLHasValueReducer for 14a output: "+triple); // Added by Wugang - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); // context.write(source, triple); } } @@ -145,4 +146,11 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java index ef5bce4..d6bf4a6 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java @@ -105,7 +105,7 @@ protected void setup(Context context) throws IOException { previousTransDerivation = context.getConfiguration().getInt("reasoner.previosTransitiveDerivation", -1); previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1); hasSchemaChanged = false; - + try{ CassandraDB db = new CassandraDB(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java index cc08af6..486af50 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java @@ -37,27 +37,28 @@ public class OWLNotRecursiveReducer extends Reducer set = new HashSet(); protected Map> schemaInverseOfProperties = null; - + protected void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bytes = key.getBytes(); long rsubject=0, rpredicate=0, robject=0; long key1=0, key2=0, value1 = 0; - + switch(bytes[0]) { // case 0: // case 1: //Functional and inverse functional property case 0: // Modified by WuGang, Functional case 1: // Modified by WuGang, Inverse Functional // System.out.println("Processing Functional & Inverse Functional Property."); 
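// A plausible reconstruction of the min-tracking pass in the functional /
// inverse-functional branch below: keep the smallest id seen as the canonical
// representative and collect every other value into a set, from which the derived
// triples are then emitted. canonicalAndRest is an invented helper name; the
// else-branch is inferred, since the hunk below does not show it.
import java.util.Set;

class CanonicalSketch {
    static long canonicalAndRest(Iterable<Long> values, Set<Long> rest) {
        long minimum = Long.MAX_VALUE;
        rest.clear();
        for (long v : values) {
            if (v < minimum) {
                if (minimum != Long.MAX_VALUE)
                    rest.add(minimum);   // demote the previous minimum
                minimum = v;
            } else if (v != minimum) {
                rest.add(v);
            }
        }
        return minimum;                  // canonical (smallest) id
    }
}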
- key1 = NumberUtils.decodeLong(bytes, 1); // for a functional property this is the subject; for an inverse functional property it is the object
+ key1 = NumberUtils.decodeLong(bytes, 1); // for a functional property this is the subject; for an inverse functional property it is the object
 key2 = NumberUtils.decodeLong(bytes, 9); // predicate
 long minimum = Long.MAX_VALUE;
 set.clear();
 Iterator itr = values.iterator();
+
 while (itr.hasNext()) {
 long value = itr.next().get();
- value1 = value; // Added by Wugang: for a functional property this is the object of the original triple; for an inverse functional property it is the subject
+ value1 = value; // Added by Wugang: for a functional property this is the object of the original triple; for an inverse functional property it is the subject
 if (value < minimum) {
 if (minimum != Long.MAX_VALUE)
 set.add(minimum);
@@ -97,7 +98,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 triple.setObject(object);
// System.out.println("Find a derive in functional and inverse functional property!" + triple);
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 outputSize++;
 }
 context.getCounter("OWL derived triples", "functional and inverse functional property").increment(outputSize);
@@ -116,13 +117,13 @@ else if (bytes[0] == 1){ //Inverse Functional
 triple.setRsubject(subject);
 triple.setRobject(object);
 triple.setType(TriplesUtils.OWL_HORST_3);
-
+
 itr = values.iterator();
 while (itr.hasNext()) {
 triple.setPredicate(itr.next().get());
 triple.setRpredicate(triple.getPredicate()); // Added by WuGang
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 context.getCounter("OWL derived triples", "symmetric property").increment(1);
 }
@@ -144,7 +145,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 triple.setRsubject(subject);
 triple.setRobject(object);
 triple.setRpredicate(predicate);
-
+
 /* I only output the last key of the inverse */
 Collection inverse = schemaInverseOfProperties.get(predicate);
 if (inverse != null) {
@@ -154,7 +155,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 triple.setPredicate(derivedPredicate); // Only one of the inverse, the others will be completed in outputInverseOf()
 //triple.setPredicate(itrInverse.next()); // Commented by WuGang 2015-01-27
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 context.getCounter("OWL derived triples", "inverse of").increment(1);
 // Moved to here by WuGang, 2015-01-27
@@ -171,7 +172,7 @@ else if (bytes[0] == 1){ //Inverse Functional
 break;
 case 4:
 case 5:
- // hasn't this part already been handled in inferTransitivityStatements? It is not used here
+ // hasn't this part already been handled in inferTransitivityStatements? It is not used here
 //Transitive property.
I copy to a temporary directory setting a special triple source subject = NumberUtils.decodeLong(bytes, 1); object = NumberUtils.decodeLong(bytes, 9); @@ -191,7 +192,7 @@ else if (bytes[0] == 1){ //Inverse Functional transitiveSource.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setPredicate(Math.abs(predicate)); // context.write(transitiveSource, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, transitiveSource, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); context.getCounter("OWL derived triples", "transitive property input").increment(1); } default: @@ -213,7 +214,7 @@ private void outputInverseOf(long subject, long object, long predicate, Set, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java index 85cb0d9..24381fd 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java @@ -1,9 +1,18 @@ package cn.edu.neu.mitt.mrj.reasoner.owl; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.HashSet; +import java.util.List; import java.util.Set; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.InvalidRequestException; +import org.apache.cassandra.thrift.SchemaDisagreementException; +import org.apache.cassandra.thrift.TimedOutException; +import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; @@ -17,9 +26,11 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import ch.qos.logback.classic.db.DBAppender; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.io.files.readers.FilesTriplesReader; import cn.edu.neu.mitt.mrj.partitioners.MyHashPartitioner; @@ -49,7 +60,7 @@ public class OWLReasoner extends Configured implements Tool { public static final String OWL_ALL_VALUE_TMP = "/dir-tmp-all-some-values/"; public static final String OWL_HAS_VALUE_TMP = "/dir-tmp-has-value/"; - private CassandraDB db; + public CassandraDB db; private int numMapTasks = -1; private int numReduceTasks = -1; @@ -101,10 +112,12 @@ public static void main(String[] args) { try { OWLReasoner owlreasoner = new OWLReasoner(); - owlreasoner.db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); - owlreasoner.db.init(); +// owlreasoner.db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); +// owlreasoner.db.init(); ToolRunner.run(new Configuration(), owlreasoner, args); + +// owlreasoner.db.CassandraDBClose(); } catch (Exception e) { e.printStackTrace(); } @@ -124,12 +137,16 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio //Modified 2015/6/28 try { - db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); - db.init(); + db = new CassandraDB(); +// db.init(); // 这不要init() 否则会出现 TTransportException: java.net.SocketException: 断开的管道 + /* + * getRowCountAccordingInferredSteps 类似的函数中出错。 + * 
具体原因不确定,可能跟client使用有关。 + */ } catch (Exception e) { e.printStackTrace(); } - + do { if (!firstCycle && lastDerivationStep == (currentStep - 4)) @@ -204,8 +221,10 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter OWLReasoner.class, "OWL reasoner: infer properties inherited statements (not recursive), step " + step, new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // not supported + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 5); job.getConfiguration().setInt("reasoner.step", step); job.getConfiguration().setInt("reasoner.previosTransitiveDerivation", previousTransitiveDerivation); job.getConfiguration().setInt("reasoner.previousDerivation", previousInferPropertiesDerivation); @@ -215,7 +234,7 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(OWLNotRecursiveReducer.class); - + job.waitForCompletion(true); @@ -249,19 +268,42 @@ private long inferTransitivityStatements(String[] args) int level = 0; //modified 2015/5/19 - long beforeInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); + long beforeInferCount = db.getRowCountAccordingTripleTypeWithLimitation(TriplesUtils.TRANSITIVE_TRIPLE, 1); while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) { // System.out.println("��ʼ��inferTransitivityStatements��whileѭ����Ѱ�ҡ�"); level++; + Set levels = new HashSet(); + levels.add(new Integer(level-1)); + if (level > 1) + levels.add(new Integer(level-2)); + //Configure input. Take only the directories that are two levels below - Job job = MapReduceReasonerJobConfig.createNewJob( - OWLReasoner.class, - "OWL reasoner: transitivity rule. Level " + level, - new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - numMapTasks, - numReduceTasks, true, true); + Job job = null; + + // for the first two level, we use the whole data in the database + if (level <= 2) + job = MapReduceReasonerJobConfig.createNewJob( + OWLReasoner.class, + "OWL reasoner: transitivity rule. Level " + level, + new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), + 0, + numMapTasks, + numReduceTasks, true, true, 6); + // for the level more than two, we only consider the last two level derived data in the current step + if (level > 2) + job = MapReduceReasonerJobConfig.createNewJob( + OWLReasoner.class, + "OWL reasoner: transitivity rule. Level " + level, + new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + levels, + step, + numMapTasks, + numReduceTasks, true, true ,7); + + job.getConfiguration().setInt("reasoning.baseLevel", step); job.getConfiguration().setInt("reasoning.transitivityLevel", level); job.getConfiguration().setInt("maptasks", Math.max(numMapTasks / 10, 1)); @@ -272,15 +314,32 @@ private long inferTransitivityStatements(String[] args) job.setReducerClass(OWLTransitivityReducer.class); job.waitForCompletion(true); - - // About duplication, we will modify the checkTransitivity to return transitive triple counts - // and then do subtraction. 
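// Sketch of the fixed-point pattern this loop is being moved to (see the counter
// read just below): rather than diffing whole-table row counts, read the job's
// reduce-output counter after each round and stop once a round derives nothing.
// runOneRound stands in for one createNewJob/waitForCompletion cycle; the patch
// additionally re-counts through the inferredsteps index to discard duplicates.
import org.apache.hadoop.mapreduce.Job;

abstract class FixedPointSketch {
    abstract Job runOneRound(int level) throws Exception;

    long runUntilFixedPoint() throws Exception {
        long derived = 0;
        boolean derivedNewStatements = true;
        int level = 0;
        while (derivedNewStatements) {
            Job job = runOneRound(++level);
            long produced = job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS")
                .getValue();
            derived += produced;
            derivedNewStatements = produced > 0;
        }
        return derived;
    }
}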
- - long afterInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); - derivation = afterInferCount - beforeInferCount; - derivedNewStatements = (derivation > 0); - beforeInferCount = afterInferCount; // Update beforeInferCount - //System.out.println(" loop "); + long stepNotFilteredDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); + + long stepDerivation = 0; + if (stepNotFilteredDerivation > 0) { + try { + db.createIndexOnInferredSteps(); + } catch (InvalidRequestException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (UnavailableException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (TimedOutException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (TException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + stepDerivation = db.getRowCountAccordingInferredSteps(level); + } + derivation += stepDerivation; + derivedNewStatements = stepDerivation > 0; } previousTransitiveDerivation = step; @@ -296,7 +355,7 @@ private long inferSameAsStatements(String[] args) { try { boolean derivedSynonyms = true; int derivationStep = 1; - long previousStepDerived = 0; // Added by WuGang 2015-01-30 +// long previousStepDerived = 0; // Added by WuGang 2015-01-30 while (derivedSynonyms) { if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0) // We need not to infer on SameAs @@ -308,23 +367,26 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: build the synonyms table from same as triples - step " + derivationStep++, filters, // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // Added by WuGang, 2015-07-12 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 8); job.setMapperClass(OWLSameAsMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReducer.class); - + job.waitForCompletion(true); // System.out.println("In FilesOWLReasoner: " + job.getCounters().findCounter("synonyms", "replacements").getValue()); Counter cDerivedSynonyms = job.getCounters().findCounter("synonyms","replacements"); long currentStepDerived = cDerivedSynonyms.getValue(); // Added by WuGang 2015-01-30 - derivedTriples += currentStepDerived; - derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 + derivedSynonyms = currentStepDerived > 0; // Added by WuGang 2015-07-12 +// derivedTriples += currentStepDerived; +// derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 //derivedSynonyms = currentStepDerived > 0; - previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 +// previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 } //Filter the table. 
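// The five catch blocks above all do the same printStackTrace. The thrift
// exceptions involved (InvalidRequestException, UnavailableException,
// TimedOutException, SchemaDisagreementException) all extend TException, so
// catching the supertype once is an equivalent drop-in replacement, and it is
// what this patch itself does for the same call in inferSomeAndAllValuesStatements
// further down:
try {
    db.createIndexOnInferredSteps();
} catch (TException e) {
    e.printStackTrace();   // identical handling for every failure mode
}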
@@ -344,11 +406,17 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: sampling more common resources", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // Added by WuGang, 2015-07-12 + step, // not used here numMapTasks, - numReduceTasks, true, false); // input from cassandra, but output to hdfs + numReduceTasks, true, false, 9); // input from cassandra, but output to hdfs job.getConfiguration().setInt("reasoner.samplingPercentage", sampling); //Sampling at 10% job.getConfiguration().setInt("reasoner.threshold", resourceThreshold); //Threshold resources + /* + * output to hdfs + */ + job.setMapperClass(OWLSampleResourcesMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(LongWritable.class); @@ -361,7 +429,7 @@ private long inferSameAsStatements(String[] args) { SequenceFileOutputFormat.setOutputPath(job, commonResourcesPath); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); - + job.waitForCompletion(true); @@ -398,8 +466,10 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: replace triples using the sameAs synonyms: reconstruct triples", new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // Added by WuGang, 2015-07-12 + step, // not used here numMapTasks, - numReduceTasks, false, true); // input from hdfs, but output to cassandra + numReduceTasks, false, true, 10); // input from hdfs, but output to cassandra SequenceFileInputFormat.addInputPath(job, tmpPath); job.setInputFormatClass(SequenceFileInputFormat.class); @@ -408,6 +478,7 @@ private long inferSameAsStatements(String[] args) { job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReconstructReducer.class); + job.waitForCompletion(true); FileSystem fs = FileSystem.get(job.getConfiguration()); @@ -448,8 +519,10 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter OWLReasoner.class, "OWL reasoner: infer equivalence from subclass and subprop. step " + step, filters, + new HashSet(), // Added by WuGang, 20150712 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 11); job.getConfiguration().setInt("maptasks", Math.max(job.getConfiguration().getInt("maptasks", 0) / 10, 1)); job.getConfiguration().setInt("reasoner.step", step); @@ -457,7 +530,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLEquivalenceSCSPReducer.class); - + job.waitForCompletion(true); return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); } @@ -469,6 +542,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, boolean derivedNewStatements = true; long totalDerivation = 0; int previousSomeAllValuesDerivation = -1; + boolean firstCycle = true; // Added by Wugang 20150111 //long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15); // see OWLAllSomeValuesReducer @@ -476,12 +550,15 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, while (derivedNewStatements) { step++; + Job job = MapReduceReasonerJobConfig.createNewJob( OWLReasoner.class, "OWL reasoner: some and all values rule. 
step " + step, new HashSet(), + new HashSet(), + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 12); job.getConfiguration().setInt("reasoner.step", step); job.getConfiguration().setInt("reasoner.previousDerivation", previousSomeAllValuesDerivation); previousSomeAllValuesDerivation = step; @@ -490,7 +567,7 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLAllSomeValuesReducer.class); - + job.waitForCompletion(true); // Added by Wugang 20150111 @@ -498,7 +575,34 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException, // countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer // totalDerivation = countRule15 + countRule16; - derivedNewStatements = (totalDerivation > 0); + + Counter derivedTriples = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS"); + long notFilteredDerivation = derivedTriples.getValue(); + long stepDerivation = 0; + if (firstCycle) + notFilteredDerivation -= previousSomeAllValuesCycleDerivation; + if (notFilteredDerivation > 0) { + previousSomeAllValuesCycleDerivation += notFilteredDerivation; + //Modified by LiYang 2015/9/21 +// try { +// db.createIndexOnInferredSteps(); +// } catch (TException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } + try { + db.createIndexOnInferredSteps(); + } catch (TException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + stepDerivation = db.getRowCountAccordingInferredSteps(step - 1); + totalDerivation += stepDerivation; + derivedNewStatements = stepDerivation > 0; + } else { + derivedNewStatements = false; + } + firstCycle = false; } // Added by Wugang 20150111 @@ -524,8 +628,10 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup OWLReasoner.class, "OWL reasoner: hasValue rule. 
step " + step, new HashSet(), + new HashSet(), + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 13); long schemaOnPropertySize = db.getRowCountAccordingTripleType(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY); if (schemaOnPropertySize == 0) @@ -547,7 +653,26 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup // countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer // countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer // return(countRule14a + countRule14b); - return 0; + try { + db.createIndexOnInferredSteps(); + } catch (InvalidRequestException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (UnavailableException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (TimedOutException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (TException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + long stepDerivation = db.getRowCountAccordingInferredSteps(step - 1); + return stepDerivation; } else { return 0; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java index 5b02e6f..0462b42 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java @@ -17,9 +17,9 @@ import cn.edu.neu.mitt.mrj.utils.FileUtils; import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; - import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; public class OWLSameAsDeconstructMapper extends Mapper { @@ -82,8 +82,8 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept context.write(oKey, oValue); - //�����oKey��һ����Դ����ν�����п��ܣ�����sameas���͵Ļ��϶���һ����������Ҫ�ֱ���Ϊ��ν�����Σ� - //�����oVlaue��owl:sameas��Ԫ������������tripleId+key.getStep()+key.getDerivation()��ǰ�滹��һ��byte + //�����oKey��һ����Դ����ν�����п��ܣ�����sameas���͵Ļ��϶���һ����������Ҫ�ֱ���Ϊ��ν�����Σ� + //�����oVlaue��owl:sameas��Ԫ������������tripleId+key.getStep()+key.getDerivation()��ǰ�滹��һ��byte ++tripleId; } @@ -92,7 +92,7 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept @Override public void setup(Context context) { oValue = new BytesWritable(bValue); - + try { String taskId = context.getConfiguration().get("mapred.task.id").substring(context.getConfiguration().get("mapred.task.id").indexOf("_m_") + 3); taskId = taskId.replaceAll("_", ""); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java index fa3135e..8d1a1a5 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java @@ -10,6 +10,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.NumberUtils; public class OWLSameAsDeconstructReducer 
extends Reducer { @@ -17,7 +18,7 @@ public class OWLSameAsDeconstructReducer extends Reducer storage = new LinkedList(); @@ -38,9 +39,9 @@ public void reduce(LongWritable key, Iterable values, Context con byte[] bValue = iValue.getBytes(); // System.out.println("In processing things before storage, size of iValue is: " + iValue.getLength()); // System.out.println("In processing things before storage, size of bValue is: " + bValue.length); - // ���ڲ�һ���ں�ʱ��������bValue[0]=4���Ǹ�key��value�� - // ���������֮ǰ�Ƚ����滻��resource������storage�ﱣ����������һ��������֮��Ϳ���ֱ�ӽ����滻�ˣ���value�滻 - // ���(�����whileѭ��֮��)�ٰ�storage���汣���ֵ�����滻һ�¡� + // ���ڲ�һ���ں�ʱ��������bValue[0]=4���Ǹ�key��value�� + // ���������֮ǰ�Ƚ����滻��resource������storage�ﱣ����������һ��������֮��Ϳ���ֱ�ӽ����滻�ˣ���value�滻 + // ���(�����whileѭ��֮��)�ٰ�storage���汣���ֵ�����滻һ�¡� if (bValue[0] == 4) {//Same as long resource = NumberUtils.decodeLong(bValue, 1); replacement = true; @@ -54,14 +55,14 @@ public void reduce(LongWritable key, Iterable values, Context con byte[] bTempValue = new byte[15+8]; // Added by WuGang System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang - iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, �����д��һ���滻ǰ��resource + iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, �����д��һ���滻ǰ��resource context.write(oKey, iValue); countOutput++; context.getCounter("reasoner", "substitutions").increment(1); } } - Iterator itr2 = storage.iterator(); //���storageΪ����˵���������������û�п�����sameas���滻�Ķ��� + Iterator itr2 = storage.iterator(); //���storageΪ����˵���������������û�п�����sameas���滻�Ķ��� while (itr2.hasNext()) { byte[] bValue = itr2.next(); oValue.set(bValue, 0, bValue.length); @@ -70,15 +71,19 @@ public void reduce(LongWritable key, Iterable values, Context con // System.out.println("In processing things in storage, size of bValue is: " + bValue.length); System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang - oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, �����д��һ���滻ǰ��resource + oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, �����д��һ���滻ǰ��resource context.write(oKey, oValue); } - //�����oKey��һ����Դ������sameas����������Ķ����滻���ˣ���ʵ�����Ѿ�����sameas�滻����ˣ� - //�����oVlaue��owl:sameas��Ԫ������������tripleId+key.getStep()+key.getDerivation()��ǰ�滹��һ��byte + //�����oKey��һ����Դ������sameas����������Ķ����滻���ˣ���ʵ�����Ѿ�����sameas�滻����ˣ� + //�����oVlaue��owl:sameas��Ԫ������������tripleId+key.getStep()+key.getDerivation()��ǰ�滹��һ��byte if (replacement) { //Increment counter of replacements context.getCounter("reasoner", "substitutions").increment(countOutput + storage.size()); } } + public void setup(Context context) throws IOException, InterruptedException{ + CassandraDB.setConfigLocation(); + + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java index a526c85..ed4b73f 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java @@ -29,7 +29,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup /* Source triple: s owl:sameAs o */ long olKey = 0; long olValue = 0; - if (value.getSubject() > value.getObject()) { 
//key is always the larger id, value the smaller + if (value.getSubject() > value.getObject()) { //key is always the larger id, value the smaller olKey = value.getSubject(); olValue = value.getObject(); } else { @@ -37,18 +37,21 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup olValue = value.getSubject(); } - // the smallest id in a synonym group identifies the group + // the smallest id in a synonym group identifies the group oKey.set(olKey); bValue[0] = 0; NumberUtils.encodeLong(bValue, 1, olValue); oValue.set(bValue, 0, bValue.length); - context.write(oKey, oValue); //key is the larger id, value the smaller: says which group each resource belongs to + context.write(oKey, oValue); //key is the larger id, value the smaller: says which group each resource belongs to oKey.set(olValue); bValue[0] = 1; NumberUtils.encodeLong(bValue, 1, olKey); oValue.set(bValue, 0, bValue.length); - context.write(oKey, oValue); //key is the smaller id, value the larger: says which resources each group contains + context.write(oKey, oValue); //key is the smaller id, value the larger: says which resources each group contains + } + public void setup(Context context) throws IOException{ + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java index 827e360..887503b 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java @@ -8,6 +8,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.utils.NumberUtils; public class OWLSameAsReconstructMapper extends Mapper { @@ -17,25 +18,26 @@ public class OWLSameAsReconstructMapper extends Mapper values, Context context) throws IOException, InterruptedException { // System.out.println("In OWLSameAsReconstructReducer!!!"); @@ -28,31 +28,31 @@ public void reduce(BytesWritable key, Iterable values, Context co oKey.setDerivation(bKey[12]); int elements = 0; - Iterator itr = values.iterator(); ////from byte 1 onward: the owl:sameAs resource; byte 0 marks which position of the triple is replaced, possible values 0,1,2,3,4: 4 means the predicate is owl:sameAs (an owl:sameAs triple), 0 subject, 1 predicate, 2 and 3 object + Iterator itr = values.iterator(); ////from byte 1 onward: the owl:sameAs resource; byte 0 marks which position of the triple is replaced, possible values 0,1,2,3,4: 4 means the predicate is owl:sameAs (an owl:sameAs triple), 0 subject, 1 predicate, 2 and 3 object while (itr.hasNext()) { elements++; byte[] bValue = itr.next().getBytes(); - long resource = NumberUtils.decodeLong(bValue, 1); //the owl:sameAs resource substituted in below - long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, the resource before replacement + long resource = NumberUtils.decodeLong(bValue, 1); //the owl:sameAs resource substituted in below + long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, the resource before replacement switch (bValue[0]) { case 0: - oValue.setSubject(resource); //replace the subject - oValue.setRsubject(originalResource); // Added by Wugang, original subject + oValue.setSubject(resource); //replace the subject + oValue.setRsubject(originalResource); // Added by Wugang, original subject // System.out.println("Replacing subject: " + resource); break; case 1: - oValue.setPredicate(resource); //replace the predicate - oValue.setRpredicate(originalResource); // Added by Wugang, original predicate + oValue.setPredicate(resource); //replace the predicate + oValue.setRpredicate(originalResource); // Added by Wugang, original predicate // System.out.println("Replacing predicate: " + resource); break; - case 2: //replace the object - case 3: //replace the object (literal) + case 2: //replace the object + case 3: //replace the object (literal) if (bValue[0] == 2) oValue.setObjectLiteral(false); else oValue.setObjectLiteral(true);
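The switch above decodes a fixed-layout value record. Judging from the decodeLong offsets (the layout is not documented in the patch), byte 0 is a position flag, bytes 1-8 hold the replacement resource, and bytes 9-16 the original resource kept for the justification. A self-contained sketch of that record, assuming the big-endian encoding NumberUtils appears to use:

    // Sketch of the value record decoded above (offsets inferred from the code):
    //   byte 0      position flag: 0 subject, 1 predicate, 2 object, 3 literal object,
    //               4 owl:sameAs marker
    //   bytes 1-8   replacement resource id
    //   bytes 9-16  original resource id, kept for the justification
    final class SameAsRecordSketch {
        static byte[] encode(byte flag, long replacement, long original) {
            byte[] b = new byte[17];
            b[0] = flag;
            putLong(b, 1, replacement);
            putLong(b, 9, original);
            return b;
        }
        static byte flag(byte[] b)        { return b[0]; }
        static long replacement(byte[] b) { return getLong(b, 1); }
        static long original(byte[] b)    { return getLong(b, 9); }

        private static void putLong(byte[] b, int off, long v) {
            for (int i = 7; i >= 0; i--) { b[off + i] = (byte) v; v >>>= 8; }
        }
        private static long getLong(byte[] b, int off) {
            long v = 0;
            for (int i = 0; i < 8; i++) v = (v << 8) | (b[off + i] & 0xFFL);
            return v;
        }
    }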
oValue.setObject(resource); - oValue.setRobject(originalResource); // Added by Wugang, original object + oValue.setRobject(originalResource); // Added by Wugang, original object // System.out.println("Replacing object: " + resource); break; default: @@ -61,24 +61,24 @@ public void reduce(BytesWritable key, Iterable values, Context co } if (elements == 3){ - // Added by WuGang, handle rule 11 + // Added by WuGang, handle rule 11 // oValue.setRsubject(rsubject) if ((oValue.getSubject() == oValue.getRsubject()) && (oValue.getPredicate() == oValue.getRpredicate()) && (oValue.getObject() == oValue.getRobject())) - oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this does not count as a sameAs rule application + oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this does not count as a sameAs rule application else { if ((oValue.getPredicate() == TriplesUtils.OWL_SAME_AS) && (oValue.getRpredicate() == TriplesUtils.OWL_SAME_AS)) - oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7 + oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7 else - oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL - // Horst rule 11 + oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL + // Horst rule 11 } // System.out.println("Find a complete replacement of triple: " + oValue); - CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); + CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); // context.write(oKey, oValue); } } @@ -86,5 +86,13 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + + } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java index a7988da..83fbdf7 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java @@ -25,7 +25,7 @@ public class OWLSameAsReducer extends Reducer duplicates = new HashSet(); private List storage = new LinkedList(); - + @Override public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -44,12 +44,12 @@ public void reduce(LongWritable key, Iterable values, Context con BytesWritable value = itr.next(); long lValue = NumberUtils.decodeLong(value.getBytes(), 1); // System.out.println("processing " + lValue + " with the first byte is: " + value.getBytes()[0]); - if (value.getBytes()[0] != 0) { // 1: each value is a member of this synonym group + if (value.getBytes()[0] != 0) { // 1: each value is a member of this synonym group //Store in-memory storage.add(lValue); // System.out.println("Storage size is: " + storage.size()); //} - } else { // 0: merge, the value names the group this resource belongs to + } else { // 0: merge, the value names the group this resource belongs to // System.out.println("Prepare to replace: lValue is " + lValue + " and oValue.getSubject() is " + oValue.getSubject()); if (lValue < oValue.getSubject()) { // System.out.println("Hahahahah, I'm here!"); @@ -65,7 +65,7 @@ public void reduce(LongWritable key, Iterable values, Context con long lValue = itr2.next(); if (!duplicates.contains(lValue)) { oValue.setObject(lValue); - CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); + CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); duplicates.add(lValue); } } @@ -91,4 +91,11 @@
public void setup(Context context) { oKey.setDerivation(TripleSource.OWL_DERIVED); oKey.setStep(context.getConfiguration().getInt("reasoner.step", 0)); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java index ace1796..2c8aa57 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java @@ -45,6 +45,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } public void setup(Context context) { + threshold = context.getConfiguration().getInt("reasoner.samplingPercentage", 0); } } \ No newline at end of file diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java index d2c658e..50dfe04 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java @@ -6,6 +6,8 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + public class OWLSampleResourcesReducer extends Reducer { //private static Logger log = LoggerFactory.getLogger(OWLSampleResourcesReducer.class); @@ -34,6 +36,8 @@ public void reduce(LongWritable key, Iterable values, Context cont @Override public void setup(Context context) { + CassandraDB.setConfigLocation(); + threshold = context.getConfiguration().getInt("reasoner.threshold", 0); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java index b2b04bd..09232eb 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java @@ -28,7 +28,9 @@ public class OWLTransitivityMapper extends Mapper minLevel) { + if (level > minLevel) { NumberUtils.encodeLong(keys,0,value.getPredicate()); NumberUtils.encodeLong(keys,8,value.getSubject()); oKey.set(keys, 0, 16); @@ -63,19 +65,20 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup values[0] = 3; else values[0] = 2; - NumberUtils.encodeLong(values, 1, step); + NumberUtils.encodeLong(values, 1, level); NumberUtils.encodeLong(values, 9, value.getObject()); oValue.set(values, 0, 17); context.write(oKey, oValue); } - //����u p w, w p v���������key��(p, w),�����value��(value[0], key.getStep(), value.getObject) + //����u p w, w p v���������key��(p, w),�����value��(value[0], key.getStep(), value.getObject) } } @Override public void setup(Context context) { + level = context.getConfiguration().getInt("reasoning.transitivityLevel", 0); baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 0) - 1; minLevel = Math.max(1, (int)Math.pow(2,level - 2)) + baseLevel; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java index 7ad71eb..beb7b8d 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java @@ -69,9 
+69,9 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setPredicate(NumberUtils.decodeLong(key.getBytes(),0)); - // Added by WuGang,���extended triple������Ϊu p w,����w��һ���ؼ���resource�������ع�ԭʼ��ruleǰ�� + // Added by WuGang,���extended triple������Ϊu p w,����w��һ���ؼ��resource�������ع�ԭʼ��ruleǰ�� triple.setType(TriplesUtils.OWL_HORST_4); -// triple.setRsubject(rsubject); // �������������������ģ���μ�����Ĵ��� +// triple.setRsubject(rsubject); // �����������������ģ���μ�����Ĵ��� triple.setRpredicate(NumberUtils.decodeLong(key.getBytes(),0)); triple.setRobject(NumberUtils.decodeLong(key.getBytes(), 8)); @@ -87,13 +87,15 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setSubject(entry.getKey()); triple.setObject(entry2.getKey()); - // Added by Wugang, ���extended triple��ʵ�������rsubject�費���ö�����ν����Ϊ�˱��������������ð� - triple.setRsubject(triple.getSubject()); // ��Ϊ��ѡȡu p w��Ϊ���triple��������е�u�������������ĺ�������� + // Added by Wugang, ���extended triple��ʵ�������rsubject�費���ö�����ν����Ϊ�˱������������ð� + triple.setRsubject(triple.getSubject()); // ��Ϊ��ѡȡu p w��Ϊ���triple��������е�u������������ĺ�������� - source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); + // Modified by WuGang, 2015-07-15 + //source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); + source.setTransitiveLevel((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); // System.out.println("In OWLTransitivityReducer: " + triple); } @@ -104,12 +106,19 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
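The level bookkeeping in OWLTransitivityMapper and OWLTransitivityReducer implements a doubling scheme: a chain derived at level L can have length up to 2^(L-1), so each pass only re-joins chains long enough to produce something new, and a joined triple's level is the sum of its parents' levels minus the base. A worked sketch of the two formulas used above:

    final class TransitivityLevelSketch {
        // cf. OWLTransitivityMapper.setup(): triples below this level were already
        // fully combined in earlier passes and need not be re-expanded.
        static int minLevel(int level, int baseLevel) {
            return Math.max(1, (int) Math.pow(2, level - 2)) + baseLevel;
        }
        // cf. OWLTransitivityReducer: the level of a triple joined from two chains.
        static int combinedLevel(long left, long right, int baseLevel) {
            return (int) (Math.abs(left) + Math.abs(right) - baseLevel);
        }
        public static void main(String[] args) {
            int base = 0;
            System.out.println(minLevel(2, base));          // 1: short chains still join
            System.out.println(minLevel(4, base));          // 4: only chains from level >= 4
            System.out.println(combinedLevel(2, 2, base));  // 4: two level-2 chains meet
        }
    }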
- baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 1) - 1; level = context.getConfiguration().getInt("reasoning.transitivityLevel", -1); // Modified by WuGang 2015-01-28 //source.setDerivation(TripleSource.OWL_DERIVED); + source.setStep(baseLevel + 1); // Added by WuGang, 2015-07-15 source.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setObjectLiteral(false); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index b112445..d709301 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -2,9 +2,20 @@ import java.io.IOException; +import java.nio.ByteBuffer; import java.util.HashSet; +import java.util.List; import java.util.Set; +import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.InvalidRequestException; +import org.apache.cassandra.thrift.SchemaDisagreementException; +import org.apache.cassandra.thrift.TimedOutException; +import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.BytesWritable; @@ -13,11 +24,14 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; +//import org.apache.hadoop.mapred.lib.MultipleOutputs; public class RDFSReasoner extends Configured implements Tool { @@ -26,7 +40,7 @@ public class RDFSReasoner extends Configured implements Tool { private int numReduceTasks = -1; public static int step = 0; private int lastExecutionPropInheritance = -1; - private int lastExecutionDomRange = -1; + private int lastExecutionDomRange = -1; private void parseArgs(String[] args) { @@ -69,22 +83,25 @@ public static void main(String[] args) { // The derivation will be launched in run() - public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException { + public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { long time = System.currentTimeMillis(); - parseArgs(args); Job job = null; long derivation = 0; - + + // RDFS subproperty inheritance reasoning // job = createNewJob("RDFS subproperty inheritance reasoning", "FILTER_ONLY_HIDDEN"); job = MapReduceReasonerJobConfig.createNewJob( RDFSReasoner.class, "RDFS subproperty inheritance reasoning", new HashSet(), + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 1); + job.setMapperClass(RDFSSubPropInheritMapper.class); job.setMapOutputKeyClass(BytesWritable.class); 
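Every call site in this file now passes two filter sets, the current step, the Cassandra/HDFS direction flags, and a trailing job number. The patch does not show the new createNewJob(...) body itself; a hypothetical sketch of its shape (the property names here are assumptions, not the project's actual keys):

    import java.util.Set;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    final class JobConfigSketch {
        static Job createNewJob(Class<?> jarClass, String name,
                                Set<Integer> typeFilters, Set<Integer> extraFilters,
                                int step, int numMapTasks, int numReduceTasks,
                                boolean inputFromCassandra, boolean outputToCassandra,
                                int jobId) throws Exception {
            Configuration conf = new Configuration();
            conf.setInt("reasoner.step", step);
            conf.setInt("mrj.job.id", jobId);      // assumed property name
            // Job.getInstance is the Hadoop 2.x factory; older code used new Job(conf, name).
            Job job = Job.getInstance(conf, name);
            job.setJarByClass(jarClass);
            job.setNumReduceTasks(numReduceTasks);
            // Input and output formats would be wired to Cassandra (e.g. CqlOutputFormat)
            // or HDFS sequence files depending on the two boolean flags.
            return job;
        }
    }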
job.setMapOutputValueClass(LongWritable.class); @@ -93,10 +110,12 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep job.getConfiguration().setInt("lastExecution.step", lastExecutionPropInheritance); lastExecutionPropInheritance = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-inherit"); + job.waitForCompletion(true); long propInheritanceDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); derivation += propInheritanceDerivation; + // RDFS subproperty domain and range reasoning // job = createNewJob("RDFS subproperty domain and range reasoning", "FILTER_ONLY_HIDDEN"); @@ -104,14 +123,17 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subproperty domain and range reasoning", new HashSet(), + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 2); job.setMapperClass(RDFSSubPropDomRangeMapper.class); job.setMapOutputKeyClass(BytesWritable.class); // Modified by WuGang, 2010-08-26 job.setMapOutputValueClass(LongWritable.class); //job.setPartitionerClass(MyHashPartitioner.class); // Is this ok? seems not necessary job.setReducerClass(RDFSSubpropDomRangeReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); + job.getConfiguration().setInt("lastExecution.step", lastExecutionDomRange); lastExecutionDomRange = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-domain-range"); @@ -122,7 +144,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep // RDFS cleaning up subprop duplicates // We remove it for simplicity. 
That means we will not support stop and restart from breakpoints - + //RDFS subclass reasoning // job = createNewJob("RDFS subclass reasoning", "FILTER_ONLY_TYPE_SUBCLASS"); @@ -132,13 +154,16 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subclass reasoning", filters, + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 3); job.setMapperClass(RDFSSubclasMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSubclasReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-subclass-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); @@ -163,14 +188,16 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS special properties reasoning", filters, + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 4); job.setMapperClass(RDFSSpecialPropsMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSpecialPropsReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-special-props-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java index f915446..9a8e1b4 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java @@ -87,8 +87,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } @Override - public void setup(Context context) throws IOException { - + public void setup(Context context) throws IOException { try{ CassandraDB db = new CassandraDB(); if (memberProperties == null) { @@ -127,5 +126,16 @@ public void setup(Context context) throws IOException { } catch (TException e) { e.printStackTrace(); } + } + +// protected void cleanup(Context context) throws IOException, InterruptedException{ +// try { +// CassandraDB db = new CassandraDB(); +// db.UnIndex(); +// db.CassandraDBClose(); +// } catch (Exception e) { +// // TODO: handle exception +// } +// } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java index fc5ea85..34913b0 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java @@ -2,10 +2,13 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import org.apache.cassandra.thrift.Cassandra; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; @@ 
-20,11 +23,19 @@ public class RDFSSpecialPropsReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bKey = key.getBytes(); Iterator itr = values.iterator(); + + + while (itr.hasNext()) { long value = itr.next().get(); if (value == TriplesUtils.RDFS_LITERAL && (bKey[0] == 0 || bKey[0] == 2)) @@ -54,9 +65,9 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); break; case 2: // Rule 13 oTriple.setSubject(NumberUtils.decodeLong(bKey, 1)); @@ -69,8 +80,8 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of literal").increment(1); break; case 3: // Rule 8 @@ -85,11 +96,11 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); //context.write(source, oTriple); break; - case 4: // û�ж�Ӧ��rdfs rule�� - case 5: // û�ж�Ӧ��rdfs rule�� + case 4: // û�ж�Ӧ��rdfs rule�� + case 5: // û�ж�Ӧ��rdfs rule�� oTriple.setSubject(NumberUtils.decodeLong(bKey, 1)); oTriple.setPredicate(TriplesUtils.RDFS_MEMBER); // oTriple.setPredicate(NumberUtils.decodeLong(bKey, 9)); @@ -99,18 +110,27 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) else oTriple.setObjectLiteral(true); context.getCounter("RDFS derived triples", "subproperty inheritance of member").increment(1); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); // context.write(source, oTriple); default: break; } + } @Override public void setup(Context context) { - CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
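Every rule branch above fills in the same record: the derived triple, a rule id, and the antecedent triple (Rsubject/Rpredicate/Robject) that justifies the inference. A self-contained stand-in for that shape, mirroring the Triple fields used in the code:

    // Sketch of the justification bookkeeping these reducers perform: each derived
    // triple carries the rule id and the antecedent ("reason") triple that produced
    // it, so the inference can be traced back later.
    final class JustifiedTriple {
        long s, p, o;    // derived triple
        int  ruleId;     // e.g. TriplesUtils.RDFS_12 in the code above
        long rs, rp, ro; // antecedent triple

        static JustifiedTriple of(long s, long p, long o,
                                  int ruleId, long rs, long rp, long ro) {
            JustifiedTriple t = new JustifiedTriple();
            t.s = s; t.p = p; t.o = o;
            t.ruleId = ruleId;
            t.rs = rs; t.rp = rp; t.ro = ro;
            return t;
        }
    }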
source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + + } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java index 9821e66..7ca4151 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java @@ -4,6 +4,7 @@ import java.util.HashSet; import java.util.Set; +import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; @@ -11,6 +12,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -60,15 +62,14 @@ public void map(Long key, Row row, Context context) throws IOException, Interru return; Triple value = CassandraDB.readJustificationFromMapReduceRow(row); - //Check if the predicate has a domain if (domainSchemaTriples.contains(value.getPredicate())) { NumberUtils.encodeLong(bKey,0,value.getSubject()); // Added by WuGang, 2010-08-26 NumberUtils.encodeLong(bKey,8,value.getObject()); // Added by WuGang, 2010-08-26 // oKey.set(value.getSubject()); oKey.set(bKey, 0, 16); // Modified by WuGang, 2010-08-26 - oValue.set(value.getPredicate() << 1); // ����ͨ��oValue�����һλ��0��ȷ������ǰ��������domain - context.write(oKey, oValue); // ��<, p>����ȥ, for rule 2 + oValue.set(value.getPredicate() << 1); // ����ͨ��oValue�����һλ��0��ȷ������ǰ�������domain + context.write(oKey, oValue); // ��<, p>����ȥ, for rule 2 } //Check if the predicate has a range @@ -78,8 +79,8 @@ public void map(Long key, Row row, Context context) throws IOException, Interru NumberUtils.encodeLong(bKey,8,value.getSubject()); // Added by WuGang, 2010-08-26 // oKey.set(value.getObject()); oKey.set(bKey, 0, 16); // Modified by WuGang, 2010-08-26 - oValue.set((value.getPredicate() << 1) | 1); // ����ͨ��oValue�����һλ��1��ȷ������ǰ��������range - context.write(oKey, oValue); // ��<, p>����ȥ, for rule 3 + oValue.set((value.getPredicate() << 1) | 1); // ����ͨ��oValue�����һλ��1��ȷ������ǰ�������range + context.write(oKey, oValue); // ��<, p>����ȥ, for rule 3 } } @@ -88,23 +89,25 @@ public void map(Long key, Row row, Context context) throws IOException, Interru protected void setup(Context context) throws IOException { hasSchemaChanged = false; previousExecutionStep = context.getConfiguration().getInt("lastExecution.step", -1); - - try{ + + try{ CassandraDB db = new CassandraDB(); + if (domainSchemaTriples == null) { domainSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_DOMAIN_PROPERTY); hasSchemaChanged = db.loadSetIntoMemory(domainSchemaTriples, filters, previousExecutionStep); + // db not close } if (rangeSchemaTriples == null) { rangeSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY); - + hasSchemaChanged |= db.loadSetIntoMemory(rangeSchemaTriples, 
filters, previousExecutionStep); - db.CassandraDBClose(); + db.CassandraDBClose(); } }catch(TTransportException tte){ tte.printStackTrace(); @@ -121,13 +124,15 @@ protected void setup(Context context) throws IOException { } // Some debug codes -// System.out.println("In mapper setup, peviousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); -// System.out.println("Input split: " + context.getInputSplit()); -// try { -// System.out.println("Input split length: " + context.getInputSplit().getLength()); -// } catch (InterruptedException e) { -// e.printStackTrace(); -// } + System.out.println("In mapper setup, peviousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); + System.out.println("Input split: " + context.getInputSplit()); + try { + System.out.println("Input split length: " + context.getInputSplit().getLength()); + } catch (InterruptedException e) { + e.printStackTrace(); + } } + + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java index 8347faf..04f66fe 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java @@ -7,6 +7,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,15 +58,16 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter oKey.set(bKey, 0, 17); oValue.set(pre); context.write(oKey, oValue); +// System.out.println(" i " + i); } - + //Check suprop transitivity if (pre == TriplesUtils.RDFS_SUBPROPERTY && subpropSchemaTriples.contains(obj)) { //Write the 05 + subject bKey[0] = 5; NumberUtils.encodeLong(bKey, 1, sub); oKey.set(bKey, 0, 9); - oValue.set(obj); + oValue.set(obj); context.write(oKey, oValue); } } @@ -84,7 +86,9 @@ protected void setup(Context context) throws IOException { hasSchemaChanged = db.loadSetIntoMemory(subpropSchemaTriples, filters, previousExecutionStep); // hasSchemaChanged = FilesTriplesReader.loadSetIntoMemory(subpropSchemaTriples, context, // "FILTER_ONLY_SUBPROP_SCHEMA", previousExecutionStep); - +// System.out.println("AAA"); +// db.createIndexOnInferredSteps(); +// System.out.println("create on inferredsteps"); db.CassandraDBClose(); } catch (TException e) { e.printStackTrace(); @@ -92,5 +96,8 @@ protected void setup(Context context) throws IOException { } else { log.debug("Subprop schema triples already loaded in memory"); } + + } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java index 67ffb1f..56bd6cb 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java @@ -5,6 +5,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,6 +37,20 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup oKey.set(bKey, 0, 9); context.write(oKey, oValue); -// 
System.out.println("׼����RDFSSubclasMapper-"+value); +// System.out.println("׼����RDFSSubclasMapper-"+value); } + + protected void setup(Context context) throws IOException, InterruptedException{ + + } +// protected void cleanup(Context context) throws IOException, InterruptedException{ +// try { +// CassandraDB db = new CassandraDB(); +// db.UnIndex(); +// db.CassandraDBClose(); +// } catch (Exception e) { +// // TODO: handle exception +// } +// } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java index 64f43f2..69332a7 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java @@ -2,9 +2,11 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -27,10 +29,13 @@ import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; -public class RDFSSubclasReducer extends Reducer, List> { - - protected static Logger log = LoggerFactory.getLogger(RDFSSubclasReducer.class); - +public class RDFSSubclasReducer + extends + Reducer, List> { + + protected static Logger log = LoggerFactory + .getLogger(RDFSSubclasReducer.class); + public static Map> subclassSchemaTriples = null; protected Set subclasURIs = new HashSet(); protected Set existingURIs = new HashSet(); @@ -38,7 +43,11 @@ public class RDFSSubclasReducer extends Reducer specialSuperclasses = new HashSet(); private TripleSource source = new TripleSource(); private Triple oTriple = new Triple(); - + private Map keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); private void recursiveScanSuperclasses(long value, Set set) { Collection subclassValues = subclassSchemaTriples.get(value); if (subclassValues != null) { @@ -54,65 +63,69 @@ private void recursiveScanSuperclasses(long value, Set set) { } @Override - public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { -// System.out.println("����RDFSSubclasReducer��-"); + public void reduce(BytesWritable key, Iterable values, + Context context) throws IOException, InterruptedException { + // System.out.println("����RDFSSubclasReducer��-"); existingURIs.clear(); Iterator itr = values.iterator(); while (itr.hasNext()) { long value = itr.next().get(); - existingURIs.add(value); //���еı��� + existingURIs.add(value); // ���еı��� } - + Iterator oTypes = existingURIs.iterator(); subclasURIs.clear(); while (oTypes.hasNext()) { long existingURI = oTypes.next(); - recursiveScanSuperclasses(existingURI, subclasURIs); //subclasURIs�����е�subclass + recursiveScanSuperclasses(existingURI, subclasURIs); // subclasURIs�����е�subclass } - + subclasURIs.removeAll(existingURIs); - + oTypes = subclasURIs.iterator(); byte[] bKey = key.getBytes(); - long oKey = NumberUtils.decodeLong(bKey,1); + long oKey = NumberUtils.decodeLong(bKey, 1); oTriple.setSubject(oKey); boolean typeTriple = bKey[0] == 0; - if (!typeTriple) { //It's a subclass triple - oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11 + if (!typeTriple) { // It's a subclass triple + 
oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11 // Added by WuGang, 2010-08-26 oTriple.setType(TriplesUtils.RDFS_11); oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDFS_SUBCLASS); - } else { //It's a type triple - oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9 + } else { // It's a type triple + oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9 // Added by WuGang, 2010-08-26 oTriple.setType(TriplesUtils.RDFS_9); oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); } -// while (oTypes.hasNext()) { -// long oType = oTypes.next(); -// oTriple.setObject(oType); -// context.write(source, oTriple); -// } + // while (oTypes.hasNext()) { + // long oType = oTypes.next(); + // oTriple.setObject(oType); + // context.write(source, oTriple); + // } // Modified by WuGang, 2010-08-26 while (oTypes.hasNext()) { long oType = oTypes.next(); oTriple.setObject(oType); for (long obj : existingURIs) { oTriple.setRobject(obj); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); } - } - + } + if (typeTriple) { /* Check special rules */ - if ((subclasURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) - || existingURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)) - && !memberProperties.contains(oTriple.getSubject())) { // Rule 12���μ�RDFSSpecialPropsReducer + if ((subclasURIs + .contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) || existingURIs + .contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)) + && !memberProperties.contains(oTriple.getSubject())) { // Rule + // 12���μ�RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBPROPERTY); oTriple.setObject(TriplesUtils.RDFS_MEMBER); // Added by WuGang, 2010-08-26 @@ -121,16 +134,21 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + + // context.write(source, oTriple); + context.getCounter("RDFS derived triples", + "subproperty of member").increment(1); } - + if (subclasURIs.contains(TriplesUtils.RDFS_DATATYPE) || existingURIs.contains(TriplesUtils.RDFS_DATATYPE)) { specialSuperclasses.clear(); - recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule 13���μ�RDFSSpecialPropsReducer + recursiveScanSuperclasses(oTriple.getSubject(), + specialSuperclasses); + if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule + // 13���μ�RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_LITERAL); // Added by WuGang, 2010-08-26 @@ -139,17 +157,21 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); - 
context.getCounter("RDFS derived triples", "subclass of Literal").increment(1); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); + context.getCounter("RDFS derived triples", + "subclass of Literal").increment(1); } } - + if (subclasURIs.contains(TriplesUtils.RDFS_CLASS) || existingURIs.contains(TriplesUtils.RDFS_CLASS)) { specialSuperclasses.clear(); - recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule 8���μ�RDFSSpecialPropsReducer + recursiveScanSuperclasses(oTriple.getSubject(), + specialSuperclasses); + if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule + // 8���μ�RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_RESOURCE); // Added by WuGang, 2010-08-26 @@ -158,23 +180,28 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); - context.getCounter("RDFS derived triples", "subclass of resource").increment(1); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); + context.getCounter("RDFS derived triples", + "subclass of resource").increment(1); } } } - - //Update the counters + + // Update the counters if (typeTriple) - context.getCounter("RDFS derived triples", "subclass inheritance rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", + "subclass inheritance rule").increment(subclasURIs.size()); else - context.getCounter("RDFS derived triples", "subclass transitivity rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", + "subclass transitivity rule").increment(subclasURIs.size()); } - + @Override public void setup(Context context) throws IOException { - CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works + // around. 
if (subclassSchemaTriples == null) { CassandraDB db; @@ -198,17 +225,17 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } - + if (memberProperties == null) { CassandraDB db; try { db = new CassandraDB(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_MEMBER_SUBPROPERTY); - + memberProperties = new HashSet(); db.loadSetIntoMemory(memberProperties, filters, -1); - + db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); @@ -222,10 +249,19 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } catch (TException e) { e.printStackTrace(); - } + } } source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + + } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + + super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index d774a6d..20c6e08 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -3,9 +3,11 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.AbstractMap; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -15,6 +17,7 @@ import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; import org.apache.cassandra.thrift.UnavailableException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; @@ -42,75 +45,118 @@ public class RDFSSubpropDomRangeReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { - byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26 -// long uri = key.get(); //��domain���ԣ���s����range���ԣ���o - long uri = NumberUtils.decodeLong(bKey, 0); //��domain������s����range������o - long uri_opposite = NumberUtils.decodeLong(bKey, 8); //��domain������o����range������s - - derivedProps.clear(); //���x - - //Get the predicates with a range or domain associated to this URIs - propURIs.clear(); - Iterator itr = values.iterator(); - while (itr.hasNext()) - propURIs.add(itr.next().get()); //���p - - Iterator itrProp = propURIs.iterator(); - while (itrProp.hasNext()) { - Collection objects = null; - long propURI = itrProp.next(); - if ((propURI & 0x1) == 1) { - objects = rangeSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "range matches").increment(1); - } else { - objects = domainSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "domain matches").increment(1); - } - - if (objects != null) { - Iterator itr3 = objects.iterator(); - while (itr3.hasNext()) -// derivedProps.add(itr3.next()); - derivedProps.add(new AbstractMap.SimpleEntry(itr3.next(), propURI)); // Modified by 
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
index d774a6d..20c6e08 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java
@@ -3,9 +3,11 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.AbstractMap;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -15,6 +17,7 @@
 import org.apache.cassandra.thrift.SchemaDisagreementException;
 import org.apache.cassandra.thrift.TimedOutException;
 import org.apache.cassandra.thrift.UnavailableException;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -42,75 +45,118 @@ public class RDFSSubpropDomRangeReducer extends Reducer<BytesWritable, LongWritable, Map<String, ByteBuffer>, List<ByteBuffer>> {
 
+	private Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
+	private Map<String, ByteBuffer> allkeys = new LinkedHashMap<String, ByteBuffer>();
+	private List<ByteBuffer> allvariables = new ArrayList<ByteBuffer>();
+	private List<ByteBuffer> allTValues = new ArrayList<ByteBuffer>();
+	private List<ByteBuffer> stepsValues = new ArrayList<ByteBuffer>();
 
 	@Override
 	public void reduce(BytesWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
-		byte[] bKey = key.getBytes();	// Added by Wugang, 2010-08-26
-//		long uri = key.get();	// s for a domain property, o for a range property
-		long uri = NumberUtils.decodeLong(bKey, 0);	// s for domain, o for range
-		long uri_opposite = NumberUtils.decodeLong(bKey, 8);	// o for domain, s for range
-
-		derivedProps.clear();	// will hold the derived types (x)
-
-		//Get the predicates with a range or domain associated to this URIs
-		propURIs.clear();
-		Iterator<LongWritable> itr = values.iterator();
-		while (itr.hasNext())
-			propURIs.add(itr.next().get());	// collect the properties (p)
-
-		Iterator<Long> itrProp = propURIs.iterator();
-		while (itrProp.hasNext()) {
-			Collection<Long> objects = null;
-			long propURI = itrProp.next();
-			if ((propURI & 0x1) == 1) {
-				objects = rangeSchemaTriples.get(propURI >> 1);
-				context.getCounter("derivation", "range matches").increment(1);
-			} else {
-				objects = domainSchemaTriples.get(propURI >> 1);
-				context.getCounter("derivation", "domain matches").increment(1);
-			}
-
-			if (objects != null) {
-				Iterator<Long> itr3 = objects.iterator();
-				while (itr3.hasNext())
-//					derivedProps.add(itr3.next());
-					derivedProps.add(new AbstractMap.SimpleEntry<Long, Long>(itr3.next(), propURI)); // Modified by WuGang, 2010-08-26
-			}
+		byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26
+		// long uri = key.get(); // s for a domain property, o for a range property
+		long uri = NumberUtils.decodeLong(bKey, 0); // s for domain, o for range
+		long uri_opposite = NumberUtils.decodeLong(bKey, 8); // o for domain, s for range
+
+		Configuration conf = context.getConfiguration();
+		derivedProps.clear(); // will hold the derived types (x)
+
+		Logger logger = LoggerFactory.getLogger(CassandraDB.class);
+		long time = System.currentTimeMillis();
+
+		// Get the predicates with a range or domain associated to this URIs
+		propURIs.clear();
+		Iterator<LongWritable> itr = values.iterator();
+		while (itr.hasNext())
+			propURIs.add(itr.next().get()); // collect the properties (p)
+
+//		logger.info("while1 " + (System.currentTimeMillis() - time));
+//		System.out.println("while1 " + (System.currentTimeMillis() - time));
+
+		Iterator<Long> itrProp = propURIs.iterator();
+		while (itrProp.hasNext()) {
+			Collection<Long> objects = null;
+			long propURI = itrProp.next();
+			if ((propURI & 0x1) == 1) {
+				objects = rangeSchemaTriples.get(propURI >> 1);
+				context.getCounter("derivation", "range matches").increment(1);
+			} else {
+				objects = domainSchemaTriples.get(propURI >> 1);
+				context.getCounter("derivation", "domain matches").increment(1);
+			}
-
-		//Derive the new statements
-//		Iterator itr2 = derivedProps.iterator();
-		Iterator<Entry<Long, Long>> itr2 = derivedProps.iterator(); // Modified by WuGang, 2010-08-26
-		oTriple.setSubject(uri);
-		oTriple.setPredicate(TriplesUtils.RDF_TYPE);
-		oTriple.setObjectLiteral(false);
-		while (itr2.hasNext()) {
-//			oTriple.setObject(itr2.next());
-			Entry<Long, Long> entry = itr2.next();
-			oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26
-			// Added by WuGang, 2010-08-26
-			long propURI = entry.getValue();
-			oTriple.setRpredicate(propURI >> 1); // Modified by WuGang, 2010-12-03: RDFSSubPropDomRangeMapper shifted the id left, shift right here to restore it
-			if ((propURI & 0x1) == 1) { // Rule 3, for range
-				oTriple.setType(TriplesUtils.RDFS_3);
-				oTriple.setRsubject(uri_opposite);
-				oTriple.setRobject(uri);
-			}else{ // Rule 2, for domain
-				oTriple.setType(TriplesUtils.RDFS_2);
-				oTriple.setRsubject(uri);
-				oTriple.setRobject(uri_opposite);
-			}
-
-			CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
-			//context.write(source, oTriple);
+
+			if (objects != null) {
+				Iterator<Long> itr3 = objects.iterator();
+				while (itr3.hasNext())
+					// derivedProps.add(itr3.next());
+					derivedProps.add(new AbstractMap.SimpleEntry<Long, Long>(itr3.next(), propURI)); // Modified by WuGang, 2010-08-26
+			}
-			context.getCounter("RDFS derived triples", "subprop range and domain rule").increment(derivedProps.size());
-	}
+		}
+
+//		logger.info("while2 " + (System.currentTimeMillis() - time));
+		time = System.currentTimeMillis();
+//		System.out.println("while2 " + (System.currentTimeMillis() - time));
+
+		// Derive the new statements
+		// Iterator itr2 = derivedProps.iterator();
+		Iterator<Entry<Long, Long>> itr2 = derivedProps.iterator(); // Modified by WuGang, 2010-08-26
+		oTriple.setSubject(uri);
+		oTriple.setPredicate(TriplesUtils.RDF_TYPE);
+		oTriple.setObjectLiteral(false);
+		while (itr2.hasNext()) {
+			// oTriple.setObject(itr2.next());
+			Entry<Long, Long> entry = itr2.next();
+			oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26
+			// Added by WuGang, 2010-08-26
+			long propURI = entry.getValue();
+			oTriple.setRpredicate(propURI >> 1); // Modified by WuGang, 2010-12-03: RDFSSubPropDomRangeMapper shifted the id left, shift right here to restore it
+			if ((propURI & 0x1) == 1) { // Rule 3, for range
+				oTriple.setType(TriplesUtils.RDFS_3);
+				oTriple.setRsubject(uri_opposite);
+				oTriple.setRobject(uri);
+			} else { // Rule 2, for domain
+				oTriple.setType(TriplesUtils.RDFS_2);
+				oTriple.setRsubject(uri);
+				oTriple.setRobject(uri_opposite);
+			}
+
+			context.getCounter("RDFS derived triples", "subproperty of member").increment(1);
+			CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
+
+//			logger.info("write " + (System.currentTimeMillis() - time));
+			time = System.currentTimeMillis();
+//			System.out.println("finish " + (System.currentTimeMillis() - time));
+			// CassandraDB.writealltripleToMapReduceContext(oTriple, source, context);
+			// context.write(source, oTriple);
+			// _output.write(conf.get(CassandraDB.COLUMNFAMILY_ALLTRIPLES),
+			//		ByteBufferUtil.bytes(key.toString()),
+			//		Collections.singletonList(m));
+			// Reporter reporter = null;
+			// _output.getCollector(CassandraDB.COLUMNFAMILY_ALLTRIPLES,
+			//		reporter).collect(key, arg1);
+		}
+
+		// logger.info(" " + (System.currentTimeMillis() - time));
+		context.getCounter("RDFS derived triples", "subprop range and domain rule").increment(derivedProps.size());
+		// logger.info("finish " + (System.currentTimeMillis() - time));
+		// Mutation m = new Mutation();
+	}
 
 	@Override
 	public void setup(Context context) throws IOException {
 		CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
-		
+//		outputKey = ByteBufferUtil.bytes(context.getConfiguration().get(CassandraDB.COLUMNFAMILY_ALLTRIPLES));
 		try{
 			CassandraDB db = new CassandraDB();
 			if (domainSchemaTriples == null) {
@@ -141,5 +187,20 @@ public void setup(Context context) throws IOException {
 
 		source.setDerivation(TripleSource.RDFS_DERIVED);
 		source.setStep(context.getConfiguration().getInt("reasoner.step", 0));
 	}
+
+	@Override
+	protected void cleanup(
+			Reducer<BytesWritable, LongWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+			throws IOException, InterruptedException {
+
+		super.cleanup(context);
+	}
 }
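The (propURI & 0x1) and propURI >> 1 expressions in the reducer above only make sense because RDFSSubPropDomRangeMapper packs a domain/range flag into the lowest bit of the property id before emitting it: range-derived values carry a 1, domain-derived values a 0. A small self-contained sketch of that encoding follows; the class and method names (encode, propId, isRange) are illustrative, not methods of the codebase.

    // Hypothetical sketch of the flag packing assumed by RDFSSubpropDomRangeReducer.
    public final class DomainRangeFlagSketch {
        static final long DOMAIN = 0L; // RDFS rule 2: (s p o), (p rdfs:domain x) => (s rdf:type x)
        static final long RANGE  = 1L; // RDFS rule 3: (s p o), (p rdfs:range x)  => (o rdf:type x)

        static long encode(long propId, long flag) { return (propId << 1) | flag; }
        static long propId(long encoded)           { return encoded >> 1; }
        static boolean isRange(long encoded)       { return (encoded & 0x1) == 1; }

        public static void main(String[] args) {
            long enc = encode(42L, RANGE);                 // 42 -> 85
            System.out.println(propId(enc));               // 42: the low bit is stripped off again
            System.out.println(isRange(enc));              // true: rule 3 applies
        }
    }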
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java
index 6f295a1..0950e87 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java
@@ -2,13 +2,17 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat;
+import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family;
 import org.apache.cassandra.thrift.InvalidRequestException;
 import org.apache.cassandra.thrift.SchemaDisagreementException;
 import org.apache.cassandra.thrift.TimedOutException;
@@ -21,6 +25,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.sun.corba.se.spi.ior.Writeable;
+
 import cn.edu.neu.mitt.mrj.data.Triple;
 import cn.edu.neu.mitt.mrj.data.TripleSource;
 import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
@@ -38,6 +44,12 @@ public class RDFSSubpropInheritReducer extends Reducer<BytesWritable, LongWritable, Map<String, ByteBuffer>, List<ByteBuffer>> {
 
+	private Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
+	private Map<String, ByteBuffer> allkeys = new LinkedHashMap<String, ByteBuffer>();
+	private List<ByteBuffer> allvariables = new ArrayList<ByteBuffer>();
+	private List<ByteBuffer> allTValues = new ArrayList<ByteBuffer>();
+	private List<ByteBuffer> stepsValues = new ArrayList<ByteBuffer>();
 
 	private void recursiveScanSubproperties(long value, Set<Long> set) {
 		Collection<Long> subprops = subpropSchemaTriples.get(value);
@@ -56,8 +68,8 @@ private void recursiveScanSubproperties(long value, Set<Long> set) {
 	@Override
 	public void reduce(BytesWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
+
 		byte[] bKey = key.getBytes();
-
 		switch(bKey[0]) {
 		case 2:
 		case 3: // rdfs rule 7
@@ -67,11 +79,17 @@ public void reduce(BytesWritable key, Iterable values, Context con
 			propURIs.clear();
 			//filter the properties that are already present
 			Iterator<LongWritable> itr = values.iterator();
+			/*
+			 * The values Iterable is exhausted after one pass of its
+			 * iterator, so record the values in a list.
+			 */
+			List<Long> list1 = new ArrayList<Long>();
 			while (itr.hasNext()) {
 				long value = itr.next().get();
+				list1.add(value);
 				if (!propURIs.contains(value)) {
 					recursiveScanSubproperties(value, propURIs);
 				}
+
 			}
 
 			Iterator<Long> itr3 = propURIs.iterator();
@@ -90,10 +108,9 @@ public void reduce(BytesWritable key, Iterable values, Context con
 			// Modified by WuGang, 2010-08-26
 			while (itr3.hasNext()) {
 				oTriple.setPredicate(itr3.next());
-				for (LongWritable pre : values) {
-					oTriple.setRpredicate(pre.get());
-					CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
-//					context.write(source, oTriple);
+				for (Long pre : list1) {
+					oTriple.setRpredicate(pre);
+					context.getCounter("RDFS derived triples", "subproperty of member").increment(1);
 				}
 			}
@@ -105,8 +122,10 @@ public void reduce(BytesWritable key, Iterable values, Context con
 			propURIs.clear();
 			//filter the properties that are already present
 			Iterator<LongWritable> itr2 = values.iterator();
+			List<Long> list2 = new ArrayList<Long>();
 			while (itr2.hasNext()) {
 				long value = itr2.next().get();
+				list2.add(value);
 				if (!propURIs.contains(value)) {
 					recursiveScanSubproperties(value, propURIs);
 				}
@@ -125,13 +144,15 @@ public void reduce(BytesWritable key, Iterable values, Context con
 //				context.write(source, oTriple);
 //			}
 			// Modified by WuGang, 2010-08-26
+
 			while (itr4.hasNext()) {
 				oTriple.setObject(itr4.next());
-				for(LongWritable obj:values){
-					oTriple.setRobject(obj.get());
-					CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context);
+				for(Long obj:list2){
+					oTriple.setRobject(obj);
+					context.getCounter("RDFS derived triples", "subproperty of member").increment(1);
 //					context.write(source, oTriple);
 				}
+
 			}
 
 			context.getCounter("RDFS derived triples", "subprop transitivity rule").increment(propURIs.size());
@@ -140,6 +161,7 @@ public void reduce(BytesWritable key, Iterable values, Context con
 		default:
 			break;
 		}
+
 	}
 
 	@Override
@@ -154,7 +176,6 @@ public void setup(Context context) throws IOException {
 			filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
 			subpropSchemaTriples = db.loadMapIntoMemory(filters);
 //			subpropSchemaTriples = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_SUBPROP_SCHEMA", context);
-
 			db.CassandraDBClose();
 		} catch (TTransportException e) {
 			e.printStackTrace();
@@ -177,6 +198,22 @@ public void setup(Context context) throws IOException {
 		source.setStep(context.getConfiguration().getInt("reasoner.step", 0));
 
 		oTriple2.setPredicate(TriplesUtils.RDF_TYPE);
-		oTriple2.setObjectLiteral(false);
+		oTriple2.setObjectLiteral(false);
+
 	}
+
+	@Override
+	protected void cleanup(
+			Reducer<BytesWritable, LongWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+			throws IOException, InterruptedException {
+		/*
+		 * Without calling close, nothing gets written to the database.
+		 */
+
+		super.cleanup(context);
+	}
 }
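The translated comment in the hunk above ("values are exhausted after one pass of the iterator") points at a standard Hadoop pitfall: a Reducer's value Iterable can only be traversed once, and the framework reuses the Writable object it hands out, so a second loop such as the removed for (LongWritable pre : values) sees nothing, and stored references all alias one mutated object. Below is a minimal sketch of the defensive copy that list1/list2 implement in the patch; the reducer class and counter names here are illustrative only, not part of mrj.

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Reducer;

    // Illustrative reducer: copy the primitives out of the reused Writables
    // before making a second pass, the same trick as list1/list2 above.
    public class CopyValuesReducerSketch
            extends Reducer<BytesWritable, LongWritable, NullWritable, NullWritable> {
        @Override
        protected void reduce(BytesWritable key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            List<Long> copy = new ArrayList<Long>();
            for (LongWritable v : values)
                copy.add(v.get());      // keep the long, not the reused Writable object
            for (Long a : copy)         // a second (and third) pass is now safe
                for (Long b : copy)
                    context.getCounter("sketch", "pairs").increment(a.equals(b) ? 0 : 1);
        }
    }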