diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath
index 569943f..b1b26f7 100644
--- a/mrj-0.1/.classpath
+++ b/mrj-0.1/.classpath
@@ -1,10 +1,12 @@
diff --git a/mrj-0.1/.gitignore b/mrj-0.1/.gitignore
new file mode 100644
index 0000000..f8d886b
--- /dev/null
+++ b/mrj-0.1/.gitignore
@@ -0,0 +1,2 @@
+/bin
+/bin/
diff --git a/mrj-0.1/.idea/.name b/mrj-0.1/.idea/.name
new file mode 100644
index 0000000..bb04bae
--- /dev/null
+++ b/mrj-0.1/.idea/.name
@@ -0,0 +1 @@
+mrj-0.1
\ No newline at end of file
diff --git a/mrj-0.1/.idea/compiler.xml b/mrj-0.1/.idea/compiler.xml
new file mode 100644
index 0000000..a852314
--- /dev/null
+++ b/mrj-0.1/.idea/compiler.xml
@@ -0,0 +1,23 @@
diff --git a/mrj-0.1/.idea/copyright/profiles_settings.xml b/mrj-0.1/.idea/copyright/profiles_settings.xml
new file mode 100644
index 0000000..e7bedf3
--- /dev/null
+++ b/mrj-0.1/.idea/copyright/profiles_settings.xml
@@ -0,0 +1,3 @@
diff --git a/mrj-0.1/.idea/encodings.xml b/mrj-0.1/.idea/encodings.xml
new file mode 100644
index 0000000..d821048
--- /dev/null
+++ b/mrj-0.1/.idea/encodings.xml
@@ -0,0 +1,4 @@
diff --git a/mrj-0.1/.idea/misc.xml b/mrj-0.1/.idea/misc.xml
new file mode 100644
index 0000000..1a5ae83
--- /dev/null
+++ b/mrj-0.1/.idea/misc.xml
@@ -0,0 +1,178 @@
+ 1.8
\ No newline at end of file
diff --git a/mrj-0.1/.idea/modules.xml b/mrj-0.1/.idea/modules.xml
new file mode 100644
index 0000000..39bb12e
--- /dev/null
+++ b/mrj-0.1/.idea/modules.xml
@@ -0,0 +1,8 @@
diff --git a/mrj-0.1/.idea/scopes/scope_settings.xml b/mrj-0.1/.idea/scopes/scope_settings.xml
new file mode 100644
index 0000000..922003b
--- /dev/null
+++ b/mrj-0.1/.idea/scopes/scope_settings.xml
@@ -0,0 +1,5 @@
diff --git a/mrj-0.1/.idea/vcs.xml b/mrj-0.1/.idea/vcs.xml
new file mode 100644
index 0000000..6564d52
--- /dev/null
+++ b/mrj-0.1/.idea/vcs.xml
@@ -0,0 +1,6 @@
diff --git a/mrj-0.1/.idea/workspace.xml b/mrj-0.1/.idea/workspace.xml
new file mode 100644
index 0000000..a30b4df
--- /dev/null
+++ b/mrj-0.1/.idea/workspace.xml
@@ -0,0 +1,341 @@
+ 1426120853528
+
+ 1426120853528
\ No newline at end of file
diff --git a/mrj-0.1/.settings/org.eclipse.core.resources.prefs b/mrj-0.1/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..83eb0de
--- /dev/null
+++ b/mrj-0.1/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,3 @@
+eclipse.preferences.version=1
+encoding//src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java=UTF-8
+encoding//src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java=UTF-8
diff --git a/mrj-0.1/.settings/org.eclipse.jdt.core.prefs b/mrj-0.1/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..7341ab1
--- /dev/null
+++ b/mrj-0.1/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/mrj-0.1/mrj-0.1.iml b/mrj-0.1/mrj-0.1.iml
new file mode 100644
index 0000000..017bf6e
--- /dev/null
+++ b/mrj-0.1/mrj-0.1.iml
@@ -0,0 +1,533 @@
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
index 8d5c320..afbc721 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
@@ -16,17 +16,20 @@ public class TripleSource implements WritableComparable {
byte derivation = 0;
int step = 0;
+ int transitive_level = 0;
@Override
public void readFields(DataInput in) throws IOException {
derivation = in.readByte();
step = in.readInt();
+ transitive_level = in.readInt();
}
@Override
public void write(DataOutput out) throws IOException {
out.write(derivation);
out.writeInt(step);
+ out.writeInt(transitive_level);
}
@Override
@@ -47,6 +50,14 @@ public void setStep(int step) {
this.step = step;
}
+ public int getTransitiveLevel() {
+ return transitive_level;
+ }
+
+ public void setTransitiveLevel(int level) {
+ this.transitive_level = level;
+ }
+
public void setDerivation(byte ruleset) {
derivation = ruleset;
}
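
TripleSource follows Hadoop's Writable contract, so the new transitive_level field must be written and read at the same position on both sides; the hunk above keeps write() and readFields() symmetric. A minimal round-trip sketch, not part of the patch (class name and values are illustrative):

    import java.io.*;

    public class TripleSourceRoundTrip {
        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(bos);
            out.write(1);      // derivation, mirrors TripleSource.write()
            out.writeInt(3);   // step
            out.writeInt(2);   // transitive_level, the field this patch adds
            DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
            // readFields() must consume the fields in exactly the same order
            System.out.println(in.readByte() + " " + in.readInt() + " " + in.readInt()); // 1 3 2
        }
    }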
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
index c9e2781..db2cad6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/FilesImportTriples.java
@@ -101,7 +101,7 @@ public void parseArgs(String[] args) {
}
public void sampleCommonResources(String[] args) throws Exception {
-// System.out.println("Entering sampleCommonResources");
+// System.out.println("Entering sampleCommonResources");
Job job = createNewJob("Sample common resources");
//Input
@@ -127,7 +127,7 @@ public void sampleCommonResources(String[] args) throws Exception {
}
public void assignIdsToNodes(String[] args) throws Exception {
-// System.out.println("Entering assignIdsToNodes");
+// System.out.println("Entering assignIdsToNodes");
Job job = createNewJob("Deconstruct statements");
job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
@@ -156,7 +156,7 @@ public void assignIdsToNodes(String[] args) throws Exception {
}
private void rewriteTriples(String[] args) throws Exception {
-// System.out.println("Entering rewriteTriples");
+// System.out.println("Entering rewriteTriples");
Job job = createNewJob("Reconstruct statements");
@@ -188,13 +188,15 @@ private void rewriteTriples(String[] args) throws Exception {
// is it useful below line?
//job.getConfiguration().set(CASSANDRA_PRIMARY_KEY, "(sub, pre, obj)");
+ /*
+ * This is set to 0 here, so the map side should add one fewer column element when it calls add().
+ */
String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
- " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? ";
-
+ " SET " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "= null" + ","+ CassandraDB.COLUMN_INFERRED_STEPS + "=0";
CqlConfigHelper.setOutputCql(job.getConfiguration(), query);
- ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
- ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
+ ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
+ ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
//Launch
long time = System.currentTimeMillis();
@@ -223,6 +225,22 @@ public static void main(String[] args) throws Exception {
long time = System.currentTimeMillis();
int res = ToolRunner.run(new Configuration(), new FilesImportTriples(), args);
// log.info("Import time: " + (System.currentTimeMillis() - time));
+//
+// //Modified by LiYang 2015/4/10
+// CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160);
+// db.init();
+// // Modified
+// db.createIndexOnTripleType();
+// //db.createIndexOnRule();
+//
+// /*
+// * Add by LiYang
+// * 2015.7.19
+// */
+// //db.createIndexOnInferredSteps();
+// //db.createIndexOnTransitiveLevel();
+// db.CassandraDBClose();
+
System.out.println("Import time: " + (System.currentTimeMillis() - time));
System.exit(res);
}
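
With KEYSPACE = "mrjks" and COLUMNFAMILY_JUSTIFICATIONS = "justifications", the rewritten output statement expands to an UPDATE with zero '?' placeholders, so reducers bind no SET values at all; CqlOutputFormat derives the WHERE clause from the key map each reducer emits. A sketch of the string the code above builds:

    // UPDATE mrjks.justifications SET transitivelevel= null,inferredsteps=0
    String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
            " SET " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "= null," + CassandraDB.COLUMN_INFERRED_STEPS + "=0";

This is also why ImportTriplesReconstructReducerToCassandra (below) comments out its trailing variables.add(...) calls: the bound-variable list must match the placeholder count exactly.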
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
index b2d64d0..6aca38d 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
@@ -68,7 +68,7 @@ public void reduce(Text key, Iterable values, Context context)thr
protected void setup(Context context) throws IOException, InterruptedException {
CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
try {
- db = new CassandraDB("localhost", 9160);
+ db = new CassandraDB();
db.init();
} catch (InvalidRequestException e) {
e.printStackTrace();
@@ -89,4 +89,9 @@ protected void setup(Context context) throws IOException, InterruptedException {
counter = (Long.valueOf(taskId) + 1) << 32;
log.debug("Start counter " + (Long.valueOf(taskId) + 1));
}
+
+ protected void cleanup(Context context) throws IOException, InterruptedException{
+ db.CassandraDBClose();
+ }
+
}
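
The added cleanup() leans on the standard Reducer lifecycle: Hadoop invokes setup() once per task attempt, reduce() once per key group, and cleanup() once at the end, so the connection opened in setup() is released exactly once. A skeletal sketch of that contract (hypothetical class, for illustration only):

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class LifecycleSketch extends Reducer<Text, Text, Text, Text> {
        @Override protected void setup(Context context)   { /* open the Cassandra connection */ }
        @Override protected void reduce(Text key, Iterable<Text> values, Context context) { /* use it */ }
        @Override protected void cleanup(Context context) { /* close it exactly once */ }
    }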
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
index 4b7acc3..955693f 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
@@ -2,7 +2,7 @@
* Project Name: mrj-0.1
* File Name: ImportTriplesReconstructReducerToCassandra.java
* @author Gang Wu
- * Oct. 28, 2014, 10:35:24 AM
+ * Oct. 28, 2014, 10:35:24 AM
*
* Description:
* Send reducer output to Cassandra DB by representing triples with ids
@@ -16,8 +16,11 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.UUID;
+import org.apache.cassandra.cli.CliParser.rowKey_return;
import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.UUIDGen;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.slf4j.Logger;
@@ -28,6 +31,7 @@
import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
+
/**
* @author gibeo_000
*
@@ -78,7 +82,7 @@ protected void reduce(LongWritable key, Iterable values, Context c
}
if (counter != 3) {
- // Modified by WuGang 2010-12-3, if what we reconstruct is not a 3-element triple, it needs to be reported
+ // Modified by WuGang 2010-12-3, if what we reconstruct is not a 3-element triple, it needs to be reported
log.error("Found a non-triple when reconstructing. The count num is " + counter + ", and triple is " + oValue);
// throw new IOException("Triple is not reconstructed!");
}
@@ -89,6 +93,35 @@ protected void reduce(LongWritable key, Iterable values, Context c
byte one = 1;
byte zero = 0;
+// /*
+// keys.put("sub", ByteBufferUtil.bytes(oValue.getSubject()));
+// keys.put("pre", ByteBufferUtil.bytes(oValue.getPredicate()));
+// keys.put("obj", ByteBufferUtil.bytes(oValue.getObject()));
+// // the length of boolean type in cassandra is one byte!!!!!!!!
+// keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject())));
+//// keys.put("id", ByteBufferUtil.bytes(UUIDGen.getTimeUUID()));
+// */
+//
+// keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject()));
+// keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate()));
+// keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject()));
+// keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject())));
+//
+//
+// // Prepare variables, here is a boolean value for CassandraDB.COLUMN_IS_LITERAL
+// List variables = new ArrayList();
+//// variables.add(ByteBufferUtil.bytes(oValue.getSubject()));
+// // the length of boolean type in cassandra is one byte!!!!!!!!
+// // For column inferred, init it as false i.e. zero
+//// variables.add(ByteBuffer.wrap(new byte[]{zero}));
+// variables.add(oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+// variables.add(ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject())));
+//
+// context.write(keys, variables);
+
+
// Prepare composite key (sub, pre, obj)
keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject()));
keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate()));
@@ -107,7 +140,8 @@ protected void reduce(LongWritable key, Iterable values, Context c
// the length of boolean type in cassandra is one byte!!!!!!!!
// For column inferred, init it as false i.e. zero
// variables.add(ByteBuffer.wrap(new byte[]{zero}));
- variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+// variables.add(ByteBufferUtil.bytes(0)); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+// variables.add(ByteBufferUtil.bytes(0)); // Added by WuGang, 2015-07-15, to support transitive level
context.write(keys, variables);
}
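
After this change the variables list stays empty, matching the zero-placeholder UPDATE that FilesImportTriples now configures; only the keys map matters, since it feeds the generated WHERE clause. A hedged sketch of what the reducer effectively emits (all literal values are illustrative):

    Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
    keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(42L));
    keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(7L));
    keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(99L));
    keys.put(CassandraDB.COLUMN_IS_LITERAL, ByteBuffer.wrap(new byte[]{0}));  // boolean is one byte
    keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(1));
    List<ByteBuffer> variables = new ArrayList<ByteBuffer>();   // nothing to bind
    context.write(keys, variables);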
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
index c1153f9..8614816 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
@@ -19,7 +19,7 @@ public class ImportTriplesSampleMapper extends Mapper
 	Set preloadedURIs = TriplesUtils.getInstance().getPreloadedURIs();
protected void map(Text key, Text value, Context context) {
- System.out.println("Entering ImportTriplesSampleMapper");
+ //System.out.println("Entering ImportTriplesSampleMapper");
try {
String[] uris = TriplesUtils.parseTriple(value.toString(), key.toString());
for(String uri : uris) {
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java
index 56f33a1..bb81c8a 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java
@@ -55,7 +55,7 @@ public void reduce(Text key, Iterable values, Context context) th
protected void setup(Context context) throws IOException, InterruptedException {
CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
try {
- db = new CassandraDB("localhost", 9160);
+ db = new CassandraDB();
db.init();
} catch (InvalidRequestException e) {
e.printStackTrace();
@@ -77,4 +77,7 @@ protected void setup(Context context) throws IOException, InterruptedException {
counter = (Long.valueOf(taskId)) << 13;
if (counter == 0) { counter +=100; }
}
+ protected void cleanup(Context context) throws IOException, InterruptedException{
+ db.CassandraDBClose();
+ }
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
index 00877c1..c017711 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java
@@ -10,6 +10,7 @@
package cn.edu.neu.mitt.mrj.io.dbs;
+import java.awt.print.Printable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
@@ -23,9 +24,8 @@
import java.util.Map;
import java.util.Set;
-import org.apache.cassandra.cql3.QueryProcessor;
-import org.apache.cassandra.cql3.UntypedResultSet;
-import org.apache.cassandra.db.marshal.TupleType;
+import org.apache.cassandra.cli.CliParser.rowKey_return;
+import org.apache.cassandra.cql3.statements.MultiColumnRestriction.EQ;
import org.apache.cassandra.exceptions.RequestExecutionException;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Column;
@@ -42,6 +42,7 @@
import org.apache.cassandra.thrift.UnavailableException;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.hadoop.mapreduce.Reducer.Context;
+//import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
@@ -49,17 +50,28 @@
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.apache.thrift.transport.TTransportException;
+import org.hsqldb.ResultBase.ResultIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cn.edu.neu.mitt.mrj.data.Triple;
import cn.edu.neu.mitt.mrj.data.TripleSource;
+import cn.edu.neu.mitt.mrj.justification.OWLHorstJustification;
+import cn.edu.neu.mitt.mrj.reasoner.Experiments;
import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
+import com.datastax.driver.core.Cluster;
+import com.datastax.driver.core.Cluster.Builder;
+import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
+import com.datastax.driver.core.Session;
+import com.datastax.driver.core.SimpleStatement;
+import com.datastax.driver.core.SocketOptions;
import com.datastax.driver.core.Statement;
+//modified cassandra java 2.0.5
import com.datastax.driver.core.TupleValue;
import com.datastax.driver.core.querybuilder.QueryBuilder;
+//modified
/**
@@ -71,11 +83,12 @@ public class CassandraDB {
public static final String KEYSPACE = "mrjks"; // mr.j keyspace
public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications"; // mr.j keyspace
public static final String COLUMNFAMILY_RESOURCES = "resources"; // mr.j keyspace
- public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace
+ public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace
+// public static final String COLUMNFAMILY_ALLTRIPLES = "alltriples";
public static final String COLUMN_SUB = "sub"; // mrjks.justifications.sub
public static final String COLUMN_PRE = "pre"; // mrjks.justifications.pre
public static final String COLUMN_OBJ = "obj"; // mrjks.justifications.obj
- public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype
+ public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype
public static final String COLUMN_IS_LITERAL = "isliteral" ; // mrjks.justifications.isliteral
public static final String COLUMN_INFERRED_STEPS = "inferredsteps" ; // mrjks.justifications.inferredsteps
public static final String COLUMN_RULE = "rule"; // mrjks.justifications.rule
@@ -85,16 +98,38 @@ public class CassandraDB {
public static final String COLUMN_ID = "id"; // mrjks.resources.id
public static final String COLUMN_LABEL = "label"; // mrjks.resources.label
public static final String COLUMN_JUSTIFICATION = "justification"; //mrjks.results.justification
- public static final String COLUMN_STEP = "step"; // mrjks.results.step
+ public static final String COLUMN_TRANSITIVE_LEVELS = "transitivelevel"; // mrjks.results.step
- public static final String DEFAULT_HOST = "localhost";
+ public static final String DEFAULT_HOST = cn.edu.neu.mitt.mrj.utils.Cassandraconf.host;
public static final String DEFAULT_PORT = "9160"; // in version 2.1.2, cql3 port is 9042
- public static final String CQL_PAGE_ROW_SIZE = "10"; //3
+ public static final String CQL_PAGE_ROW_SIZE = "10000"; //3 modified by liyang
+
+ // Added by WuGang 20160203
+ public static Set domainSchemaTriples = null;
+ public static Set rangeSchemaTriples = null;
+ public static Set memberProperties = null;
+ public static Set resourceSubclasses = null;
+ public static Set literalSubclasses = null;
+ public static Set schemaFunctionalProperties = null;
+ public static Set schemaInverseFunctionalProperties = null;
+ public static Set schemaSymmetricProperties = null;
+ public static Set schemaInverseOfProperties = null;
+ public static Set schemaTransitiveProperties = null;
+ public static Set subclassSchemaTriples = null;
+ public static Set subpropSchemaTriples = null;
+ public static Set hasValue = null;
+ public static Set hasValueInverted = null;
+ public static Set onProperty = null;
+ public static Set onPropertyInverted = null;
+
+ public static Map<Long, Set<Long>> subclassSchemaTriplesMap = null;
+ public static Map<Long, Set<Long>> domainSchemaTriplesMap = null;
+ public static Map<Long, Set<Long>> rangeSchemaTriplesMap = null;
+ public static Map<Long, Set<Long>> subpropSchemaTriplesMap = null;
-
// 2014-12-11, Very strange, this works around.
- public static final String CONFIG_LOCATION = "file:///home/gibeon/Software/apache-cassandra-2.1.2/conf/cassandra.yaml";
+ public static final String CONFIG_LOCATION = cn.edu.neu.mitt.mrj.utils.Cassandraconf.CassandraConfFile;
public static void setConfigLocation(){
setConfigLocation(CONFIG_LOCATION);
}
@@ -108,17 +143,36 @@ private static Cassandra.Iface createConnection() throws TTransportException{
if (System.getProperty("cassandra.host") == null || System.getProperty("cassandra.port") == null){
logger.warn("cassandra.host or cassandra.port is not defined, using default");
}
+ System.out.println("Port : " + System.getProperty("cassandra.port", DEFAULT_PORT));
return createConnection(System.getProperty("cassandra.host", DEFAULT_HOST),
Integer.valueOf(System.getProperty("cassandra.port", DEFAULT_PORT)));
}
+
+
+ private static TSocket socket = null;
+ private static TTransport trans = null;
+ private static Cassandra.Client c1 = null;
private static Cassandra.Client createConnection(String host, Integer port) throws TTransportException {
- TSocket socket = new TSocket(host, port);
- TTransport trans = new TFramedTransport(socket);
+ if (c1 != null) {
+ return c1;
+ }
+ socket = new TSocket(host, port);
+ trans = new TFramedTransport(socket);
trans.open();
TProtocol protocol = new TBinaryProtocol(trans);
-
- return new Cassandra.Client(protocol);
+
+ c1 = new Cassandra.Client(protocol);
+ //Modified 2015/5/25
+ return c1;
+ }
+
+ private static void close(){
+ if(trans != null)
+ trans.close();
+ if(socket != null)
+ socket.close();
+ return;
}
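
One caveat with the memoized client: close() shuts the transport but leaves c1 non-null, so a later createConnection() would hand back a dead client. A hedged fix sketch over the same static fields:

    private static void close(){
        if (trans != null)  trans.close();
        if (socket != null) socket.close();
        trans = null;
        socket = null;
        c1 = null;   // force the next createConnection() to rebuild the connection
    }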
@@ -135,12 +189,12 @@ private static void setupKeyspace(Cassandra.Iface client)
} catch(NotFoundException e){
logger.info("set up keyspace " + KEYSPACE);
String query = "CREATE KEYSPACE " + KEYSPACE +
- " WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 1}";
+ " WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 2}";
- client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ANY);
String verifyQuery = "select count(*) from system.peers";
- CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(verifyQuery), Compression.NONE, ConsistencyLevel.ONE);
+ CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(verifyQuery), Compression.NONE, ConsistencyLevel.ANY);
long magnitude = ByteBufferUtil.toLong(result.rows.get(0).columns.get(0).value);
try {
@@ -151,12 +205,110 @@ private static void setupKeyspace(Cassandra.Iface client)
}
}
+ public static String getJustificationsSchema(){
+ String schemaString = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
+ " ( " +
+ COLUMN_SUB + " bigint, " + // partition key
+ COLUMN_PRE + " bigint, " + // partition key
+ COLUMN_OBJ + " bigint, " + // partition key
+ COLUMN_IS_LITERAL + " boolean, " + // partition key
+ COLUMN_TRIPLE_TYPE + " int, " +
+ COLUMN_RULE + " int, " +
+ COLUMN_V1 + " bigint, " +
+ COLUMN_V2 + " bigint, " +
+ COLUMN_V3 + " bigint, " +
+ COLUMN_INFERRED_STEPS + " int, " +
+ COLUMN_TRANSITIVE_LEVELS + " int, " +
+ " PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE +
+ ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
+ //", " + COLUMN_TRIPLE_TYPE +
+ " ) ) ";
+ return schemaString;
+ }
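
Under the reshaped primary key, ((isliteral, rule, sub), tripletype, pre, obj, v1, v2, v3), a query is only fully keyed when it restricts all three partition columns; anything narrower, such as the tripletype-only lookups in loadSetIntoMemory, needs a secondary index or ALLOW FILTERING, which is why that clause appears throughout this patch. A sketch of the two cases:

    // Fully keyed: hits a single partition
    String keyed = "SELECT * FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
            " WHERE " + COLUMN_IS_LITERAL + "=? AND " + COLUMN_RULE + "=? AND " + COLUMN_SUB + "=?";
    // Partial restriction: must scan across partitions
    String scan = "SELECT * FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
            " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING";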
+
+ /*
+ * ??
+ */
+ public static String getJustificationseStatement(){
+ return ("INSERT INTO " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
+ " (isliteral, rule, sub, tripletype, pre, obj, v1, v2, v3, inferredsteps, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )");
+ }
+
+
+// public static String getAlltripleSchema(){
+// String ALLTRIPLE_SCHEMA = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES +
+// " ( " +
+// COLUMN_SUB + " bigint, " + // partition key
+// COLUMN_PRE + " bigint, " + // partition key
+// COLUMN_OBJ + " bigint, " + // partition key
+// COLUMN_IS_LITERAL + " boolean, " + // partition key
+// COLUMN_TRIPLE_TYPE + " int, " +
+// COLUMN_INFERRED_STEPS + " int, " +
+// "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ +
+// ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}";
+// return ALLTRIPLE_SCHEMA;
+// }
+
+ /*
+ public static String getStepsSchema(Integer step){
+ String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + ".step" + step +
+ " ( " +
+ COLUMN_SUB + " bigint, " +
+ COLUMN_PRE + " bigint, " +
+ COLUMN_OBJ + " bigint, " +
+ COLUMN_RULE + " int, " +
+ COLUMN_V1 + " bigint, " +
+ COLUMN_V2 + " bigint, " +
+ COLUMN_V3 + " bigint, " +
+ COLUMN_TRANSITIVE_LEVELS + " int, " +
+ "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE +
+ "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
+ ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}";
+ return STEPS_SCHEMA;
+ }
+
+ public static String getStepsSchema(String cfName){
+ String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + "." + cfName +
+ " ( " +
+ COLUMN_SUB + " bigint, " +
+ COLUMN_PRE + " bigint, " +
+ COLUMN_OBJ + " bigint, " +
+ COLUMN_RULE + " int, " +
+ COLUMN_V1 + " bigint, " +
+ COLUMN_V2 + " bigint, " +
+ COLUMN_V3 + " bigint, " +
+ COLUMN_TRANSITIVE_LEVELS + " int, " +
+ "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE +
+ "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
+ ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}";
+ return STEPS_SCHEMA;
+ }
+
+ public static String getStepsStatement(int step){
+ String query = "INSERT INTO " + CassandraDB.KEYSPACE + ".step" + step +
+ " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)";
+ return query;
+ }
+
+ public static String getStepsStatement(String cfName){
+ String query = "INSERT INTO " + CassandraDB.KEYSPACE + "." + cfName +
+ " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)";
+ return query;
+ }
+
+ public static String getAlltripleStatement(){
+ return ("INSERT INTO " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES +
+ " (sub, pre, obj, isliteral, tripletype, inferredsteps) VALUES(?, ?, ?, ?, ?, ?)");
+ }
+ */
+
private static void setupTables(Cassandra.Iface client)
throws InvalidRequestException,
UnavailableException,
TimedOutException,
SchemaDisagreementException,
TException {
+
// Create justifications table
String query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
@@ -171,9 +323,10 @@ private static void setupTables(Cassandra.Iface client)
COLUMN_V2 + " bigint, " +
COLUMN_V3 + " bigint, " +
// COLUMN_TRIPLE_TYPE + " int, " +
- COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key
- " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " +
- COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
+ COLUMN_INFERRED_STEPS + " int, " + // from this line, the fields are non-primary key
+ COLUMN_TRANSITIVE_LEVELS + " int, " +
+ " PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE +
+ ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
//", " + COLUMN_TRIPLE_TYPE +
" ) ) ";
@@ -184,6 +337,7 @@ private static void setupTables(Cassandra.Iface client)
logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS, e);
}
+
// Create resources table
query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES +
" ( " +
@@ -199,12 +353,13 @@ private static void setupTables(Cassandra.Iface client)
logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES, e);
}
+
// Create results table
query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESULTS +
" ( " +
- COLUMN_ID + " uuid, " +
+ "id" + " int, " +
COLUMN_JUSTIFICATION + " set>>, " +
- " PRIMARY KEY (" + COLUMN_ID + ") ) ";
+ " PRIMARY KEY (" + "id" + ") ) ";
try {
logger.info("set up table " + COLUMNFAMILY_RESULTS);
@@ -213,7 +368,67 @@ private static void setupTables(Cassandra.Iface client)
catch (InvalidRequestException e) {
logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESULTS, e);
}
+
+
+ //Create resultrow table
+ String cquery = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "resultrows" +
+ " ( " +
+ CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key
+ CassandraDB.COLUMN_RULE + " int, " +
+ CassandraDB.COLUMN_SUB + " bigint, " + // partition key
+ CassandraDB.COLUMN_TRIPLE_TYPE + " int, " +
+ CassandraDB.COLUMN_PRE + " bigint, " + // partition key
+ CassandraDB.COLUMN_OBJ + " bigint, " + // partition key
+ CassandraDB.COLUMN_V1 + " bigint, " +
+ CassandraDB.COLUMN_V2 + " bigint, " +
+ CassandraDB.COLUMN_V3 + " bigint, " +
+// COLUMN_TRIPLE_TYPE + " int, " +
+ CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key
+ CassandraDB.COLUMN_TRANSITIVE_LEVELS + " int, " +
+ " PRIMARY KEY ((" + CassandraDB.COLUMN_IS_LITERAL + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_SUB + "), " +
+ CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 +
+ //", " + COLUMN_TRIPLE_TYPE +
+ " ) ) ";
+ client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE);
+
+ /*
+ * Index creation may fail here.
+ */
+
+// String indexQuery = "CREATE INDEX on resultrows (sub) ;";
+// CqlPreparedResult indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE);
+// indexQuery = "CREATE INDEX on resultrows (obj) ;";
+// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE);
+// indexQuery = "CREATE INDEX on resultrows (pre) ;";
+// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE);
+// indexQuery = "CREATE INDEX on resultrows (isliteral) ;";
+// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE);
+
+
+ /*
+ // Create the table holding all triples
+ cquery = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES +
+ " ( " +
+ COLUMN_SUB + " bigint, " + // partition key
+ COLUMN_PRE + " bigint, " + // partition key
+ COLUMN_OBJ + " bigint, " + // partition key
+ COLUMN_IS_LITERAL + " boolean, " + // partition key
+ COLUMN_TRIPLE_TYPE + " int, " +
+ COLUMN_INFERRED_STEPS + " int, " +
+ "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ +
+ ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}";
+
+ try {
+ logger.info("set up table " + "all triples");
+ client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE);
+ } catch (InvalidRequestException e) {
+ logger.error("failed to create table " + KEYSPACE + "." + "AllTriples", e);
+ }
+
+ query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ */
}
@@ -223,20 +438,59 @@ public CassandraDB() throws TTransportException {
}
- public CassandraDB(String host, Integer port) throws TTransportException {
- client = createConnection(host, port);
+ public void CassandraDBClose(){
+ this.close();
}
public void init() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
setupKeyspace(client);
client.set_keyspace(KEYSPACE);
setupTables(client);
+
+ createIndexOnTripleType();
+ createIndexOnresultrows();
+
}
public Cassandra.Iface getDBClient(){
return client;
}
+
+ /**
+ * Get the row count according to the COLUMN_INFERRED_STEPS.
+ * @return row count.
+ */
+
+ /*
+ * Need to change
+ */
+
+ public long getRowCountAccordingInferredSteps(int level){
+ //ALLOW FILTERING
+ String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
+ " WHERE " + COLUMN_INFERRED_STEPS + " = " + level + " ALLOW FILTERING";
+
+ long num = 0;
+ try {
+ CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value);
+ } catch (InvalidRequestException e) {
+ e.printStackTrace();
+ } catch (UnavailableException e) {
+ e.printStackTrace();
+ } catch (TimedOutException e) {
+ e.printStackTrace();
+ } catch (SchemaDisagreementException e) {
+ e.printStackTrace();
+ } catch (TException e) {
+ e.printStackTrace();
+ }
+
+ return num;
+ }
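
COUNT(*) with ALLOW FILTERING forces a full server-side scan and can time out as the justifications table grows. A hedged alternative sketch using the DataStax driver already imported in this file (assumes 'session' is an open Session and 'level' is the step being counted):

    Statement st = new SimpleStatement("SELECT " + COLUMN_INFERRED_STEPS +
            " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS);
    st.setFetchSize(10000);                  // stream rows in pages
    long num = 0;
    for (Row row : session.execute(st)) {    // count client-side
        if (row.getInt(COLUMN_INFERRED_STEPS) == level) num++;
    }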
+
+
//TriplesUtils.SYNONYMS_TABLE
//TriplesUtils.TRANSITIVE_TRIPLE
//TriplesUtils.DATA_TRIPLE_SAME_AS
@@ -245,9 +499,11 @@ public Cassandra.Iface getDBClient(){
* @return row count.
*/
public long getRowCountAccordingTripleType(int tripletype){
+ //ALLOW FILTERING
+
String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
- " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype;
-
+ " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING";
+// System.out.println(query);
long num = 0;
try {
CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
@@ -267,14 +523,51 @@ public long getRowCountAccordingTripleType(int tripletype){
return num;
}
+
+ /**
+ * Get the row count according to the triple type.
+ * @return row count.
+ */
+ public long getRowCountAccordingTripleTypeWithLimitation(int tripletype, int limit){
+ //ALLOW FILTERING
+ String query = "";
+ if (limit <= 0)
+ query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
+ " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING";
+ else
+ query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
+ " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " LIMIT " + limit + " ALLOW FILTERING ";
+
+ long num = 0;
+ try {
+ CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value);
+ } catch (InvalidRequestException e) {
+ e.printStackTrace();
+ } catch (UnavailableException e) {
+ e.printStackTrace();
+ } catch (TimedOutException e) {
+ e.printStackTrace();
+ } catch (SchemaDisagreementException e) {
+ e.printStackTrace();
+ } catch (TException e) {
+ e.printStackTrace();
+ }
+
+ return num;
+ }
+
/**
* Get the row count according to the type of rule.
* @return row count.
*/
+ //modified
+ /*
public long getRowCountAccordingRule(int rule){
String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
" WHERE " + COLUMN_RULE + " = " + rule + " ALLOW FILTERING"; // must use ALLOW FILTERING
+ //modified
long num = 0;
try {
@@ -294,7 +587,7 @@ public long getRowCountAccordingRule(int rule){
return num;
}
-
+*/
public void insertResources(long id, String label) throws InvalidRequestException, TException{
String query = "INSERT INTO " + COLUMNFAMILY_RESOURCES +
@@ -305,7 +598,7 @@ public void insertResources(long id, String label) throws InvalidRequestExceptio
args.add(ByteBufferUtil.bytes(label));
CqlPreparedResult p_result = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
CqlResult result = client.execute_prepared_cql3_query(p_result.itemId, args, ConsistencyLevel.ANY);
- logger.info("Number of results: " + result.getNum());
+ //logger.info("Number of results: " + result.getNum());
}
// TODO it's wrong!!!!!!!!!!
@@ -327,10 +620,10 @@ public static Triple readJustificationFromMapReduceRow(Row row){
long pre = row.getLong(CassandraDB.COLUMN_PRE);
long obj = row.getLong(CassandraDB.COLUMN_OBJ);
boolean isObjectLiteral = row.getBool(CassandraDB.COLUMN_IS_LITERAL);
- long v1 = row.getLong(CassandraDB.COLUMN_V1);
- long v2 = row.getLong(CassandraDB.COLUMN_V2);
- long v3 = row.getLong(CassandraDB.COLUMN_V3);
- int rule = row.getInt(CassandraDB.COLUMN_RULE);
+ long v1 = -1;
+ long v2 = -2;
+ long v3 = -3;
+ int rule = -4;
result.setObject(obj);
result.setObjectLiteral(isObjectLiteral);
@@ -348,7 +641,193 @@ public static int readStepFromMapReduceRow(Row row){
return step;
}
+ /*
+ public static void writeJustificationToMapReduceMultipleOutputsLessObjects(
+ Triple triple,
+ TripleSource source,
+ MultipleOutputs output,
+ Map keys,
+ Map allkeys,
+ List stepsValues,
+ List allTValues,
+ String stepname) throws IOException, InterruptedException{
+
+ keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+ keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+ keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+ keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int
+ keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long
+ keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long
+ keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long
+
+ allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+ allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+ allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+
+ allTValues.add(ByteBufferUtil.bytes(triple.getSubject()));
+ allTValues.add(ByteBufferUtil.bytes(triple.getPredicate()));
+ allTValues.add(ByteBufferUtil.bytes(triple.getObject()));
+ // Use the numeric bytes 1/0 directly for the boolean.
+ allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{1}):ByteBuffer.wrap(new byte[]{0}));
+ allTValues.add(ByteBufferUtil.bytes(
+ TriplesUtils.getTripleType(
+ source, triple.getSubject(),
+ triple.getPredicate(),
+ triple.getObject())));
+ allTValues.add(ByteBufferUtil.bytes((int)source.getStep()));
+
+ stepsValues.add(ByteBufferUtil.bytes(triple.getSubject()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getObject()));
+ stepsValues.add(ByteBufferUtil.bytes((int)triple.getType()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getRobject()));
+ stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel()));
+
+ output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues);
+ output.write(stepname, null, stepsValues);
+
+
+ keys.clear();
+ allkeys.clear();
+ allTValues.clear();
+ stepsValues.clear();
+
+ }
+ */
+
+ /*
+ public static void writeJustificationToMapReduceMultipleOutputs(
+ Triple triple,
+ TripleSource source,
+ MultipleOutputs output,
+ String stepname) throws IOException, InterruptedException{
+ Map keys = new LinkedHashMap();
+ Map allkeys = new LinkedHashMap();
+ List allvariables = new ArrayList();
+// long time = System.currentTimeMillis();
+
+ byte one = 1;
+ byte zero = 0;
+ // Prepare composite key (sub, pre, obj)
+ keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+ keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+ keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+ // the length of boolean type in cassandra is one byte!!!!!!!!
+ keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int
+ keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long
+ keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long
+ keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long
+
+ allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+ allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+ allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+
+ allvariables.add(ByteBufferUtil.bytes(source.getStep()));
+ allvariables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+ allvariables.add(ByteBufferUtil.bytes((int)triple.getType()));
+
+ // Prepare variables
+ List variables = new ArrayList();
+// variables.add(ByteBufferUtil.bytes(oValue.getSubject()));
+ // the length of boolean type in cassandra is one byte!!!!!!!!
+ // For column inferred, init it as false i.e. zero
+ //variables.add(ByteBuffer.wrap(new byte[]{zero}));
+
+ variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel()));
+
+
+
+ // Keys are not used for
+ // CqlBulkRecordWriter.write(Object key, List values),
+ // so it can be set to null.
+ // Only values are used there where the value correspond to
+ // the insert statement set in CqlBulkOutputFormat.setColumnFamilyInsertStatement()
+ // All triples columnfamily:
+ // sub, pre, obj, isliteral, tripletype, inferredsteps
+ // Steps columnfamily:
+ // sub, pre, obj, rule, v1, v2, v3, transitivelevel
+
+ List allTValues = new ArrayList();
+ allTValues.add(ByteBufferUtil.bytes(triple.getSubject()));
+ allTValues.add(ByteBufferUtil.bytes(triple.getPredicate()));
+ allTValues.add(ByteBufferUtil.bytes(triple.getObject()));
+ allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+ allTValues.add(ByteBufferUtil.bytes(
+ TriplesUtils.getTripleType(
+ source, triple.getSubject(),
+ triple.getPredicate(),
+ triple.getObject())));
+ allTValues.add(ByteBufferUtil.bytes((int)source.getStep()));
+
+ List stepsValues = new ArrayList();
+ stepsValues.add(ByteBufferUtil.bytes(triple.getSubject()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getObject()));
+ stepsValues.add(ByteBufferUtil.bytes((int)triple.getType()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate()));
+ stepsValues.add(ByteBufferUtil.bytes(triple.getRobject()));
+ stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel()));
+
+// time = System.currentTimeMillis();
+ output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues);
+// System.out.println("wrote all " + (System.currentTimeMillis() - time));
+// System.out.println("write all " + (System.currentTimeMillis() - time));// _output.write(stepname, keys, variables);
+// time = System.currentTimeMillis();
+ output.write(stepname, null, stepsValues);
+// System.out.println("wrote steps" + (System.currentTimeMillis() - time));
+
+
+ }
+ */
+/*
public static void writeJustificationToMapReduceContext(
+ Triple triple,
+ TripleSource source,
+ Context context,
+ String stepname) throws IOException, InterruptedException{
+ Map keys = new LinkedHashMap();
+ Map allkeys = new LinkedHashMap();
+ List allvariables = new ArrayList();
+ long time = System.currentTimeMillis();
+
+ byte one = 1;
+ byte zero = 0;
+
+ // Prepare composite key (sub, pre, obj)
+ keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+ keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+ keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+ // the length of boolean type in cassandra is one byte!!!!!!!!
+ keys.put(CassandraDB.COLUMN_IS_LITERAL,
+ triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+ int tripletype = TriplesUtils.DATA_TRIPLE;
+ if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){
+ tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table
+ }else{
+ tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject());
+ }
+ keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109
+ keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int
+ keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long
+ keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long
+ keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long
+
+ // Prepare variables
+ List variables = new ArrayList();
+// variables.add(ByteBufferUtil.bytes(oValue.getSubject()));
+ // the length of boolean type in cassandra is one byte!!!!!!!!
+ // For column inferred, init it as false i.e. zero
+// variables.add(ByteBuffer.wrap(new byte[]{zero}));
+ variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+ variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive
+ context.write(keys, variables);
+ }
+*/
+ /*
+ public static void writealltripleToMapReduceContext(
Triple triple,
TripleSource source,
Context context) throws IOException, InterruptedException{
@@ -383,8 +862,73 @@ public static void writeJustificationToMapReduceContext(
// For column inferred, init it as false i.e. zero
// variables.add(ByteBuffer.wrap(new byte[]{zero}));
variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+ variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive
context.write(keys, variables);
+ }
+ */
+
+ public static void writeJustificationToMapReduceContext(
+ Triple triple,
+ TripleSource source,
+ Context context) throws IOException, InterruptedException{
+ Map keys = new LinkedHashMap();
+
+ byte one = 1;
+ byte zero = 0;
+
+ // Prepare composite key (sub, pre, obj)
+// keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+// keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+// keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+// // the length of boolean type in cassandra is one byte!!!!!!!!
+// keys.put(CassandraDB.COLUMN_IS_LITERAL,
+// triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero}));
+// int tripletype = TriplesUtils.DATA_TRIPLE;
+// if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){
+// tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table
+// }else{
+// tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject());
+// }
+// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109
+// keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int
+// keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long
+// keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long
+// keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long
+
+ // Prepare variables
+ List variables = new ArrayList();
+// variables.add(ByteBufferUtil.bytes(oValue.getSubject()));
+ // the length of boolean type in cassandra is one byte!!!!!!!!
+ // For column inferred, init it as false i.e. zero
+// variables.add(ByteBuffer.wrap(new byte[]{zero}));
+
+ int tripletype = TriplesUtils.DATA_TRIPLE;
+ if (triple.getType() == TriplesUtils.OWL_HORST_SYNONYMS_TABLE) {
+ tripletype = TriplesUtils.SYNONYMS_TABLE;
+ } else {
+ tripletype = TriplesUtils.getTripleType(source,
+ triple.getSubject(), triple.getPredicate(),
+ triple.getObject());
+ }
+
+
+ variables.add(triple.isObjectLiteral() ? ByteBuffer
+ .wrap(new byte[] { one }) : ByteBuffer
+ .wrap(new byte[] { zero }));
+ variables.add(ByteBufferUtil.bytes((int) triple.getType()));
+ variables.add(ByteBufferUtil.bytes(triple.getSubject()));
+
+ variables.add(ByteBufferUtil.bytes(tripletype));
+ variables.add(ByteBufferUtil.bytes(triple.getPredicate()));
+ variables.add(ByteBufferUtil.bytes(triple.getObject()));
+ variables.add(ByteBufferUtil.bytes(triple.getRsubject()));
+ variables.add(ByteBufferUtil.bytes(triple.getRpredicate()));
+ variables.add(ByteBufferUtil.bytes(triple.getRobject()));
+
+ variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+ variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel()));
+ context.write(null, variables);
}
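
The order in which variables is filled above must match, one for one, the column list in getJustificationseStatement(): (isliteral, rule, sub, tripletype, pre, obj, v1, v2, v3, inferredsteps, transitivelevel). A hedged sketch of executing that insert directly through the Thrift client used elsewhere in this class (all literal values are illustrative):

    CqlPreparedResult prep = client.prepare_cql3_query(
            ByteBufferUtil.bytes(CassandraDB.getJustificationseStatement()), Compression.NONE);
    List<ByteBuffer> vals = new ArrayList<ByteBuffer>();
    vals.add(ByteBuffer.wrap(new byte[]{0}));   // isliteral (boolean is one byte)
    vals.add(ByteBufferUtil.bytes(14));         // rule (int)
    vals.add(ByteBufferUtil.bytes(42L));        // sub (bigint)
    vals.add(ByteBufferUtil.bytes(1));          // tripletype (int)
    vals.add(ByteBufferUtil.bytes(7L));         // pre (bigint)
    vals.add(ByteBufferUtil.bytes(99L));        // obj (bigint)
    vals.add(ByteBufferUtil.bytes(0L));         // v1
    vals.add(ByteBufferUtil.bytes(0L));         // v2
    vals.add(ByteBufferUtil.bytes(0L));         // v3
    vals.add(ByteBufferUtil.bytes(1));          // inferredsteps
    vals.add(ByteBufferUtil.bytes(0));          // transitivelevel
    client.execute_prepared_cql3_query(prep.itemId, vals, ConsistencyLevel.ONE);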
public boolean loadSetIntoMemory(Set schemaTriples, Set filters, int previousStep) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException {
@@ -409,11 +953,15 @@ public static Set<Set<TupleValue>> getJustifications() throws InvalidRequestExce
// String query = "SELECT " + COLUMN_JUSTIFICATION + " FROM " + KEYSPACE + "." + COLUMNFAMILY_RESULTS;
SimpleClientDataStax scds = new SimpleClientDataStax();
scds.connect(DEFAULT_HOST);
- Statement statement = QueryBuilder.select().all().from(KEYSPACE, COLUMNFAMILY_RESULTS);
+
+ //Modified 2015-6-25
+ //From COLUMNFAMILY_RESULTS to justifications ??\\
+ Statement statement = QueryBuilder.select().all().from(KEYSPACE, "results").where(QueryBuilder.eq("id", OWLHorstJustification.id));
List rows = scds.getSession().execute(statement).all();
for (Row row : rows){
- Set testResult = row.getSet(COLUMN_JUSTIFICATION, TupleValue.class);
+ //modified
+ Set testResult = row.getSet("justification", TupleValue.class);
 	Set<Set<TupleValue>> toBeDeletedFromResults = new HashSet<Set<TupleValue>>(); // Perform delete these from the results
boolean beAdded = true;
for (Set currentResult : results){
@@ -426,7 +974,7 @@ else if (currentResult.containsAll(testResult)){
toBeDeletedFromResults.add(currentResult);
}
}
- if (beAdded) // The testResult is a candidate justification
+ if (beAdded) // The testResult is a candidate justification
@@ public Set getTracingEntries(Triple triple) throws InvalidRequestExcepti
byte zero = 0;
Set tracingEntries = new HashSet();
- String query = "SELECT * FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + " WHERE " +
- COLUMN_SUB + "=? AND " + COLUMN_PRE + "=? AND " + COLUMN_OBJ + "=? AND " + COLUMN_IS_LITERAL + "=?";
+ //Fixed 2016/4/13
+
+ String query = "SELECT * FROM " + KEYSPACE + "." + "resultrows" + " WHERE " +
+ COLUMN_SUB + "=? AND " + COLUMN_PRE + "=? AND " + COLUMN_OBJ + "=? AND " + COLUMN_IS_LITERAL + "=? ALLOW FILTERING";
CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
List list = new ArrayList();
list.add(ByteBufferUtil.bytes(triple.getSubject()));
@@ -507,20 +1057,32 @@ public boolean loadSetIntoMemory(
logger.info("In CassandraDB's loadSetIntoMemory");
// Require an index created on COLUMN_TRIPLE_TYPE column
- String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
+ /*
+ * Be Attention
+ * add ALLOW FILTERING
+ * 2015/6/12
+ */
+
+
+ String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
" FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
- " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ";
-// System.out.println(query);
+ " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING";
+ System.out.println(query);
CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
for (int filter : filters){
List list = new ArrayList();
list.add(ByteBufferUtil.bytes(filter));
+// System.out.println("filter " + filter);
CqlResult result = client.execute_prepared_cql3_query(preparedResult.itemId, list, ConsistencyLevel.ONE);
- for(CqlRow row : result.rows){
+ Iterator it =result.getRowsIterator();
+ while(it.hasNext() ){
+ CqlRow row = it.next();
+// for(CqlRow row : result.rows){
Iterator columnsIt = row.getColumnsIterator();
Long sub = null, obj = null;
+// System.out.println("row : " + row);
while (columnsIt.hasNext()) {
Column column = columnsIt.next();
if (new String(column.getName()).equals(COLUMN_SUB))
@@ -534,9 +1096,11 @@ public boolean loadSetIntoMemory(
}
}
if (!inverted)
- schemaTriples.add(sub);
+ schemaTriples.add(sub);
else
schemaTriples.add(obj);
+
+ System.out.println("schema : " + schemaTriples);
}
}
@@ -550,7 +1114,12 @@ public Map> loadMapIntoMemory(Set filters) throw
return loadMapIntoMemory(filters, false);
}
- // The key of the returned map is the triple's subject; the value is its object
+ // The key of the returned map is the triple's subject; the value is its object
+ /*
+ * Be Attention
+ * add ALLOW FILTERING
+ * 2015/6/12
+ */
public Map<Long, Set<Long>> loadMapIntoMemory(Set<Integer> filters, boolean inverted) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException {
long startTime = System.currentTimeMillis();
@@ -561,7 +1130,7 @@ public Map> loadMapIntoMemory(Set filters, boole
// Require an index created on COLUMN_TRIPLE_TYPE column
String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
" FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
- " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ";
+ " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING"; //partitonkey
CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
@@ -599,7 +1168,7 @@ public Map> loadMapIntoMemory(Set filters, boole
}
}
- logger.debug("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime));
+ logger.info("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime));
return schemaTriples;
}
@@ -611,26 +1180,246 @@ public void createIndexOnTripleType() throws InvalidRequestException, Unavailabl
client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
}
- public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
- String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")";
+ public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+ String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ }
+
+ public void createIndexOnresultrows() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+
+ String query = "CREATE INDEX on resultrows (sub) ;";
client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ query = "CREATE INDEX on resultrows (obj) ;";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ query = "CREATE INDEX on resultrows (pre) ;";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ query = "CREATE INDEX on resultrows (isliteral) ;";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+
}
+// public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")";
+// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+// }
+//
+//
+// public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")";
+// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+// }
+
+ /*
+
+ public void Index() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+ //createIndexOnInferredSteps();
+ createIndexOnRule();
+ createIndexOnTransitiveLevel();
+ createIndexOnTripleType();
+ System.out.println("IndexED");
+ }
+
+ public void DropTripleTypeIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+ String query = "DROP INDEX mrjks.justifications_tripletype_idx";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ }
+
+ public void DropRuleIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+ String query = "DROP INDEX mrjks.justifications_rule_idx";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ }
+
+ public void DropInferredStepsIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+ String query = "DROP INDEX mrjks.justifications_inferredSteps_idx";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ }
+
+ public void DropTransitiveLevelIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+ String query = "DROP INDEX mrjks.justifications_transitiveLevel_idx";
+ client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE);
+ }
+
+ public void UnIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{
+
+ this.DropInferredStepsIndex();
+ this.DropRuleIndex();
+ this.DropTransitiveLevelIndex();
+ this.DropTripleTypeIndex();
+ }
+ */
+ // Added by WuGang 2015-06-08
+
+ public static ResultSet getRows(){
+ Builder builder = Cluster.builder();
+ builder.addContactPoint(DEFAULT_HOST);
+ SocketOptions socketoptions = new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000);
+ builder.withSocketOptions(socketoptions); // apply the options (previously created but never used)
+ Cluster clu = builder.build();
+ Session session = clu.connect();
+ SimpleStatement statement = new SimpleStatement("SELECT sub, obj, pre, isliteral FROM mrjks.justifications where inferredsteps = 0");
+ statement.setFetchSize(100);
+ ResultSet results = session.execute(statement);
+ System.out.println("------------------" + results + "--------------");
+ return results;
+ }
+
+ public static boolean delornot = false;
+/*
+ public static void removeOriginalTriples(){
+ if (delornot == true)
+ return;
+ delornot = true;
+ // Run-once guard; this must not execute repeatedly
+ Builder builder = Cluster.builder();
+ builder.addContactPoint(DEFAULT_HOST);
+ SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000);
+ Cluster clu = builder.build();
+ Session session = clu.connect();
+
+ String cquery1 = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + "." + "ruleiszero" +
+ " ( " +
+ COLUMN_SUB + " bigint, " + // partition key
+ COLUMN_PRE + " bigint, " + // partition key
+ COLUMN_OBJ + " bigint, " + // partition key
+ COLUMN_IS_LITERAL + " boolean, " + // partition key
+ COLUMN_TRIPLE_TYPE + " int, " +
+ COLUMN_RULE + " int, " +
+ COLUMN_V1 + " bigint, " +
+ COLUMN_V2 + " bigint, " +
+ COLUMN_V3 + " bigint, " +
+// COLUMN_TRIPLE_TYPE + " int, " +
+ COLUMN_INFERRED_STEPS + " int, " + // from this line is non-primary key
+ COLUMN_TRANSITIVE_LEVELS + " int, " +
+ " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " +
+ COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 +
+ //", " + COLUMN_TRIPLE_TYPE +
+ " ) ) ";
+ session.execute(cquery1);
+
+ //SELECT ALL AND DEL ALL
+ SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications");
+ statement.setFetchSize(100);
+ ResultSet results = session.execute(statement);
+
+ System.out.println("--------DEL ALL----------");
+ for (Row row : results){
+
+ if(row.getInt(COLUMN_RULE) != 0){
+ session.execute("INSERT INTO mrjks.ruleiszero(sub, pre, obj, isliteral, tripletype, rule, v1, v2, v3, inferredsteps)" +
+ "VALUES (" +
+ row.getLong(COLUMN_SUB) + "," +
+ row.getLong(COLUMN_PRE) + "," +
+ row.getLong(COLUMN_OBJ) + "," +
+ row.getBool(COLUMN_IS_LITERAL) + "," +
+ row.getInt(COLUMN_TRIPLE_TYPE) + "," +
+ row.getInt(COLUMN_RULE) + "," +
+ row.getLong(COLUMN_V1) + "," +
+ row.getLong(COLUMN_V2) + "," +
+ row.getLong(COLUMN_V3) + "," +
+ row.getInt(COLUMN_INFERRED_STEPS) + ");");
+ System.out.println("-------Insert ----------");
+ System.out.println(row);
+ }
+
+ Statement delete = QueryBuilder.delete()
+ .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS)
+ .where(QueryBuilder.eq(COLUMN_SUB, row.getLong(CassandraDB.COLUMN_SUB)))
+ .and(QueryBuilder.eq(COLUMN_PRE, row.getLong(CassandraDB.COLUMN_PRE)))
+ .and(QueryBuilder.eq(COLUMN_OBJ, row.getLong(CassandraDB.COLUMN_OBJ)))
+ .and(QueryBuilder.eq(COLUMN_IS_LITERAL, row.getBool(COLUMN_IS_LITERAL)));
+ session.execute(delete);
+ System.out.println(row);
+ }
+ */
+// SimpleClientDataStax scds = new SimpleClientDataStax();
+// scds.connect(DEFAULT_HOST);
+//
+// System.out.println("Select Primary Key");
+// //modified select partition key and delete using partition key
+// Statement select = QueryBuilder.select()
+// .all()
+// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS)
+// .where(QueryBuilder.eq(COLUMN_INFERRED_STEPS, 0));
+// select.setFetchSize(100);
+// ResultSet result = scds.getSession().execute(select);
+// //List rows = scds.getSession().executeAsync(statement);
+// //List rows = scds.getSession().execute(select).all();
+//
+// while(true){
+// Row delrow = result.one();
+// if(delrow == null)
+// break;
+// Where dQuery = QueryBuilder.delete()
+// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS)
+// .where(QueryBuilder.eq(COLUMN_SUB, delrow.getLong(CassandraDB.COLUMN_SUB)))
+// .and(QueryBuilder.eq(COLUMN_PRE, delrow.getLong(CassandraDB.COLUMN_PRE)))
+// .and(QueryBuilder.eq(COLUMN_OBJ, delrow.getLong(CassandraDB.COLUMN_OBJ)))
+// .and(QueryBuilder.eq(COLUMN_IS_LITERAL, delrow.getBool(COLUMN_IS_LITERAL)));
+// System.out.println(delrow);
+// session.execute(dQuery);
+// }
+
+// Where dQuery = QueryBuilder.delete()
+// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS)
+// .where(QueryBuilder.eq(COLUMN_RULE, ByteBufferUtil.bytes(0)));
+// scds.getSession().execute(dQuery);
+
+// scds.close();
+
+// }
+
+ // Created by LiYang
+// public static void createReasonTable(){
+// SimpleClientDataStax scds = new SimpleClientDataStax();
+// scds.connect(DEFAULT_HOST);
+// //Statement st = QueryBuilder
+//
+// for (int i = 1; i <= 7; i++ ){
+// System.out.println("Select Primary Key");
+// //modified select partition key and delete using partition key
+// Statement select = QueryBuilder.select()
+// .all()
+// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS)
+// .where(QueryBuilder.eq(COLUMN_INFERRED_STEPS, i));
+// select.setFetchSize(100);
+// ResultSet result = scds.getSession().execute(select);
+//
+// Session session = scds.getSession();
+// while(true){
+// Row insertrow = result.one();
+// if(insertrow == null)
+// break;
+// Insert insert = QueryBuilder
+// .insertInto(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS)
+// .value(COLUMN_SUB, insertrow.getLong(CassandraDB.COLUMN_SUB))
+// .value(COLUMN_PRE, insertrow.getLong(CassandraDB.COLUMN_PRE))
+// .value(COLUMN_OBJ, insertrow.getLong(CassandraDB.COLUMN_OBJ))
+// .value(COLUMN_IS_LITERAL, insertrow.getBool(COLUMN_IS_LITERAL))
+// .value(COLUMN_TRIPLE_TYPE, insertrow.getLong(CassandraDB.COLUMN_TRIPLE_TYPE))
+// .value(COLUMN_SUB, insertrow.getLong(CassandraDB.COLUMN_SUB));
+//
+// }
+// }
+// }
public static void main(String[] args) {
try {
- CassandraDB db = new CassandraDB("localhost", 9160);
- db.init();
- db.createIndexOnTripleType();
- db.createIndexOnRule();
+ CassandraDB db = new CassandraDB();
+ db.init();
+// db.createIndexOnTripleType();
+// db.createIndexOnRule();
+// db.createIndexOnInferredSteps();
+// db.createIndexOnTransitiveLevel();
// db.insertResources(100, "Hello World!");
Set<Long> schemaTriples = new HashSet<Long>();
Set<Integer> filters = new HashSet<Integer>();
filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
db.loadSetIntoMemory(schemaTriples, filters, 0);
+ //db.loadMapIntoMemory(filters, inverted)
+
System.out.println(schemaTriples);
+ //modified 2015/5/19
System.out.println("Transitive: " + db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE));
System.exit(0);
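
A recurring fix in CassandraDB.java is appending ALLOW FILTERING to CQL3 queries that restrict a column outside the new (isliteral, rule, sub) partition key. A minimal standalone sketch of that prepared-query pattern over the same Thrift API the class uses, assuming a node at localhost:9160 and the mrjks.justifications table from this patch:

    import java.nio.ByteBuffer;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.cassandra.thrift.Cassandra;
    import org.apache.cassandra.thrift.Compression;
    import org.apache.cassandra.thrift.ConsistencyLevel;
    import org.apache.cassandra.thrift.CqlPreparedResult;
    import org.apache.cassandra.thrift.CqlResult;
    import org.apache.cassandra.thrift.CqlRow;
    import org.apache.cassandra.utils.ByteBufferUtil;
    import org.apache.thrift.protocol.TBinaryProtocol;
    import org.apache.thrift.transport.TFramedTransport;
    import org.apache.thrift.transport.TSocket;

    public class AllowFilteringSketch {
        public static void main(String[] args) throws Exception {
            TFramedTransport transport = new TFramedTransport(new TSocket("localhost", 9160));
            Cassandra.Client client = new Cassandra.Client(new TBinaryProtocol(transport));
            transport.open();
            client.set_keyspace("mrjks");
            // Filtering on a non-partition-key column needs an index or ALLOW FILTERING.
            CqlPreparedResult prepared = client.prepare_cql3_query(
                    ByteBufferUtil.bytes("SELECT sub, obj FROM justifications" +
                            " WHERE tripletype = ? ALLOW FILTERING"),
                    Compression.NONE);
            List<ByteBuffer> binds = new ArrayList<ByteBuffer>();
            binds.add(ByteBufferUtil.bytes(1));   // illustrative tripletype value
            CqlResult result = client.execute_prepared_cql3_query(
                    prepared.itemId, binds, ConsistencyLevel.ONE);
            for (CqlRow row : result.getRows())
                System.out.println(row);
            transport.close();
        }
    }
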
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java
new file mode 100644
index 0000000..8280f5f
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java
@@ -0,0 +1,66 @@
+package cn.edu.neu.mitt.mrj.io.dbs;
+
+import com.datastax.driver.core.Cluster;
+import com.datastax.driver.core.Host;
+import com.datastax.driver.core.Metadata;
+import com.datastax.driver.core.Session;
+
+public class CreateTables {
+ private Cluster cluster;
+ private Session session;
+
+ public Session getSession(){
+ return this.session;
+ }
+
+ public void connect(String node){
+ cluster = Cluster.builder()
+ .addContactPoint(node)
+ .build();
+ Metadata metadata = cluster.getMetadata();
+ System.out.printf("Connected to cluster: %s\n",
+ metadata.getClusterName());
+ for(Host host : metadata.getAllHosts()){
+ System.out.printf("Datatacenter: %s; Host: %s; Rack: %s|n",
+ host.getDatacenter(), host.getAddress(), host.getRack());
+ }
+ session = cluster.connect();
+ }
+
+ // Java driver 2.1
+ public void createSchema(Integer step){
+ session.execute("CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + ".step" + step +
+ " ( " +
+ "sub" + " bigint, " +
+ "pre" + " bigint, " +
+ "obj" + " bigint, " +
+ "rule int, " +
+ "v1" + " bigint, " +
+ "v2" + " bigint, " +
+ "v3" + " bigint, " +
+ "transitivelevel int" +
+ ", primary key((sub, pre, obj, rule) ,v1, v2, v3 )) WITH compaction = {'class': 'LeveledCompactionStrategy'}");
+ }
+
+ public void close(){
+ session.close();
+ cluster.close();
+ }
+
+ public static void main(String args[]){
+ CreateTables client = new CreateTables();
+ client.connect(CassandraDB.DEFAULT_HOST);
+ for (int i = 1; i < 14; i++) {
+ client.createSchema(i);
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ }
+ client.close();
+ }
+}
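
CreateTables drives the DataStax driver directly to create one table per derivation step. A hedged sketch of verifying the outcome through cluster metadata (driver 2.x API; the contact point and the mrjks keyspace are the patch's own assumptions):

    import com.datastax.driver.core.Cluster;
    import com.datastax.driver.core.KeyspaceMetadata;
    import com.datastax.driver.core.TableMetadata;

    public class ListStepTables {
        public static void main(String[] args) {
            Cluster cluster = Cluster.builder().addContactPoint("localhost").build();
            KeyspaceMetadata ks = cluster.getMetadata().getKeyspace("mrjks");
            if (ks != null) {
                for (TableMetadata table : ks.getTables()) {
                    // The per-step tables created above are named step1 .. step13.
                    if (table.getName().startsWith("step"))
                        System.out.println(table.getName() + " -> " + table.asCQLQuery());
                }
            }
            cluster.close();
        }
    }
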
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java
new file mode 100644
index 0000000..9bf3734
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java
@@ -0,0 +1,146 @@
+/**
+ *
+ */
+package cn.edu.neu.mitt.mrj.io.dbs;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.cassandra.hadoop.ConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * @author L
+ *
+ */
+
+public class MrjMultioutput<KEYOUT, VALUEOUT> extends MultipleOutputs<KEYOUT, VALUEOUT> {
+
+ private Map<String, TaskAttemptContext> taskContexts = new HashMap<String, TaskAttemptContext>();
+
+ public MrjMultioutput(TaskInputOutputContext<?, ?, KEYOUT, VALUEOUT> context) {
+ super(context);
+ }
+
+
+
+ // Copied from Hadoop 0.23.11; caches one TaskAttemptContext per named
+ // output so the Job is not reconstructed on every write.
+ @Override
+ protected TaskAttemptContext getContext(String nameOutput)
+ throws IOException {
+ TaskAttemptContext taskContext = taskContexts.get(nameOutput);
+
+ if (taskContext != null) {
+ return taskContext;
+ }
+
+ // The following trick leverages the instantiation of a record writer via
+ // the job thus supporting arbitrary output formats.
+ Job job = new Job(context.getConfiguration());
+ job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput));
+ job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput));
+ job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput));
+
+ taskContext = new TaskAttemptContext(
+ job.getConfiguration(), context.getTaskAttemptID());
+
+ taskContexts.put(nameOutput, taskContext);
+
+ return taskContext;
+ }
+
+
+
+ @Override
+ protected synchronized RecordWriter getRecordWriter(
+ TaskAttemptContext taskContext, String columnFamilyName)
+ throws IOException, InterruptedException {
+
+
+ // look for record-writer in the cache
+ RecordWriter writer = recordWriters.get(columnFamilyName);
+
+// System.out.println("get Record Writer");
+
+ // If not in cache, create a new one
+ if (writer == null) {
+ // get the record writer from context output format
+// FileOutputFormat.setOutputName(taskContext, baseFileName);
+// System.out.println("Before ConfigHelper.setOutputColumnFamily");
+// System.out.println(ConfigHelper.getOutputColumnFamily(taskContext.getConfiguration()));
+
+
+ ConfigHelper.setOutputColumnFamily(taskContext.getConfiguration(), columnFamilyName);
+// CqlConfigHelper.setOutputCql(taskContext.getConfiguration(), getCql(columnFamilyNameName));
+
+// CqlBulkOutputFormat.setColumnFamilySchema(
+// taskContext.getConfiguration(),
+// columnFamilyName,
+// getSchema(columnFamilyName));
+//
+// CqlBulkOutputFormat.setColumnFamilyInsertStatement(
+// taskContext.getConfiguration(),
+// columnFamilyName,
+// getInsertStatement(columnFamilyName));
+
+
+
+ try {
+// System.out.println(taskContext.getOutputFormatClass());
+ writer = ((OutputFormat) ReflectionUtils.newInstance(
+ taskContext.getOutputFormatClass(), taskContext.getConfiguration()))
+ .getRecordWriter(taskContext);
+
+// System.out.println(writer.getClass());
+ } catch (ClassNotFoundException e) {
+ throw new IOException(e);
+ }
+
+ // if counters are enabled, wrap the writer with context
+ // to increment counters
+ if (countersEnabled) {
+ writer = new MultipleOutputs.RecordWriterWithCounter(writer, columnFamilyName, context);
+ }
+
+ // add the record-writer to the cache
+ recordWriters.put(columnFamilyName, writer);
+ }
+ return writer;
+ }
+
+
+ String getCql(String columnFamilyName){
+ if ("alltriples".equals(columnFamilyName)) { // string equality, not ==
+ System.out.println("get cql allt");
+ return ("UPDATE alltriples SET inferredsteps =? , isliteral =? , tripletype =?");
+ }
+ System.out.println("get cql step");
+ return("UPDATE " + columnFamilyName + " SET transitivelevel =? ");
+ }
+
+// String getSchema(String columnFamilyNameName){
+//// System.out.println(columnFamilyNameName + " schema");
+// if (columnFamilyNameName == "alltriples") {
+// return CassandraDB.getAlltripleSchema();
+// }
+// return CassandraDB.getStepsSchema(columnFamilyNameName);
+// }
+//
+// String getInsertStatement(String columnFamilyNameName){
+//// System.out.println(columnFamilyNameName + " insert statement");
+// if (columnFamilyNameName == "alltriples") {
+// return CassandraDB.getAlltripleStatement();
+// }
+// return CassandraDB.getStepsStatement(columnFamilyNameName);
+// }
+
+}
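
MrjMultioutput specializes Hadoop's MultipleOutputs so that the name passed at write time doubles as the Cassandra column family the cached record writer targets. A sketch of how such a class is typically driven from a reducer, using only the standard MultipleOutputs surface; the named output "alltriples" and the key/value types are illustrative and would have to be registered on the job with addNamedOutput beforehand:

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

    public class RoutingReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        private MultipleOutputs<Text, LongWritable> outputs;

        @Override
        protected void setup(Context context) {
            outputs = new MultipleOutputs<Text, LongWritable>(context);
        }

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable v : values)
                sum += v.get();
            // The named output selects the record writer; with MrjMultioutput the
            // name is also installed as the output column family.
            outputs.write("alltriples", key, new LongWritable(sum));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            outputs.close();   // flush and close all cached record writers
        }
    }
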
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java
index 0b848cd..c8a6157 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java
@@ -2,17 +2,22 @@
* Project Name: mrj-0.1
* File Name: OWLHorstJustification.java
* @author Gang Wu
- * February 5, 2015, 4:58:08 PM
+ * February 5, 2015, 4:58:08 PM
*
* Description:
* TODO
*/
package cn.edu.neu.mitt.mrj.justification;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
import java.net.URI;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
@@ -29,13 +34,23 @@
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cn.edu.neu.mitt.mrj.data.Triple;
import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
+import cn.edu.neu.mitt.mrj.reasoner.Experiments;
import cn.edu.neu.mitt.mrj.utils.TripleKeyMapComparator;
+
+//modified cassandra java 2.0.5
import com.datastax.driver.core.TupleValue;
/**
@@ -53,6 +68,12 @@ public class OWLHorstJustification extends Configured implements Tool {
public static long pre = -1;
public static long obj = -1;
public static Path justificationsDirBase = new Path("/justification");
+
+ public static long totaltriples;
+ private static int tripleamount = 0;
+ public static int id; // experiment id, assigned from Experiments
+
+ private boolean bClearOriginals = false;
/**
*
@@ -79,6 +100,10 @@ public void parseArgs(String[] args) {
numMapTasks = Integer.valueOf(args[++i]);
if (args[i].equalsIgnoreCase("--reducetasks"))
numReduceTasks = Integer.valueOf(args[++i]);
+
+ // Added by WuGang 2015-06-08
+ if (args[i].equalsIgnoreCase("--clearoriginals"))
+ bClearOriginals = true;
}
}
@@ -93,7 +118,7 @@ public static void prepareInput(long sub, long pre, long obj, boolean literal) {
Configuration conf = new Configuration();
try {
int step = 0;
- Path justificationsDir = new Path(justificationsDirBase, String.valueOf(step)); // Under this directory, create a file named "original" to store the initial justification triples
+ Path justificationsDir = new Path(justificationsDirBase, String.valueOf(step)); // Under this directory, create a file named "original" to store the initial justification triples
FileSystem fs = FileSystem.get(URI.create(justificationsDir.toString()), conf);
if (!fs.exists(justificationsDir)) {
SequenceFile.Writer writer = SequenceFile.createWriter(fs,
@@ -115,6 +140,9 @@ private Job createJustificationJob(int step) throws IOException {
// Job
Configuration conf = new JobConf();
conf.setInt("maptasks", numMapTasks);
+
+ conf.setInt("id", id);
+
Job job = new Job(conf);
job.setJobName("OWL Horst Justification - Step " + step);
job.setJarByClass(OWLHorstJustification.class);
@@ -142,61 +170,114 @@ private Job createJustificationJob(int step) throws IOException {
job.setOutputKeyClass(Triple.class); // reduce output key (in next loop it will be tried to expanded)
job.setOutputValueClass(MapWritable.class); // reduce output value is an explanation
job.setOutputFormatClass(SequenceFileOutputFormat.class);
-
+
return job;
}
public long launchClosure(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
+ parseArgs(args);
+
+ // Added by WuGang 2015-06-08
+// if (bClearOriginals)
+// CassandraDB.removeOriginalTriples();
+
+
long total = 0; // Total justifications
long newExpanded = -1; // count of explanations that expanded in this loop
long startTime = System.currentTimeMillis();
int step = 0;
- parseArgs(args);
+ id = Experiments.id + 200;
+ System.out.println("id : " + id);
+
+
prepareInput(sub, pre, obj, false); // Default it is not a literal.
+ File outputFile = new File("output");
+ outputFile.createNewFile();
+ BufferedWriter out = new BufferedWriter(new FileWriter(outputFile, true));
+
+// out.write("id : " + id + "\r\n");
+// System.out.println(sub + " " + pre + " " + obj);
+ out.write("id : " + id + "\r\n");
+ out.write("sub : " + sub + " pre : " + pre + " obj : " + obj + "\r\n");
+
+
// find justifications
do{
log.info(">>>>>>>>>>>>>>>>>>>> Processing justification in step - " + step + " <<<<<<<<<<<<<<<<<<<<<<<<<");
+
+ out.write("step : " + step + "\r\n");
+
+// out.write("total : " + totaltriples + "\r\n");
+
Job job = createJustificationJob(step);
-
+
job.waitForCompletion(true);
+// int Retotal = 0;
+// Retotal = conf.getInt("id", 111);
+ // Counters can only be read after job.waitForCompletion(true);
+ Long result = job.getCounters().findCounter("Triples", "Triples").getValue();
+ out.write("Reduce triples : " + result + "\r\n");
+
+
newExpanded = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
Counter counterToProcess = job.getCounters().findCounter("OWL Horst Justifications Job", "ExplanationOutputs");
total += counterToProcess.getValue();
+
+
step++;
}while (newExpanded > 0);
+ //modified cassandra java 2.0.5
CassandraDB db = null;
+
try{
- db = new CassandraDB("localhost", 9160);
+ db = new CassandraDB();
db.getDBClient().set_keyspace(CassandraDB.KEYSPACE);
Set<Set<TupleValue>> justifications = db.getJustifications();
int count = 0;
+
for (Set<TupleValue> justification : justifications){
- System.out.println(">>>Justification - " + ++count + ":");
+// int tripleamount = 0;
+// System.out.println(">>>Justification - " + ++count + ":");
+// out.write(">>>Justification - " + ++count + ":" + "\r\n");
for(TupleValue triple : justification){
long sub = triple.getLong(0);
long pre = triple.getLong(1);
long obj = triple.getLong(2);
- System.out.println("\t<" + sub + ", " + pre + ", " + obj + ">" +
- " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">");
+// System.out.println("\t<" + sub + ", " + pre + ", " + obj + ">" +
+// " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">");
+// out.write("\t<" + sub + ", " + pre + ", " + obj + ">" +
+// " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">" + "\r\n");
+ tripleamount++;
}
+// System.out.println(tripleamount);
+ out.write("tripleamount : " + tripleamount + "\r\n");
}
+
+ db.CassandraDBClose();
+
}catch(Exception e){
System.err.println(e.getMessage());
}
-
-
-
+
System.out.println("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000);
System.out.println("Number justifications: " + total);
+// out.write("tripleamount : " + tripleamount + "\r\n");
+
+ out.write("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000 + "\r\n");
+ out.write("Number justifications: " + total + "\r\n\r\n");
+ out.flush();
+ out.close();
+
+
return total;
}
@@ -214,7 +295,7 @@ public int run(String[] args) throws Exception {
public static void main(String[] args) {
if (args.length < 2) {
- System.out.println("USAGE: OWLHorstJustification [DerivedTriples base path] [Justifications base path] [options]");
+ System.out.println("USAGE: OWLHorstJustification [options]");
return;
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java
index df05ca2..d7a4bf0 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java
@@ -13,6 +13,7 @@
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -393,5 +394,9 @@ protected void setup(Context context) throws IOException, InterruptedException {
te.printStackTrace();
}
}
+
+ protected void cleanup(Context context) throws IOException, InterruptedException{
+ db.CassandraDBClose();
+ }
}
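
The new cleanup() closes the per-task CassandraDB client that setup() opened, so Thrift connections are released when a map task ends. A skeletal mapper showing just that lifecycle; the input/output types are illustrative, and only the no-arg CassandraDB constructor and CassandraDBClose() from this patch are assumed:

    import java.io.IOException;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;

    public class DbLifecycleMapper extends Mapper<LongWritable, Text, Text, Text> {
        private CassandraDB db;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            try {
                db = new CassandraDB();   // one client per task, as in the patch
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            db.CassandraDBClose();        // mirrors the cleanup() added above
        }
    }
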
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java
index d477f56..b80060a 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java
@@ -3,10 +3,17 @@
*/
package cn.edu.neu.mitt.mrj.justification;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.util.HashSet;
import java.util.Set;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Writable;
@@ -15,8 +22,10 @@
import cn.edu.neu.mitt.mrj.data.Triple;
import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
import cn.edu.neu.mitt.mrj.io.dbs.SimpleClientDataStax;
+import cn.edu.neu.mitt.mrj.reasoner.Experiments;
import com.datastax.driver.core.DataType;
+//modified cassandra java 2.0.5
import com.datastax.driver.core.TupleType;
import com.datastax.driver.core.TupleValue;
import com.datastax.driver.core.querybuilder.Insert;
@@ -33,15 +42,52 @@ public class OWLHorstJustificationReducer extends
// private static Logger log = LoggerFactory.getLogger(OWLHorstJustificationReducer.class);
private static SimpleClientDataStax sClient = null;
+ private long triplenum = 0;
+
@Override
protected void reduce(MapWritable key, Iterable values, Context context)
throws IOException, InterruptedException {
long total = 0;
+ int id = 0;
+ Configuration reduceconf = context.getConfiguration();
+ id = reduceconf.getInt("id", 2);
+
for (LongWritable count:values){
total += count.get();
}
-// System.out.println("Total count is: " + total);
+ triplenum = total;
+ System.out.println("Reduce total count is: " + total);
+ //modified cassandra java 2.0.5
+
+// reduceconf.setInt("id", (int)total);
+
+
+
+
+// File outputFile = new File("output");
+// outputFile.createNewFile();
+// BufferedWriter out = new BufferedWriter(new FileWriter(outputFile, true));
+// out.write("Total count is: " + total);
+// out.flush();
+// out.close();
+
+// try{
+// Path pt=new Path("./result");
+// FileSystem fs = FileSystem.get(new Configuration());
+// BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true)));
+// // TO append data to a file, use fs.append(Path f)
+// String line;
+// line="Total count is: " + total;
+// System.out.println(line);
+// br.write(line);
+// br.close();
+// }catch(Exception e){
+// System.out.println("File not found");
+// }
+
+// System.out.println("Reduce id : " + Experiments.id); //均是0
+
if (total == key.size()){ // Find a candidate justification, output it to the database
Set resultJustification = new HashSet();
@@ -52,15 +98,19 @@ protected void reduce(MapWritable key, Iterable values, Context co
theValue.setLong(1, ((Triple)triple).getPredicate());
theValue.setLong(2, ((Triple)triple).getObject());
resultJustification.add(theValue);
+ System.out.println(" _______ " + ((Triple)triple).getSubject());
}
+ System.out.println("Write a candidate justification to database=========== ");
+ System.out.println(resultJustification.toString());
// log.info("Write a candidate justification to database=========== ");
// log.info(resultJustification.toString());
+ System.out.println(" REDUCE id : " + id);
Insert insert = QueryBuilder
.insertInto(CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_RESULTS)
.value(CassandraDB.COLUMN_JUSTIFICATION, resultJustification)
- .value(CassandraDB.COLUMN_ID, UUIDs.timeBased());
+ .value("id", id);
sClient.getSession().execute(insert);
// Added by WuGang 2015-02-14
@@ -72,6 +122,8 @@ protected void reduce(MapWritable key, Iterable values, Context co
}
} // else do nothing.
+// OWLHorstJustification.totaltriples = total;
+
}
@Override
@@ -84,6 +136,8 @@ protected void setup(Context context)
@Override
protected void cleanup(Context context)
throws IOException, InterruptedException {
+ context.getCounter("Triples", "Triples").increment(triplenum);
+
sClient.close();
}
}
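
The reducer now keys each stored justification with the experiment id it reads from the job configuration, instead of a time-based UUID. A hedged sketch of the same QueryBuilder insert in isolation (driver 2.x; the tuple layout mirrors the reducer, and the literal ids are illustrative):

    import java.util.HashSet;
    import java.util.Set;

    import com.datastax.driver.core.Cluster;
    import com.datastax.driver.core.DataType;
    import com.datastax.driver.core.Session;
    import com.datastax.driver.core.TupleType;
    import com.datastax.driver.core.TupleValue;
    import com.datastax.driver.core.querybuilder.Insert;
    import com.datastax.driver.core.querybuilder.QueryBuilder;

    import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;

    public class ResultInsertSketch {
        public static void main(String[] args) {
            Cluster cluster = Cluster.builder()
                    .addContactPoint(CassandraDB.DEFAULT_HOST).build();
            Session session = cluster.connect();

            // One (sub, pre, obj) triple packed as a tuple, as the reducer does.
            TupleType tripleType = TupleType.of(
                    DataType.bigint(), DataType.bigint(), DataType.bigint());
            TupleValue triple = tripleType.newValue();
            triple.setLong(0, 1L);
            triple.setLong(1, 2L);
            triple.setLong(2, 3L);

            Set<TupleValue> justification = new HashSet<TupleValue>();
            justification.add(triple);

            Insert insert = QueryBuilder
                    .insertInto(CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_RESULTS)
                    .value(CassandraDB.COLUMN_JUSTIFICATION, justification)
                    .value("id", 202);   // experiment id from the job conf
            session.execute(insert);
            cluster.close();
        }
    }
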
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java
new file mode 100644
index 0000000..d856550
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java
@@ -0,0 +1,90 @@
+package cn.edu.neu.mitt.mrj.reasoner;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.concurrent.ThreadLocalRandom;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.ToolRunner;
+
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
+import cn.edu.neu.mitt.mrj.justification.OWLHorstJustification;
+
+import com.datastax.driver.core.Cluster;
+import com.datastax.driver.core.Host;
+import com.datastax.driver.core.Metadata;
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.Session;
+import com.datastax.driver.core.SocketOptions;
+import com.datastax.driver.core.Cluster.Builder;
+public class Experiments {
+
+ public static int id;
+
+ public static void main(String[] args){
+ Builder builder = Cluster.builder();
+ builder.addContactPoint(CassandraDB.DEFAULT_HOST);
+ SocketOptions socketoptions = new SocketOptions().setKeepAlive(true).setConnectTimeoutMillis(5 * 10000).setReadTimeoutMillis(100000);
+ builder.withSocketOptions(socketoptions);
+ Cluster cluster = builder.build();
+ Metadata metadata = cluster.getMetadata();
+ Session session = cluster.connect();
+
+// Random r = new Random(System.currentTimeMillis()) ;
+// int random = 0;
+// if (r.nextBoolean()) {
+// random = r.nextInt(101) ;
+// } else {
+// random = -r.nextInt(101) ;
+// }
+ for (id = 0; id < 10; id++) {
+ long random = ThreadLocalRandom.current().nextLong(-9223372036854775808L, 9223372036854775807L);
+// long startTime = System.currentTimeMillis();
+ ResultSet results = session.execute("SELECT sub ,pre ,obj FROM mrjks.resultrows WHERE TOKEN(isliteral , rule , sub ) > " + random + " LIMIT 1;");
+// System.out.println(results);
+ for (Row row : results){
+ Configuration conf = new Configuration();
+ try {
+ FileSystem hdfs = FileSystem.get(conf);
+ Path deledir= new Path("/justification");
+ hdfs.delete(deledir, true); // clear any previous /justification output
+ } catch (IOException e1) {
+ e1.printStackTrace();
+ }
+
+// System.out.println("id : " + id);
+
+ Long sub, pre, obj;
+ sub = row.getLong("sub");
+ pre = row.getLong("pre");
+ obj = row.getLong("obj");
+ System.out.println("sub : " + sub + " pre : " + pre + " obj : " + obj);
+ // Note: the argument values must not contain spaces
+ String[] argStrings = {"--maptasks", "8", "--reducetasks", "8", "--subject", sub.toString(), "--predicate", pre.toString(), "--object", obj.toString(), "--clearoriginals"};
+// OWLHorstJustification OWJ = new OWLHorstJustification();
+ System.out.println(Arrays.toString(argStrings));
+ OWLHorstJustification.main(argStrings);
+
+// try {
+// OWJ.launchClosure(argStrings);
+// } catch (ClassNotFoundException | IOException
+// | InterruptedException e) {
+// System.out.println("launchClosure error");
+// e.printStackTrace();
+// }
+
+ }
+// System.out.println("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000);
+ }
+ cluster.close();
+ }
+}
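
Experiments samples a pseudo-random stored row by drawing a uniform 64-bit Murmur3 token and taking the first row whose partition token exceeds it; note this mildly favors partitions that sit after large token gaps. A condensed sketch of the trick, with table and key columns taken from the patch:

    import java.util.concurrent.ThreadLocalRandom;

    import com.datastax.driver.core.Cluster;
    import com.datastax.driver.core.Row;
    import com.datastax.driver.core.Session;

    public class RandomRowSample {
        public static void main(String[] args) {
            Cluster cluster = Cluster.builder().addContactPoint("localhost").build();
            Session session = cluster.connect();
            long token = ThreadLocalRandom.current().nextLong();  // full 64-bit range
            Row row = session.execute(
                    "SELECT sub, pre, obj FROM mrjks.resultrows" +
                    " WHERE TOKEN(isliteral, rule, sub) > " + token + " LIMIT 1").one();
            if (row != null)
                System.out.println(row.getLong("sub") + " " + row.getLong("pre")
                        + " " + row.getLong("obj"));
            cluster.close();
        }
    }
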
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
index 03a6e64..dcc2125 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java
@@ -2,23 +2,27 @@
* Project Name: mrj-0.1
* File Name: MapReduceJobConfig.java
* @author Gang Wu
- * December 28, 2014, 10:44:16 AM
+ * December 28, 2014, 10:44:16 AM
*
* Description:
* TODO
*/
package cn.edu.neu.mitt.mrj.reasoner;
+
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.cassandra.hadoop.ConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat;
import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
import org.apache.cassandra.hadoop.cql3.CqlInputFormat;
import org.apache.cassandra.hadoop.cql3.CqlOutputFormat;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
@@ -31,72 +35,115 @@ public class MapReduceReasonerJobConfig {
// Input from CassandraDB.COLUMNFAMILY_JUSTIFICATIONS
- private static void configureCassandraInput(Job job, Set filters) {
+ private static void configureCassandraInput(Job job, Set typeFilters, Set transitiveLevelFilters, int certainStep) {
//Set the input
- ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
+ ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
// Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml
//ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
- ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
+ ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
- if (filters.size() == 0){
- CqlConfigHelper.setInputCql(job.getConfiguration(),
- "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
- " WHERE TOKEN(" +
- CassandraDB.COLUMN_SUB + ", " +
- CassandraDB.COLUMN_PRE + ", " +
- CassandraDB.COLUMN_OBJ + ", " +
- CassandraDB.COLUMN_IS_LITERAL +
- ") > ? AND TOKEN(" +
- CassandraDB.COLUMN_SUB + ", " +
- CassandraDB.COLUMN_PRE + ", " +
- CassandraDB.COLUMN_OBJ + ", " +
- CassandraDB.COLUMN_IS_LITERAL +
- ") <= ? ALLOW FILTERING");
+ if (typeFilters.size() == 0){
+
+ if (transitiveLevelFilters.size() == 0)
+ CqlConfigHelper.setInputCql(job.getConfiguration(),
+ "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
+ " WHERE TOKEN(" +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ ") > ? AND TOKEN(" +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ ") <= ? ALLOW FILTERING");
+// "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
+// " WHERE TOKEN(" +
+// CassandraDB.COLUMN_SUB + ", " +
+// CassandraDB.COLUMN_PRE + ", " +
+// CassandraDB.COLUMN_OBJ + ", " +
+// CassandraDB.COLUMN_IS_LITERAL +
+// ") > ? AND TOKEN(" +
+// CassandraDB.COLUMN_SUB + ", " +
+// CassandraDB.COLUMN_PRE + ", " +
+// CassandraDB.COLUMN_OBJ + ", " +
+// CassandraDB.COLUMN_IS_LITERAL +
+// ") <= ? ALLOW FILTERING");
+ else{
+ Integer max = java.util.Collections.max(transitiveLevelFilters);
+ Integer min = java.util.Collections.min(transitiveLevelFilters);
+
+
+ CqlConfigHelper.setInputCql(job.getConfiguration(),
+ "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
+ " WHERE TOKEN(" +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ ") > ? AND TOKEN(" +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ ") <= ? " +
+// CassandraDB.COLUMN_INFERRED_STEPS + " = " + certainStep + " AND " +
+// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " >= " + min + " AND " +
+// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " <= " + max +
+ " ALLOW FILTERING");
+ }
+
}
- else if (filters.size() == 1){
+ else if (typeFilters.size() == 1){
+ if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property
+ System.err.println("This is not supported!!!");
+ return;
+ }
+
CqlConfigHelper.setInputCql(job.getConfiguration(),
"SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
" WHERE TOKEN(" +
- CassandraDB.COLUMN_SUB + ", " +
- CassandraDB.COLUMN_PRE + ", " +
- CassandraDB.COLUMN_OBJ + ", " +
- CassandraDB.COLUMN_IS_LITERAL +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
") > ? AND TOKEN(" +
- CassandraDB.COLUMN_SUB + ", " +
- CassandraDB.COLUMN_PRE + ", " +
- CassandraDB.COLUMN_OBJ + ", " +
- CassandraDB.COLUMN_IS_LITERAL +
- ") <= ? AND " +
- CassandraDB.COLUMN_TRIPLE_TYPE + " = " + filters.toArray()[0] +
- " ALLOW FILTERING");
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ ") <= ? ");
+// ") <= ? AND " +
+// CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] +
+// " ALLOW FILTERING");
}else{
+ if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property
+ System.err.println("This is not supported!!!");
+ return;
+ }
+
+
// The support of IN clause in cassandra db's SELECT is restricted.
// So we have to try to manually cluster the values in the filters.
// see http://www.datastax.com/documentation/cql/3.1/cql/cql_reference/select_r.html#reference_ds_d35_v2q_xj__selectIN
System.out.println("<<<<<<<>>>>>>>>");
System.out.println("<<<<<<<>>>>>>>>");
- Integer max = java.util.Collections.max(filters);
- Integer min = java.util.Collections.min(filters);
+ Integer max = java.util.Collections.max(typeFilters);
+ Integer min = java.util.Collections.min(typeFilters);
CqlConfigHelper.setInputCql(job.getConfiguration(),
"SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
" WHERE TOKEN(" +
- CassandraDB.COLUMN_SUB + ", " +
- CassandraDB.COLUMN_PRE + ", " +
- CassandraDB.COLUMN_OBJ + ", " +
- CassandraDB.COLUMN_IS_LITERAL +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
") > ? AND TOKEN(" +
- CassandraDB.COLUMN_SUB + ", " +
- CassandraDB.COLUMN_PRE + ", " +
- CassandraDB.COLUMN_OBJ + ", " +
- CassandraDB.COLUMN_IS_LITERAL +
- ") <= ? AND " +
- CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " +
- CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max +
- " ALLOW FILTERING");
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ ") <= ? ");
+// + "AND " +
+// CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " +
+// CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max +
+// " ALLOW FILTERING");
// String strFilter = filters.toString();
// String strInFilterClause = strFilter.substring(1, strFilter.length()-1); // remove "[" and "]" characters of Set.toString()
@@ -120,7 +167,8 @@ else if (filters.size() == 1){
}
CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE);
- ConfigHelper.setInputSplitSize(job.getConfiguration(), 180);
+ // Modified by LiYang
+ ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000);
job.setInputFormatClass(CqlInputFormat.class);
System.out.println("ConfigHelper.getInputSplitSize - input: " + ConfigHelper.getInputSplitSize(job.getConfiguration()));
System.out.println("CqlConfigHelper.getInputPageRowSize - input: " + CqlConfigHelper.getInputPageRowSize(job.getConfiguration()));
@@ -129,38 +177,53 @@ else if (filters.size() == 1){
// Output to CassandraDB.COLUMNFAMILY_JUSTIFICATIONS
- private static void configureCassandraOutput(Job job) {
+ private static void configureCassandraOutput(Job job, int step) {
//Set the output
job.setOutputKeyClass(Map.class);
job.setOutputValueClass(List.class);
- job.setOutputFormatClass(CqlOutputFormat.class);
- ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
- ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
- ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
- String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
- " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? ";
- CqlConfigHelper.setOutputCql(job.getConfiguration(), query);
+
+ job.setOutputFormatClass(CqlBulkOutputFormat.class);
+ CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS, CassandraDB.getJustificationsSchema());
+ CqlBulkOutputFormat.setColumnFamilyInsertStatement(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS, CassandraDB.getJustificationseStatement());
+ CqlBulkOutputFormat.setDeleteSourceOnSuccess(job.getConfiguration(), true);
+
+ ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
+ ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
+
+ ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE);
+ ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
+
+// MrjMultioutput.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, CqlBulkOutputFormat.class, ByteBuffer.class, List.class);
+// MrjMultioutput.addNamedOutput(job, "step" + step, CqlBulkOutputFormat.class, ByteBuffer.class, List.class);
+// CqlConfigHelper.setOutputCql(conf, "select * from step1");
}
- // In each derivation, we may create a set of jobs
+ // In each derivation, we may create a set of jobs
+ // certainStep is optional, if it is specified then we can use it to filter transitiveLevel with non-equal operator
+ // (see cql specification)
public static Job createNewJob(Class> classJar, String jobName,
- Set filters, int numMapTasks, int numReduceTasks,
- boolean bConfigCassandraInput, boolean bConfigCassandraOutput)
+ Set typeFilters, Set transitiveLevelFilters, int certainStep, int numMapTasks, int numReduceTasks,
+ boolean bConfigCassandraInput, boolean bConfigCassandraOutput, Integer step)
throws IOException {
Configuration conf = new Configuration();
conf.setInt("maptasks", numMapTasks);
- conf.set("input.filter", filters.toString());
-
+ conf.set("input.filter", typeFilters.toString());
+
+ conf.set("mapreduce.output.bulkoutputformat.streamthrottlembits", "400");
+
Job job = new Job(conf);
job.setJobName(jobName);
job.setJarByClass(classJar);
job.setNumReduceTasks(numReduceTasks);
+ job.setNumReduceTasks(8); // hard-coded override of the --reducetasks value
+
if (bConfigCassandraInput)
- configureCassandraInput(job, filters);
+ configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep);
if (bConfigCassandraOutput)
- configureCassandraOutput(job);
+ configureCassandraOutput(job, step);
+
// Added by WuGang 2010-05-25
System.out.println("Create a job - " + jobName);
@@ -169,6 +232,44 @@ public static Job createNewJob(Class> classJar, String jobName,
return job;
}
-
-
+/*
+ public static void CreateTables(String jobname){
+ Builder builder = Cluster.builder();
+ builder.addContactPoint(CassandraDB.DEFAULT_HOST);
+ SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000);
+ Cluster clu = builder.build();
+ Session session = clu.connect();
+
+ String query = "";
+ if(jobname == "RDFS special properties reasoning"){
+ query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." + jobname +
+ " ( " +
+ "sub" + " bigint, " +
+ "pre" + " bigint, " +
+ "obj" + " bigint, " +
+ "rule int, " +
+ "v1" + " bigint, " +
+ "v2" + " bigint, " +
+ "v3" + " bigint, " +
+ "transitiveleves int" +
+ ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))";
+ }
+ else {
+ query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." + jobname +
+ " ( " +
+ "sub" + " bigint, " +
+ "pre" + " bigint, " +
+ "obj" + " bigint, " +
+ "rule int, " +
+ "v1" + " bigint, " +
+ "v2" + " bigint, " +
+ "v3" + " bigint, " +
+ ", primary key((id, rule) ,v1, v2, v3))";
+ }
+
+ session.execute(query);
+ System.out.println(query);
+ System.out.println("--------Create Table----------");
+ }
+ */
}
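
All input paths in this class now converge on one token-range SELECT over the new (isliteral, rule, sub) partition key, with two bind markers that CqlInputFormat fills per split. A minimal sketch of that input wiring, using the same helpers as the patch; the host and partitioner literals stand in for the Cassandraconf values:

    import org.apache.cassandra.hadoop.ConfigHelper;
    import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
    import org.apache.cassandra.hadoop.cql3.CqlInputFormat;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    public class InputWiringSketch {
        public static Job configure() throws Exception {
            Job job = new Job(new Configuration());
            ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
            ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
            ConfigHelper.setInputColumnFamily(job.getConfiguration(), "mrjks", "justifications");
            // The two ?s are the per-split token bounds CqlInputFormat binds itself.
            CqlConfigHelper.setInputCql(job.getConfiguration(),
                    "SELECT * FROM mrjks.justifications" +
                    " WHERE TOKEN(isliteral, rule, sub) > ?" +
                    " AND TOKEN(isliteral, rule, sub) <= ? ALLOW FILTERING");
            job.setInputFormatClass(CqlInputFormat.class);
            return job;
        }
    }
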
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java
new file mode 100644
index 0000000..64ffe76
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java
@@ -0,0 +1,81 @@
+package cn.edu.neu.mitt.mrj.reasoner;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import cn.edu.neu.mitt.mrj.reasoner.owl.OWLReasoner;
+import cn.edu.neu.mitt.mrj.reasoner.rdfs.RDFSReasoner;
+
+public class RDFSOWLReasoner {
+
+ protected static Logger log = LoggerFactory.getLogger(RDFSOWLReasoner.class);
+
+ static int step = 0;
+
+ private static void parseArgs(String[] args) {
+
+ for(int i=0;i 0;
+ firstLoop = false;
+ }
+// log.info("Number triples derived: " + totalDerivation);
+// log.info("Time derivation: " + (System.currentTimeMillis() - startTime));
+ System.out.println("Number triples derived: " + totalDerivation);
+ System.out.println("Time derivation: " + (System.currentTimeMillis() - startTime));
+ } catch (Exception e) {
+ log.error(e.getMessage());
+ e.printStackTrace();
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java
new file mode 100644
index 0000000..08fba3e
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java
@@ -0,0 +1,79 @@
+package cn.edu.neu.mitt.mrj.reasoner;
+
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.cassandra.hadoop.ConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlInputFormat;
+import org.apache.cassandra.hadoop.cql3.CqlOutputFormat;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
+
+public class ReasonedJustifications extends Configured implements Tool{
+ public int run(String[] args) throws Exception{
+
+ Configuration conf = new Configuration();
+
+ Job job = new Job(conf);
+ job.setJobName(" Test ");
+ job.setJarByClass(ReasonedJustifications.class);
+ job.setNumReduceTasks(8);
+
+ ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
+ ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
+ ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
+ CqlConfigHelper.setInputCql(job.getConfiguration(),
+ "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS +
+ " WHERE TOKEN(" +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ //CassandraDB.COLUMN_IS_LITERAL +
+ ") > ? AND TOKEN(" +
+ CassandraDB.COLUMN_IS_LITERAL + ", " +
+ CassandraDB.COLUMN_RULE + ", " +
+ CassandraDB.COLUMN_SUB +
+ //CassandraDB.COLUMN_IS_LITERAL +
+ ") <= ? ALLOW FILTERING");
+ CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE);
+ //Modified by LiYang
+ ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000);
+ job.setInputFormatClass(CqlInputFormat.class);
+ job.setOutputKeyClass(Map.class);
+ job.setOutputValueClass(List.class);
+ job.setOutputFormatClass(CqlOutputFormat.class);
+ ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host);
+ ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner);
+
+ ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS);
+ String query = "UPDATE " + CassandraDB.KEYSPACE + "." + "resultrows" +
+ " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?";
+ CqlConfigHelper.setOutputCql(job.getConfiguration(), query);
+
+ job.setMapperClass(ReasonedJustificationsMapper.class);
+ job.setMapOutputKeyClass(Text.class);
+ job.setMapOutputValueClass(IntWritable.class);
+ job.setReducerClass(ReasonedJustificationsReducer.class);
+
+
+
+ job.waitForCompletion(true);
+
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ int res = ToolRunner.run(new Configuration(), new ReasonedJustifications(), args);
+ System.exit(res);
+ }
+
+}
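
On the output side, CqlOutputFormat is given only the SET fragment of an UPDATE; the WHERE clause over the key columns supplied by the reducer is appended by the output format itself. A hedged sketch of that wiring, with the resultrows table and column constants taken from the patch:

    import org.apache.cassandra.hadoop.ConfigHelper;
    import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
    import org.apache.cassandra.hadoop.cql3.CqlOutputFormat;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.Job;

    import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;

    public class OutputWiringSketch {
        public static Job configure() throws Exception {
            Job job = new Job(new Configuration());
            ConfigHelper.setOutputInitialAddress(job.getConfiguration(), CassandraDB.DEFAULT_HOST);
            ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(),
                    CassandraDB.KEYSPACE, "resultrows");
            // Only the SET part with its bind markers is supplied here.
            CqlConfigHelper.setOutputCql(job.getConfiguration(),
                    "UPDATE " + CassandraDB.KEYSPACE + ".resultrows SET "
                            + CassandraDB.COLUMN_INFERRED_STEPS + " = ?, "
                            + CassandraDB.COLUMN_TRANSITIVE_LEVELS + " = ?");
            job.setOutputFormatClass(CqlOutputFormat.class);
            return job;
        }
    }
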
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java
new file mode 100644
index 0000000..e2142fc
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java
@@ -0,0 +1,91 @@
+package cn.edu.neu.mitt.mrj.reasoner;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.cassandra.thrift.Compression;
+import org.apache.cassandra.thrift.CqlPreparedResult;
+import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
+
+import com.datastax.driver.core.Cluster;
+import com.datastax.driver.core.Metadata;
+import com.datastax.driver.core.ResultSet;
+import com.datastax.driver.core.Row;
+import com.datastax.driver.core.Session;
+import com.datastax.driver.core.SimpleStatement;
+
+public class ReasonedJustificationsMapper extends Mapper<Long, Row, Text, IntWritable>{
+ private Cluster cluster;
+ private Session session;
+
+ public void map(Long keys, Row rows, Context context) throws IOException, InterruptedException{
+
+ Integer inferredsteps;
+ Integer transitivelevel;
+ // for (Row rows : row){
+ if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) {
+
+ String conKey;
+ // Build the key by plain string concatenation (ByteBufferUtil is not used here)
+ conKey = rows.getLong(CassandraDB.COLUMN_SUB)
+ + "_" + rows.getLong(CassandraDB.COLUMN_PRE)
+ + "_" + rows.getLong(CassandraDB.COLUMN_OBJ)
+ + "_" + rows.getBool(CassandraDB.COLUMN_IS_LITERAL)
+ + "_" + rows.getInt(CassandraDB.COLUMN_TRIPLE_TYPE)
+ + "_" + rows.getInt(CassandraDB.COLUMN_RULE)
+ + "_" + rows.getLong(CassandraDB.COLUMN_V1)
+ + "_" + rows.getLong(CassandraDB.COLUMN_V2)
+ + "_" + rows.getLong(CassandraDB.COLUMN_V3)
+ + "_" + rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS); // Modified by WuGang, 2015-07-15
+ transitivelevel = rows.getInt(CassandraDB.COLUMN_TRANSITIVE_LEVELS); // Added by WuGang, 2015-07-15
+
+ context.write(new Text(conKey), new IntWritable(transitivelevel));
+ }
+ //}
+
+ }
+
+ public void setup(Context context) throws IOException, InterruptedException{
+ cluster = Cluster.builder().addContactPoint(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host).build();
+ Metadata metadata = cluster.getMetadata();
+ System.out.printf("-------Connected to cluster: %s\n", metadata.getClusterName());
+ session = cluster.connect();
+
+ String query = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "resultrows" +
+ " ( " +
+ CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key
+ CassandraDB.COLUMN_RULE + " int, " +
+ CassandraDB.COLUMN_SUB + " bigint, " + // partition key
+ CassandraDB.COLUMN_TRIPLE_TYPE + " int, " +
+ CassandraDB.COLUMN_PRE + " bigint, " + // partition key
+ CassandraDB.COLUMN_OBJ + " bigint, " + // partition key
+ CassandraDB.COLUMN_V1 + " bigint, " +
+ CassandraDB.COLUMN_V2 + " bigint, " +
+ CassandraDB.COLUMN_V3 + " bigint, " +
+ CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key
+ CassandraDB.COLUMN_TRANSITIVE_LEVELS + " int, " +
+ " PRIMARY KEY ((" + CassandraDB.COLUMN_IS_LITERAL + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_SUB + "), " +
+ CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 +
+ //", " + COLUMN_TRIPLE_TYPE +
+ " ) ) ";
+
+// session.execute(query);
+// query = "CREATE INDEX on mrjks.resultrows (sub) ;";
+// session.execute(query);
+// query = "CREATE INDEX on mrjks.resultrows (obj) ;";
+// session.execute(query);
+// query = "CREATE INDEX on mrjks.resultrows (pre) ;";
+// session.execute(query);
+// query = "CREATE INDEX on mrjks.resultrows (isliteral) ;";
+// session.execute(query);
+
+ }
+}
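
The mapper flattens all primary-key columns of a row into a single underscore-joined Text key, which the reducer later splits apart by position. A tiny standalone sketch of the encode/decode round trip, with illustrative values:

    public class CompositeKeySketch {
        public static void main(String[] args) {
            long sub = 1L, pre = 2L, obj = 3L, v1 = 6L, v2 = 7L, v3 = 8L;
            boolean isLiteral = false;
            int tripleType = 4, rule = 5, inferredSteps = 9;

            // Encode: same field order as ReasonedJustificationsMapper.
            String key = sub + "_" + pre + "_" + obj + "_" + isLiteral
                    + "_" + tripleType + "_" + rule
                    + "_" + v1 + "_" + v2 + "_" + v3 + "_" + inferredSteps;

            // Decode: positional split, as in the reducer.
            String[] parts = key.split("_");
            System.out.println("sub=" + Long.parseLong(parts[0])
                    + " isliteral=" + Boolean.parseBoolean(parts[3])
                    + " inferredsteps=" + Integer.parseInt(parts[9]));
        }
    }
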
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java
new file mode 100644
index 0000000..ca30fc7
--- /dev/null
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java
@@ -0,0 +1,46 @@
+package cn.edu.neu.mitt.mrj.reasoner;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
+
+public class ReasonedJustificationsReducer extends Reducer<Text, IntWritable, Map<String, ByteBuffer>, List<ByteBuffer>>{
+ public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException{
+
+ for (IntWritable value : values) {
+ //Prepare the insert keys collection
+ String[] splitkeys = key.toString().split("_");
+ Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
+ keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(Long.parseLong(splitkeys[0])));
+ keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(Long.parseLong(splitkeys[1])));
+ keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(Long.parseLong(splitkeys[2])));
+ // booleans lack a ByteBufferUtil helper; encode as one byte (1 = true, 0 = false)
+ keys.put(CassandraDB.COLUMN_IS_LITERAL, Boolean.valueOf(splitkeys[3])?ByteBuffer.wrap(new byte[]{1}):ByteBuffer.wrap(new byte[]{0}));
+ keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(Integer.parseInt(splitkeys[4])));
+ keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(Integer.parseInt(splitkeys[5])));
+ keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(Long.parseLong(splitkeys[6])));
+ keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(Long.parseLong(splitkeys[7])));
+ keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(Long.parseLong(splitkeys[8])));
+
+ //prepare the insert variables collection
+ List<ByteBuffer> variables = new ArrayList<ByteBuffer>();
+ // The mapper carries inferredsteps in the key's tenth field and
+ // transitivelevel in the map value; bind each to its matching column.
+ int var_inferredsteps = Integer.parseInt(splitkeys[9]);
+ variables.add(ByteBufferUtil.bytes(var_inferredsteps));
+ int var_transitivelevel = value.get();
+ variables.add(ByteBufferUtil.bytes(var_transitivelevel));
+ context.write(keys, variables);
+ }
+
+ }
+
+}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java
index cb27ef0..8cad3d8 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java
@@ -16,6 +16,7 @@
import org.apache.cassandra.thrift.UnavailableException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.thrift.TException;
import org.apache.thrift.transport.TTransportException;
import org.slf4j.Logger;
@@ -35,7 +36,7 @@ public class OWLAllSomeValuesMapper extends Mapper<Long, Row, BytesWritable, BytesWritable>
 		... >= previousDerivation) {
-
- log.info("And I met a triple with RDF_TYPE as predicate: " + value);
+ //DEL
+// log.info("And I met a triple with RDF_TYPE as predicate: " + value);
-			// We still need to output a w
-			if (someValues.containsKey(value.getObject())) { // Found a triple (x, rdf:type, w) where w satisfies v owl:someValuesFrom w
+			// We still need to output a w
+			if (someValues.containsKey(value.getObject())) { // Found a triple (x, rdf:type, w) where w satisfies v owl:someValuesFrom w
log.info("I met someValuesFrom: " + value);
				Collection<byte[]> values = someValues.get(value.getObject());
				Iterator<byte[]> itr = values.iterator();
bKey[0] = 2;
bValue[0] = 1;
-				bValue[17] = 0;	// mark this record as coming from someValues
+				bValue[17] = 0;	// mark this record as coming from someValues
NumberUtils.encodeLong(bKey, 9, value.getSubject());
-				NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, put w into the value
+				NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, put w into the value
while (itr.hasNext()) {
byte[] bytes = itr.next();
System.arraycopy(bytes, 0, bKey, 1, 8);
System.arraycopy(bytes, 8, bValue, 1, 8);
-					context.write(oKey, oValue); // output here: ((p,x),v) -> ((p,x),(v,w,0))
+					context.write(oKey, oValue); // output here: ((p,x),v) -> ((p,x),(v,w,0))
}
}
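			// (recap sketch) For rule 15 the mapper has now emitted ((p,x),(v,w,0)) join records; the
			// reducer pairs them with the plain resource records ((p,x),u) to derive new rdf:type triples.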
-			// We still need to output a v
-			if (allValues.containsKey(value.getObject())) { // Found a triple (w, rdf:type, v) where v satisfies v owl:allValuesFrom u
+			// We still need to output a v
+			if (allValues.containsKey(value.getObject())) { // Found a triple (w, rdf:type, v) where v satisfies v owl:allValuesFrom u
log.info("I met allValuesFrom: " + value);
				Collection<byte[]> values = allValues.get(value.getObject());
				Iterator<byte[]> itr = values.iterator();
bKey[0] = 1;
bValue[0] = 1;
-				bValue[17] = 1;	// mark this record as coming from allValues
+				bValue[17] = 1;	// mark this record as coming from allValues
NumberUtils.encodeLong(bKey, 9, value.getSubject());
-				NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, put v into the value
+				NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, put v into the value
while (itr.hasNext()) {
byte[] bytes = itr.next();
System.arraycopy(bytes, 0, bKey, 1, 8);
System.arraycopy(bytes, 8, bValue, 1, 8);
-					context.write(oKey, oValue); // output here: ((p,w),u) -> ((p,w),(u,v,1))
+					context.write(oKey, oValue); // output here: ((p,w),u) -> ((p,w),(u,v,1))
}
}
} else {
-			// onPropertySome holds the properties referenced through owl:onProperty for all triples of the form v owl:someValuesFrom w
-			if (onPropertySome.contains(value.getPredicate())) { // the p of some triple (u p x) is a property referenced by onPropertySome
+			// onPropertySome holds the properties referenced through owl:onProperty for all triples of the form v owl:someValuesFrom w
+			if (onPropertySome.contains(value.getPredicate())) { // the p of some triple (u p x) is a property referenced by onPropertySome
//Rule 15 - someValuesFrom
log.info("I met onPropertySome: " + value);
bKey[0] = 2;
@@ -102,19 +103,19 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
NumberUtils.encodeLong(bKey, 1, value.getPredicate());
NumberUtils.encodeLong(bKey, 9, value.getObject());
NumberUtils.encodeLong(bValue, 1, value.getSubject());
-				context.write(oKey, oValue); // output ((p,x),(u,,)); the last two fields of the value keep their defaults, nothing is assigned
+				context.write(oKey, oValue); // output ((p,x),(u,,)); the last two fields of the value keep their defaults, nothing is assigned
}
-			// onPropertyAll holds the properties referenced through owl:onProperty for all triples of the form v owl:allValuesFrom u
-			if (onPropertyAll.contains(value.getPredicate())) { // the p of some triple (w p x) is a property referenced by onPropertyAll
+			// onPropertyAll holds the properties referenced through owl:onProperty for all triples of the form v owl:allValuesFrom u
+			if (onPropertyAll.contains(value.getPredicate())) { // the p of some triple (w p x) is a property referenced by onPropertyAll
//Rule 16 - allValuesFrom
log.info("I met onPropertyAll: " + value);
bKey[0] = 1;
-				bValue[0] = 0; // Added by WuGang, the original code was missing this line, which caused errors in the reduce phase when several qualifying triples were processed
+				bValue[0] = 0; // Added by WuGang, the original code was missing this line, which caused errors in the reduce phase when several qualifying triples were processed
NumberUtils.encodeLong(bKey, 1, value.getPredicate());
NumberUtils.encodeLong(bKey, 9, value.getSubject());
NumberUtils.encodeLong(bValue, 1, value.getObject());
-				context.write(oKey, oValue); // output here: ((p,w),(x,,)); the last two fields of the value keep their defaults, nothing is assigned
+				context.write(oKey, oValue); // output here: ((p,w),(x,,)); the last two fields of the value keep their defaults, nothing is assigned
}
}
}
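	// (summary) map() emits join records keyed by (predicate, resource), so rules 15 and 16 can
	// each be evaluated with a single reduce-side join between type records and resource records.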
@@ -122,7 +123,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
@Override
public void setup(Context context) throws IOException {
previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1);
-
+
//		List<FileStatus> filesProperty = MultiFilesReader.recursiveListStatus(context, "FILTER_ONLY_OWL_ON_PROPERTY");
//		Map<Long, Collection<byte[]>> allValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_ALL_VALUES", context);
//		Map<Long, Collection<byte[]>> someValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_SOME_VALUES", context);
@@ -150,6 +151,11 @@ public void setup(Context context) throws IOException {
makeJoin(onPropertyTmp, context, someValuesTmp,
allValuesTmp, someValues, allValues,
onPropertySome, onPropertyAll);
+
+
+ db.CassandraDBClose();
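+			// The DB connection is only needed while the schema maps are cached in memory above, so
+			// it is closed before the map phase runs (the same pattern recurs throughout this patch).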
+
+
}catch (TTransportException e) {
e.printStackTrace();
} catch (InvalidRequestException e) {
@@ -206,7 +212,7 @@ protected void makeJoin(Map> onPropertyTmp, Context contex
}
if (allValuesTmp.containsKey(sub)) {
-			// col holds all the objects associated with this subject, where each pair satisfies (subject, owl:allValuesFrom, object)
+			// col holds all the objects associated with this subject, where each pair satisfies (subject, owl:allValuesFrom, object)
			Collection<byte[]> col = allValuesTmp.get(sub);
			if (col != null) {
				Iterator<byte[]> itr = col.iterator();
@@ -231,4 +237,5 @@ protected void makeJoin(Map> onPropertyTmp, Context contex
}
}
+
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
index 9d56f78..0161e11 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
@@ -28,18 +28,18 @@ public class OWLAllSomeValuesReducer extends Reducer<BytesWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>
 	private LinkedList<Long> resources = new LinkedList<Long>();
// Added by WuGang
-	private LinkedList<Long> others = new LinkedList<Long>();		// kept the same length as types
-	private LinkedList<Byte> s_a_types = new LinkedList<Byte>();	// kept the same length as types; records whether an entry came from someValues (0) or allValues (1)
+	private LinkedList<Long> others = new LinkedList<Long>();		// kept the same length as types
+	private LinkedList<Byte> s_a_types = new LinkedList<Byte>();	// kept the same length as types; records whether an entry came from someValues (0) or allValues (1)
@Override
	public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context) throws IOException, InterruptedException {
- log.info("I'm in OWLAllSomeValuesReducer");
+ //log.info("I'm in OWLAllSomeValuesReducer");
types.clear();
resources.clear();
byte[] bKey = key.getBytes();
-		long rSubject = NumberUtils.decodeLong(bKey, 9);	// rSubject is the second long in the key (offset 9, after the leading flag byte)
+		long rSubject = NumberUtils.decodeLong(bKey, 9);	// rSubject is the second long in the key (offset 9, after the leading flag byte)
long predicate = NumberUtils.decodeLong(bKey, 1); // Added by WuGang 2010-07-14
		Iterator<BytesWritable> itr = values.iterator();
@@ -48,7 +48,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
byte[] bValue = value.getBytes();
if (bValue[0] == 1) { //Type triple
types.add(NumberUtils.decodeLong(bValue, 1));
-				others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, for type records also read the extra long that follows (one byte further on)
+				others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, for type records also read the extra long that follows (one byte further on)
s_a_types.add(bValue[17]);
} else { //Resource triple
resources.add(NumberUtils.decodeLong(bValue, 1));
@@ -66,7 +66,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
while (itrResource.hasNext()) {
long resource = itrResource.next();
triple.setSubject(resource);
-			// For Type values: someValues entries look like ((p,x),(v,w)), allValues entries like ((p,w),(u,v))
+			// For Type values: someValues entries look like ((p,x),(v,w)), allValues entries like ((p,w),(u,v))
			Iterator<Long> itrTypes = types.listIterator();
			Iterator<Long> itrOthers = others.listIterator();
			Iterator<Byte> itrSATypes = s_a_types.listIterator();
@@ -74,14 +74,14 @@ public void reduce(BytesWritable key, Iterable values, Context co
long type = itrTypes.next();
triple.setObject(type);
-				// Added by WuGang, fill in the triple's values
+				// Added by WuGang, fill in the triple's values
long other = itrOthers.next();
byte s_a_type = itrSATypes.next();
-				triple.setRsubject(rSubject);	// x in the someValues case, w in the allValues case
+				triple.setRsubject(rSubject);	// x in the someValues case, w in the allValues case
// Modified by WuGang 2010-07-14
// triple.setRpredicate(TriplesUtils.RDF_TYPE); //rdf:type
triple.setRpredicate(predicate);
-				triple.setRobject(other);	// w in the someValues case, v in the allValues case
+				triple.setRobject(other);	// w in the someValues case, v in the allValues case
switch (s_a_type) {
case 0:
triple.setType(TriplesUtils.OWL_HORST_15);
@@ -95,7 +95,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
// System.out.println("Generate an extended triple for OWLAllSomeValues: " + triple);
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
}
}
}
@@ -110,4 +110,12 @@ public void setup(Context context) {
triple.setObjectLiteral(false);
triple.setPredicate(TriplesUtils.RDF_TYPE);
}
+
+ @Override
+ protected void cleanup(
+			Reducer<BytesWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
+
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
index a4afd43..3323bd6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
@@ -85,7 +85,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
@Override
public void setup(Context context) throws IOException {
-
+
CassandraDB db;
try {
db = new CassandraDB();
@@ -94,6 +94,7 @@ public void setup(Context context) throws IOException {
subpropSchemaTriples = new HashSet();
Set filters = new HashSet();
filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
+ //modified 2015/5/31
db.loadSetIntoMemory(subpropSchemaTriples, filters, -1);
}
@@ -101,18 +102,21 @@ public void setup(Context context) throws IOException {
subclassSchemaTriples = new HashSet();
Set filters = new HashSet();
filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
+ //modified 2015/5/31
db.loadSetIntoMemory(subclassSchemaTriples, filters, -1);
}
- } catch (TTransportException e) {
- e.printStackTrace();
- } catch (InvalidRequestException e) {
- e.printStackTrace();
- } catch (UnavailableException e) {
- e.printStackTrace();
- } catch (TimedOutException e) {
- e.printStackTrace();
- } catch (SchemaDisagreementException e) {
- e.printStackTrace();
+ db.CassandraDBClose();
+ //modified 2015/5/31
+// } catch (TTransportException e) {
+// e.printStackTrace();
+// } catch (InvalidRequestException e) {
+// e.printStackTrace();
+// } catch (UnavailableException e) {
+// e.printStackTrace();
+// } catch (TimedOutException e) {
+// e.printStackTrace();
+// } catch (SchemaDisagreementException e) {
+// e.printStackTrace();
} catch (TException e) {
e.printStackTrace();
}
@@ -120,4 +124,5 @@ public void setup(Context context) throws IOException {
}
+
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
index e98f5ba..731fb98 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
@@ -33,7 +33,7 @@ public class OWLEquivalenceSCSPReducer extends Reducer> subpropSchemaTriples = null;
public static Map> subclassSchemaTriples = null;
public static Map> equivalenceClassesSchemaTriples = null; // Added by WuGang
@@ -90,7 +90,7 @@ public void reduce(LongWritable key, Iterable values, Context con
}
}
-			if (!found) { // no equivalent was found
+			if (!found) { // no equivalent was found
triple.setObject(resource);
triple.setSubject(key.get());
triple.setPredicate(TriplesUtils.RDFS_SUBCLASS);
@@ -107,9 +107,8 @@ public void reduce(LongWritable key, Iterable values, Context con
triple.setRpredicate(TriplesUtils.OWL_EQUIVALENT_CLASS);
triple.setRobject(triple.getSubject());
}
-
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
}
}
@@ -146,12 +145,12 @@ public void reduce(LongWritable key, Iterable values, Context con
}
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
}
}
//Subproperties
-		// Modified by WuGang, it seems this should be superProperties here
+		// Modified by WuGang, it seems this should be superProperties here
// itr2 = equivalenceProperties.iterator();
itr2 = superProperties.iterator();
while (itr2.hasNext()) {
@@ -180,12 +179,12 @@ public void reduce(LongWritable key, Iterable values, Context con
triple.setRobject(triple.getObject());
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
}
}
//Subclasses
-		// Modified by WuGang, it seems this should be superClasses here
+		// Modified by WuGang, it seems this should be superClasses here
// itr2 = equivalenceClasses.iterator();
itr2 = superClasses.iterator();
while (itr2.hasNext()) {
@@ -213,9 +212,8 @@ public void reduce(LongWritable key, Iterable values, Context con
triple.setRsubject(triple.getSubject());
triple.setRpredicate(TriplesUtils.RDFS_SUBCLASS);
triple.setRobject(triple.getObject());
-
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
}
}
}
@@ -255,7 +253,8 @@ public void setup(Context context) throws IOException {
Set filters = new HashSet();
filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
equivalencePropertiesSchemaTriples = db.loadMapIntoMemory(filters);
- }
+ }
+ db.CassandraDBClose();
}catch (TTransportException e) {
e.printStackTrace();
} catch (InvalidRequestException e) {
@@ -271,4 +270,11 @@ public void setup(Context context) throws IOException {
}
}
+
+ @Override
+ protected void cleanup(
+			Reducer<LongWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
index b78a782..2ca8a07 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
@@ -43,7 +43,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
} //TODO: check whether also the schema is modified
oKey.set(value.getSubject());
-		if (value.getPredicate() == TriplesUtils.RDF_TYPE &&	// rule 14b: here value is (u rdf:type v); the reducer will look for (u p w), and we still have to check 14b against (v owl:hasValue w)
+		if (value.getPredicate() == TriplesUtils.RDF_TYPE &&	// rule 14b: here value is (u rdf:type v); the reducer will look for (u p w), and we still have to check 14b against (v owl:hasValue w)
hasValue.contains(value.getObject()) &&
onProperty.contains(value.getObject())) {
// System.out.println("In OWLHasValueMapper for 14b: " + value); // Added by Wugang
@@ -52,7 +52,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
oValue.set(values, 0, 9);
context.write(oKey, oValue);
-		} else if (value.getPredicate() != TriplesUtils.RDF_TYPE	// rule 14a: here value is (u p w); the reducer will look for (u rdf:type v), and we still have to check 14a against (v owl:hasValue w)
+		} else if (value.getPredicate() != TriplesUtils.RDF_TYPE	// rule 14a: here value is (u p w); the reducer will look for (u rdf:type v), and we still have to check 14a against (v owl:hasValue w)
&& hasValueInverted.contains(value.getObject())
&& onPropertyInverted.contains(value.getPredicate())) {
// System.out.println("In OWLHasValueMapper for 14a: " + value); // Added by Wugang
@@ -62,6 +62,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
oValue.set(values, 0, 17);
context.write(oKey, oValue);
+
}
// Moved into if-else by WuGang, 20150203
@@ -70,7 +71,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
public void setup(Context context) throws IOException {
previousStep = context.getConfiguration().getInt("reasoner.previousStep", -1);
-
+
try{
CassandraDB db = new CassandraDB();
@@ -95,6 +96,7 @@ public void setup(Context context) throws IOException {
onPropertyInverted = new HashSet();
db.loadSetIntoMemory(onPropertyInverted, filters, -1, true);
}
+ db.CassandraDBClose();
}catch(TException te){
te.printStackTrace();
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
index cdae522..c85b693 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
@@ -44,8 +44,8 @@ public class OWLHasValueReducer extends Reducer<LongWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>
 	public void reduce(LongWritable key, Iterable<BytesWritable> values, Context context) throws IOException, InterruptedException {
 		Iterator<BytesWritable> itr = values.iterator();
+ System.out.println("step 6");
while (itr.hasNext()) {
byte[] v = itr.next().getBytes();
+ System.out.println("step6 has values reduce");
if (v.length > 0) {
if (v[0] == 0) { //Rule 14b
// System.out.println("In OWLHasValueReducer for 14b: "); // Added by Wugang
@@ -69,9 +71,8 @@ public void reduce(LongWritable key, Iterable values, Context con
triple.setRsubject(object); // v
triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue
triple.setRobject(triple.getObject()); // w
-// System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang
-
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
// context.write(source, triple);
}
}
@@ -97,11 +98,11 @@ public void reduce(LongWritable key, Iterable values, Context con
triple.setType(TriplesUtils.OWL_HORST_14a);
triple.setRsubject(triple.getObject()); // v
// triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue
-					triple.setRpredicate(predicate); // p	// Modified by WuGang, 2010-08-26, this information is now restored
+					triple.setRpredicate(predicate); // p	// Modified by WuGang, 2010-08-26, this information is now restored
triple.setRobject(object); // w
// System.out.println("In OWLHasValueReducer for 14a output: "+triple); // Added by Wugang
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
// context.write(source, triple);
}
}
@@ -130,6 +131,7 @@ public void setup(Context context) throws IOException {
onPropertyFilter.add(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY);
onPropertyMap = db.loadMapIntoMemory(onPropertyFilter);
onProperty2Map = db.loadMapIntoMemory(onPropertyFilter, true);
+ db.CassandraDBClose();
}catch (TTransportException e) {
e.printStackTrace();
} catch (InvalidRequestException e) {
@@ -144,4 +146,11 @@ public void setup(Context context) throws IOException {
e.printStackTrace();
}
}
+
+ @Override
+ protected void cleanup(
+			Reducer<LongWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java
index 2fc767b..d6bf4a6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveMapper.java
@@ -40,6 +40,7 @@ public void map(Long key, Row row, Context context) throws IOException,Interrupt
/* Check if the triple has the functional property. If yes output
* a key value so it can be matched in the reducer.
*/
+
if (schemaFunctionalProperties.contains(value.getPredicate())
&& !value.isObjectLiteral()) {
//Set as key a particular flag plus the predicate
@@ -95,13 +96,16 @@ public void map(Long key, Row row, Context context) throws IOException,Interrupt
}
context.write(this.key, new LongWritable(predicate));
}
+
+ //System.out.println("Cassandra time :"+(System.currentTimeMillis() - time));
+
}
protected void setup(Context context) throws IOException {
previousTransDerivation = context.getConfiguration().getInt("reasoner.previosTransitiveDerivation", -1);
previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1);
hasSchemaChanged = false;
-
+
try{
CassandraDB db = new CassandraDB();
@@ -142,6 +146,8 @@ protected void setup(Context context) throws IOException {
filters.add(TriplesUtils.SCHEMA_TRIPLE_TRANSITIVE_PROPERTY);
hasSchemaChanged = db.loadSetIntoMemory(schemaTransitiveProperties, filters, previousDerivation);
}
+
+ db.CassandraDBClose();
}catch(TException te){
te.printStackTrace();
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
index e098029..486af50 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLNotRecursiveReducer.java
@@ -37,27 +37,28 @@ public class OWLNotRecursiveReducer extends Reducer<BytesWritable, LongWritable, Map<String, ByteBuffer>, List<ByteBuffer>>
 	protected Set<Long> set = new HashSet<Long>();
 	protected Map<Long, Collection<Long>> schemaInverseOfProperties = null;
-
+
	protected void reduce(BytesWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
byte[] bytes = key.getBytes();
long rsubject=0, rpredicate=0, robject=0;
long key1=0, key2=0, value1 = 0;
-
+
switch(bytes[0]) {
// case 0:
// case 1: //Functional and inverse functional property
case 0: // Modified by WuGang, Functional
case 1: // Modified by WuGang, Inverse Functional
// System.out.println("Processing Functional & Inverse Functional Property.");
-			key1 = NumberUtils.decodeLong(bytes, 1);	// the subject for a Functional property, the object for an Inverse Functional property
+			key1 = NumberUtils.decodeLong(bytes, 1);	// the subject for a Functional property, the object for an Inverse Functional property
key2 = NumberUtils.decodeLong(bytes, 9); // predicate
long minimum = Long.MAX_VALUE;
set.clear();
			Iterator<LongWritable> itr = values.iterator();
+
while (itr.hasNext()) {
long value = itr.next().get();
-				value1 = value; // Added by Wugang, keep this value: the original triple's object for Functional, its subject for Inverse Functional
+				value1 = value; // Added by Wugang, keep this value: the original triple's object for Functional, its subject for Inverse Functional
if (value < minimum) {
if (minimum != Long.MAX_VALUE)
set.add(minimum);
@@ -97,7 +98,7 @@ else if (bytes[0] == 1){ //Inverse Functional
triple.setObject(object);
// System.out.println("Find a derive in functional and inverse functional property!" + triple);
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
outputSize++;
}
context.getCounter("OWL derived triples", "functional and inverse functional property").increment(outputSize);
@@ -116,13 +117,13 @@ else if (bytes[0] == 1){ //Inverse Functional
triple.setRsubject(subject);
triple.setRobject(object);
triple.setType(TriplesUtils.OWL_HORST_3);
-
+
itr = values.iterator();
while (itr.hasNext()) {
triple.setPredicate(itr.next().get());
triple.setRpredicate(triple.getPredicate()); // Added by WuGang
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
context.getCounter("OWL derived triples", "simmetric property").increment(1);
}
@@ -144,7 +145,7 @@ else if (bytes[0] == 1){ //Inverse Functional
triple.setRsubject(subject);
triple.setRobject(object);
triple.setRpredicate(predicate);
-
+
/* I only output the last key of the inverse */
			Collection<Long> inverse = schemaInverseOfProperties.get(predicate);
if (inverse != null) {
@@ -154,7 +155,7 @@ else if (bytes[0] == 1){ //Inverse Functional
triple.setPredicate(derivedPredicate); // Only one of the inverse, the others will be completed in outputInverseOf()
//triple.setPredicate(itrInverse.next()); // Commented by WuGang 2015-01-27
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
context.getCounter("OWL derived triples", "inverse of").increment(1);
// Moved to here by WuGang, 2015-01-27
@@ -171,7 +172,7 @@ else if (bytes[0] == 1){ //Inverse Functional
break;
case 4:
case 5:
-			// Has this part already been handled in inferTransitivityStatements? It is not handled here yet
+			// Has this part already been handled in inferTransitivityStatements? It is not handled here yet
//Transitive property. I copy to a temporary directory setting a special triple source
subject = NumberUtils.decodeLong(bytes, 1);
object = NumberUtils.decodeLong(bytes, 9);
@@ -191,7 +192,7 @@ else if (bytes[0] == 1){ //Inverse Functional
transitiveSource.setDerivation(TripleSource.TRANSITIVE_ENABLED);
triple.setPredicate(Math.abs(predicate));
// context.write(transitiveSource, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, transitiveSource, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
context.getCounter("OWL derived triples", "transitive property input").increment(1);
}
default:
@@ -213,7 +214,7 @@ private void outputInverseOf(long subject, long object, long predicate, Set<Long>
 			Map<Long, Collection<Long>> schemaInverseOfProperties_reverse = db.loadMapIntoMemory(filters, true);
+
+ db.CassandraDBClose();
}catch (TTransportException e) {
e.printStackTrace();
} catch (InvalidRequestException e) {
@@ -255,4 +258,11 @@ public void setup(Context context) throws IOException {
}
}
+
+ @Override
+ protected void cleanup(
+			Reducer<BytesWritable, LongWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
index ef48ffc..24381fd 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java
@@ -1,9 +1,18 @@
package cn.edu.neu.mitt.mrj.reasoner.owl;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.HashSet;
+import java.util.List;
import java.util.Set;
+import org.apache.cassandra.hadoop.ConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlOutputFormat;
+import org.apache.cassandra.thrift.InvalidRequestException;
+import org.apache.cassandra.thrift.SchemaDisagreementException;
+import org.apache.cassandra.thrift.TimedOutException;
+import org.apache.cassandra.thrift.UnavailableException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
@@ -17,13 +26,16 @@
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import ch.qos.logback.classic.db.DBAppender;
import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
import cn.edu.neu.mitt.mrj.io.files.readers.FilesTriplesReader;
import cn.edu.neu.mitt.mrj.partitioners.MyHashPartitioner;
import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig;
+import cn.edu.neu.mitt.mrj.utils.Cassandraconf;
import cn.edu.neu.mitt.mrj.utils.FileUtils;
import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
@@ -37,7 +49,7 @@ public class OWLReasoner extends Configured implements Tool {
public static final String OWL_PROP_INHERITANCE_TMP = "/dir-tmp-prop-inheritance/";
public static final String OWL_PROP_INHERITANCE = "/dir-prop-inheritance/";
public static final String OWL_TRANSITIVITY_BASE = OWL_PROP_INHERITANCE_TMP + "dir-transitivity-base/";
-	public static final String OWL_TRANSITIVITY = "dir-transitivity/";	// Added by WuGang 2010-08-25, newly added directory
+	public static final String OWL_TRANSITIVITY = "dir-transitivity/";	// Added by WuGang 2010-08-25, newly added directory
public static final String OWL_SYNONYMS_TABLE = "dir-table-synonyms/";
public static final String OWL_SYNONYMS_TABLE_NEW = "_table_synonyms_new/";
@@ -48,7 +60,7 @@ public class OWLReasoner extends Configured implements Tool {
public static final String OWL_ALL_VALUE_TMP = "/dir-tmp-all-some-values/";
public static final String OWL_HAS_VALUE_TMP = "/dir-tmp-has-value/";
- private CassandraDB db;
+ public CassandraDB db;
private int numMapTasks = -1;
private int numReduceTasks = -1;
@@ -100,10 +112,12 @@ public static void main(String[] args) {
try {
OWLReasoner owlreasoner = new OWLReasoner();
- owlreasoner.db = new CassandraDB("localhost", 9160);
- owlreasoner.db.init();
+// owlreasoner.db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160);
+// owlreasoner.db.init();
ToolRunner.run(new Configuration(), owlreasoner, args);
+
+// owlreasoner.db.CassandraDBClose();
} catch (Exception e) {
e.printStackTrace();
}
@@ -119,7 +133,20 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio
boolean firstCycle = true;
int currentStep = 0;
- int lastDerivationStep = 0;
+ int lastDerivationStep = 0;
+
+ //Modified 2015/6/28
+ try {
+ db = new CassandraDB();
+//			db.init();	// Do not call init() here, otherwise we get TTransportException: java.net.SocketException: Broken pipe
+			/*
+			 * Errors occurred in functions like getRowCountAccordingInferredSteps.
+			 * The exact cause is uncertain; it may be related to how the client is used.
+			 */
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
do {
if (!firstCycle && lastDerivationStep == (currentStep - 4))
@@ -128,6 +155,8 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio
System.out.println(">>>>>>>>>>> Start new OWL Reasoner loop <<<<<<<<<<<");
long propDerivation = inferPropertiesInheritance(args);
System.out.println("----------- End inferPropertiesInheritance");
+ //Get Attention!
+ System.out.println("----------- Start inferTransitivityStatements");
derivedTriples = inferTransitivityStatements(args) + propDerivation;
System.out.println("----------- End inferTransitivityStatements");
if (derivedTriples > 0)
@@ -154,7 +183,7 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio
break;
currentStep++;
long hasValueDerivation = inferHasValueStatements(args);
- System.out.println("-----------inferHasValueStatements����");
+ System.out.println("-----------inferHasValueStatements����");
derivedTriples += hasValueDerivation;
if (hasValueDerivation > 0) lastDerivationStep = currentStep;
@@ -162,7 +191,7 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio
break;
currentStep++;
long someAllDerivation = inferSomeAndAllValuesStatements(args);
- System.out.println("-----------inferSomeAndAllValuesStatements����");
+ System.out.println("-----------inferSomeAndAllValuesStatements����");
derivedTriples += someAllDerivation;
if (someAllDerivation > 0) lastDerivationStep = currentStep;
@@ -192,8 +221,10 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter
OWLReasoner.class,
"OWL reasoner: infer properties inherited statements (not recursive), step " + step,
new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(), // not supported
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 5);
job.getConfiguration().setInt("reasoner.step", step);
job.getConfiguration().setInt("reasoner.previosTransitiveDerivation", previousTransitiveDerivation);
job.getConfiguration().setInt("reasoner.previousDerivation", previousInferPropertiesDerivation);
@@ -203,7 +234,7 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter
job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(LongWritable.class);
job.setReducerClass(OWLNotRecursiveReducer.class);
-
+
job.waitForCompletion(true);
@@ -230,23 +261,49 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter
private long inferTransitivityStatements(String[] args)
throws IOException, InterruptedException, ClassNotFoundException {
boolean derivedNewStatements = true;
-//		System.out.println("At the beginning of inferTransitivityStatements");
+//		System.out.println("At the beginning of inferTransitivityStatements");
// We'll not use filesystem but db.getTransitiveStatementsCount()
long derivation = 0;
int level = 0;
- long beforeInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE);
+
+ //modified 2015/5/19
+ long beforeInferCount = db.getRowCountAccordingTripleTypeWithLimitation(TriplesUtils.TRANSITIVE_TRIPLE, 1);
+
while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) {
-//			System.out.println("Start searching in the while loop of inferTransitivityStatements");
+//			System.out.println("Start searching in the while loop of inferTransitivityStatements");
level++;
+			Set<Integer> levels = new HashSet<Integer>();
+ levels.add(new Integer(level-1));
+ if (level > 1)
+ levels.add(new Integer(level-2));
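+			// (sketch) levels = {level-1, level-2}: from the third round on, only triples derived in
+			// the two most recent transitivity rounds are re-read, instead of scanning the whole table.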
+
//Configure input. Take only the directories that are two levels below
- Job job = MapReduceReasonerJobConfig.createNewJob(
- OWLReasoner.class,
- "OWL reasoner: transitivity rule. Level " + level,
- new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
- numMapTasks,
- numReduceTasks, true, true);
+ Job job = null;
+
+ // for the first two level, we use the whole data in the database
+ if (level <= 2)
+ job = MapReduceReasonerJobConfig.createNewJob(
+ OWLReasoner.class,
+ "OWL reasoner: transitivity rule. Level " + level,
+ new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(),
+ 0,
+ numMapTasks,
+ numReduceTasks, true, true, 6);
+ // for the level more than two, we only consider the last two level derived data in the current step
+ if (level > 2)
+ job = MapReduceReasonerJobConfig.createNewJob(
+ OWLReasoner.class,
+ "OWL reasoner: transitivity rule. Level " + level,
+ new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ levels,
+ step,
+ numMapTasks,
+ numReduceTasks, true, true ,7);
+
+
job.getConfiguration().setInt("reasoning.baseLevel", step);
job.getConfiguration().setInt("reasoning.transitivityLevel", level);
job.getConfiguration().setInt("maptasks", Math.max(numMapTasks / 10, 1));
@@ -257,18 +314,35 @@ private long inferTransitivityStatements(String[] args)
job.setReducerClass(OWLTransitivityReducer.class);
job.waitForCompletion(true);
-
- // About duplication, we will modify the checkTransitivity to return transitive triple counts
- // and then do subtraction.
-
- long afterInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE);
- derivation = afterInferCount - beforeInferCount;
- derivedNewStatements = (derivation > 0);
- beforeInferCount = afterInferCount; // Update beforeInferCount
+ long stepNotFilteredDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue();
+
+ long stepDerivation = 0;
+ if (stepNotFilteredDerivation > 0) {
+ try {
+ db.createIndexOnInferredSteps();
+ } catch (InvalidRequestException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (UnavailableException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (TimedOutException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (SchemaDisagreementException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (TException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ stepDerivation = db.getRowCountAccordingInferredSteps(level);
+ }
+ derivation += stepDerivation;
+ derivedNewStatements = stepDerivation > 0;
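+			// REDUCE_OUTPUT_RECORDS only counts candidate rows; the follow-up count over the indexed
+			// inferredsteps column measures what actually landed in Cassandra, where duplicate upserts
+			// collapse into a single row.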
}
previousTransitiveDerivation = step;
-
return derivation;
}
@@ -281,7 +355,8 @@ private long inferSameAsStatements(String[] args) {
try {
boolean derivedSynonyms = true;
int derivationStep = 1;
- long previousStepDerived = 0; // Added by WuGang 2015-01-30
+// long previousStepDerived = 0; // Added by WuGang 2015-01-30
+
while (derivedSynonyms) {
if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0) // We need not to infer on SameAs
return 0;
@@ -292,43 +367,56 @@ private long inferSameAsStatements(String[] args) {
OWLReasoner.class,
"OWL reasoner: build the synonyms table from same as triples - step " + derivationStep++,
filters, // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(), // Added by WuGang, 2015-07-12
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 8);
job.setMapperClass(OWLSameAsMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setReducerClass(OWLSameAsReducer.class);
-
+
job.waitForCompletion(true);
// System.out.println("In FilesOWLReasoner: " + job.getCounters().findCounter("synonyms", "replacements").getValue());
Counter cDerivedSynonyms = job.getCounters().findCounter("synonyms","replacements");
long currentStepDerived = cDerivedSynonyms.getValue(); // Added by WuGang 2015-01-30
- derivedTriples += currentStepDerived;
- derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30
+ derivedSynonyms = currentStepDerived > 0; // Added by WuGang 2015-07-12
+// derivedTriples += currentStepDerived;
+// derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30
//derivedSynonyms = currentStepDerived > 0;
- previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30
+// previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30
}
//Filter the table.
+
+ //modified 2015/5/19
long tableSize = db.getRowCountAccordingTripleType(TriplesUtils.SYNONYMS_TABLE);
// System.out.println("tableSize Ϊ : " + tableSize);
// System.out.println("sizeDictionary Ϊ : " + sizeDictionary);
// System.out.println("derivedTriples Ϊ : " + derivedTriples);
- if (tableSize > sizeDictionary || derivedTriples > 0) {
+ //modified 2015/5/19
+ if (tableSize > sizeDictionary || derivedTriples > 0) {
+ //for(int j =0 ;j <= 3 ; j++){
//1) Calculate the URIs distribution and get the first 2M.
job = MapReduceReasonerJobConfig.createNewJob(
OWLReasoner.class,
"OWL reasoner: sampling more common resources",
new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(), // Added by WuGang, 2015-07-12
+ step, // not used here
numMapTasks,
- numReduceTasks, true, false); // input from cassandra, but output to hdfs
+ numReduceTasks, true, false, 9); // input from cassandra, but output to hdfs
job.getConfiguration().setInt("reasoner.samplingPercentage", sampling); //Sampling at 10%
job.getConfiguration().setInt("reasoner.threshold", resourceThreshold); //Threshold resources
+ /*
+ * output to hdfs
+ */
+
job.setMapperClass(OWLSampleResourcesMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(LongWritable.class);
@@ -341,7 +429,7 @@ private long inferSameAsStatements(String[] args) {
SequenceFileOutputFormat.setOutputPath(job, commonResourcesPath);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
+
job.waitForCompletion(true);
@@ -378,8 +466,10 @@ private long inferSameAsStatements(String[] args) {
OWLReasoner.class,
"OWL reasoner: replace triples using the sameAs synonyms: reconstruct triples",
new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(), // Added by WuGang, 2015-07-12
+ step, // not used here
numMapTasks,
- numReduceTasks, false, true); // input from hdfs, but output to cassandra
+ numReduceTasks, false, true, 10); // input from hdfs, but output to cassandra
SequenceFileInputFormat.addInputPath(job, tmpPath);
job.setInputFormatClass(SequenceFileInputFormat.class);
@@ -388,6 +478,7 @@ private long inferSameAsStatements(String[] args) {
job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setReducerClass(OWLSameAsReconstructReducer.class);
+
job.waitForCompletion(true);
FileSystem fs = FileSystem.get(job.getConfiguration());
@@ -401,6 +492,7 @@ private long inferSameAsStatements(String[] args) {
fs.rename(new Path(args[0] + "/dir-input"), new Path(args[0] + "/_dir-input"));
}
+ //modified 2015/5/19
sizeDictionary = tableSize;
} catch (Exception e) {
@@ -418,17 +510,19 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter
step++;
Set filters = new HashSet();
- filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
- filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
- filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_CLASS);
- filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_CLASS);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
Job job = MapReduceReasonerJobConfig.createNewJob(
OWLReasoner.class,
"OWL reasoner: infer equivalence from subclass and subprop. step " + step,
filters,
+ new HashSet(), // Added by WuGang, 20150712
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 11);
job.getConfiguration().setInt("maptasks", Math.max(job.getConfiguration().getInt("maptasks", 0) / 10, 1));
job.getConfiguration().setInt("reasoner.step", step);
@@ -436,7 +530,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setReducerClass(OWLEquivalenceSCSPReducer.class);
-
+
job.waitForCompletion(true);
return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue();
}
@@ -448,19 +542,23 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException,
boolean derivedNewStatements = true;
long totalDerivation = 0;
int previousSomeAllValuesDerivation = -1;
+ boolean firstCycle = true;
// Added by Wugang 20150111
- long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15); // see OWLAllSomeValuesReducer
- long countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16); // see OWLAllSomeValuesReducer
+ //long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15); // see OWLAllSomeValuesReducer
+ //long countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16); // see OWLAllSomeValuesReducer
while (derivedNewStatements) {
step++;
+
Job job = MapReduceReasonerJobConfig.createNewJob(
OWLReasoner.class,
"OWL reasoner: some and all values rule. step " + step,
new HashSet(),
+ new HashSet(),
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 12);
job.getConfiguration().setInt("reasoner.step", step);
job.getConfiguration().setInt("reasoner.previousDerivation", previousSomeAllValuesDerivation);
previousSomeAllValuesDerivation = step;
@@ -469,21 +567,48 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException,
job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(BytesWritable.class);
job.setReducerClass(OWLAllSomeValuesReducer.class);
-
+
job.waitForCompletion(true);
// Added by Wugang 20150111
- countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
- countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
- totalDerivation = countRule15 + countRule16;
+ // countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
+ // countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
+ // totalDerivation = countRule15 + countRule16;
- derivedNewStatements = (totalDerivation > 0);
+
+ Counter derivedTriples = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS");
+ long notFilteredDerivation = derivedTriples.getValue();
+ long stepDerivation = 0;
+ if (firstCycle)
+ notFilteredDerivation -= previousSomeAllValuesCycleDerivation;
+ if (notFilteredDerivation > 0) {
+ previousSomeAllValuesCycleDerivation += notFilteredDerivation;
+ //Modified by LiYang 2015/9/21
+// try {
+// db.createIndexOnInferredSteps();
+// } catch (TException e) {
+// // TODO Auto-generated catch block
+// e.printStackTrace();
+// }
+ try {
+ db.createIndexOnInferredSteps();
+ } catch (TException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ stepDerivation = db.getRowCountAccordingInferredSteps(step - 1);
+ totalDerivation += stepDerivation;
+ derivedNewStatements = stepDerivation > 0;
+ } else {
+ derivedNewStatements = false;
+ }
+ firstCycle = false;
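+			// previousSomeAllValuesCycleDerivation accumulates reducer output across calls, so the
+			// first cycle of a new invocation discounts the records emitted by earlier cycles.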
}
// Added by Wugang 20150111
- countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
- countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
- totalDerivation = countRule15 + countRule16;
+ //countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
+ //countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
+ //totalDerivation = countRule15 + countRule16;
return totalDerivation;
}
@@ -495,16 +620,18 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup
step++;
// Added by Wugang 20150111
- long countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a); // see OWLAllSomeValuesReducer
- long countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b); // see OWLAllSomeValuesReducer
+ //long countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a); // see OWLAllSomeValuesReducer
+ //long countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b); // see OWLAllSomeValuesReducer
Job job = MapReduceReasonerJobConfig.createNewJob(
OWLReasoner.class,
"OWL reasoner: hasValue rule. step " + step,
new HashSet(),
+ new HashSet(),
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 13);
long schemaOnPropertySize = db.getRowCountAccordingTripleType(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY);
if (schemaOnPropertySize == 0)
@@ -523,9 +650,29 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup
// Get inferred count
if (job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue() > 0) {
- countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer
- countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer
- return(countRule14a + countRule14b);
+ // countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer
+ // countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer
+ // return(countRule14a + countRule14b);
+ try {
+ db.createIndexOnInferredSteps();
+ } catch (InvalidRequestException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (UnavailableException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (TimedOutException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (SchemaDisagreementException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (TException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ long stepDerivation = db.getRowCountAccordingInferredSteps(step - 1);
+ return stepDerivation;
} else {
return 0;
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
index 5b02e6f..0462b42 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
@@ -17,9 +17,9 @@
import cn.edu.neu.mitt.mrj.utils.FileUtils;
import cn.edu.neu.mitt.mrj.utils.NumberUtils;
import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
-
import cn.edu.neu.mitt.mrj.data.Triple;
import cn.edu.neu.mitt.mrj.data.TripleSource;
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
public class OWLSameAsDeconstructMapper extends Mapper {
@@ -82,8 +82,8 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept
context.write(oKey, oValue);
-		//Here oKey is a resource (it may appear as subject, predicate, or object; for sameAs triples each position is emitted separately)
-		//Here oValue encodes the owl:sameAs triple as tripleId + key.getStep() + key.getDerivation(), preceded by one flag byte
+		//Here oKey is a resource (it may appear as subject, predicate, or object; for sameAs triples each position is emitted separately)
+		//Here oValue encodes the owl:sameAs triple as tripleId + key.getStep() + key.getDerivation(), preceded by one flag byte
++tripleId;
}
@@ -92,7 +92,7 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept
@Override
public void setup(Context context) {
oValue = new BytesWritable(bValue);
-
+
try {
String taskId = context.getConfiguration().get("mapred.task.id").substring(context.getConfiguration().get("mapred.task.id").indexOf("_m_") + 3);
taskId = taskId.replaceAll("_", "");
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
index fa3135e..8d1a1a5 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
@@ -10,6 +10,7 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
import cn.edu.neu.mitt.mrj.utils.NumberUtils;
public class OWLSameAsDeconstructReducer extends Reducer<LongWritable, BytesWritable, LongWritable, BytesWritable> {
@@ -17,7 +18,7 @@ public class OWLSameAsDeconstructReducer extends Reducer<LongWritable, BytesWritable, LongWritable, BytesWritable>
 	private LinkedList<byte[]> storage = new LinkedList<byte[]>();
@@ -38,9 +39,9 @@ public void reduce(LongWritable key, Iterable values, Context con
byte[] bValue = iValue.getBytes();
// System.out.println("In processing things before storage, size of iValue is: " + iValue.getLength());
// System.out.println("In processing things before storage, size of bValue is: " + bValue.length);
-		// We do not know when the key/value pair with bValue[0] == 4 will arrive,
-		// so resources seen before it are buffered in storage; once it has arrived, later values can be replaced directly (the value is substituted),
-		// and at the end (after the while loop) the values buffered in storage are replaced as well.
+		// We do not know when the key/value pair with bValue[0] == 4 will arrive,
+		// so resources seen before it are buffered in storage; once it has arrived, later values can be replaced directly (the value is substituted),
+		// and at the end (after the while loop) the values buffered in storage are replaced as well.
if (bValue[0] == 4) {//Same as
long resource = NumberUtils.decodeLong(bValue, 1);
replacement = true;
@@ -54,14 +55,14 @@ public void reduce(LongWritable key, Iterable values, Context con
byte[] bTempValue = new byte[15+8]; // Added by WuGang
System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang
System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang
-					iValue.set(bTempValue, 0, bTempValue.length);	// Added by Wugang, additionally write out the resource as it was before replacement
+					iValue.set(bTempValue, 0, bTempValue.length);	// Added by Wugang, additionally write out the resource as it was before replacement
context.write(oKey, iValue);
countOutput++;
context.getCounter("reasoner", "substitutions").increment(1);
}
}
-		Iterator<byte[]> itr2 = storage.iterator(); // if storage is empty, nothing under this key could be replaced via sameAs
+		Iterator<byte[]> itr2 = storage.iterator(); // if storage is empty, nothing under this key could be replaced via sameAs
while (itr2.hasNext()) {
byte[] bValue = itr2.next();
oValue.set(bValue, 0, bValue.length);
@@ -70,15 +71,19 @@ public void reduce(LongWritable key, Iterable values, Context con
// System.out.println("In processing things in storage, size of bValue is: " + bValue.length);
System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang
System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang
-			oValue.set(bTempValue, 0, bTempValue.length);	// Added by Wugang, additionally write out the resource as it was before replacement
+			oValue.set(bTempValue, 0, bTempValue.length);	// Added by Wugang, additionally write out the resource as it was before replacement
context.write(oKey, oValue);
}
-		//Here oKey is a resource whose sameAs replacements (if any) have already been applied
-		//Here oValue encodes the owl:sameAs triple as tripleId + key.getStep() + key.getDerivation(), preceded by one flag byte
+		//Here oKey is a resource whose sameAs replacements (if any) have already been applied
+		//Here oValue encodes the owl:sameAs triple as tripleId + key.getStep() + key.getDerivation(), preceded by one flag byte
if (replacement) { //Increment counter of replacements
context.getCounter("reasoner", "substitutions").increment(countOutput + storage.size());
}
}
+ public void setup(Context context) throws IOException, InterruptedException{
+ CassandraDB.setConfigLocation();
+
+ }
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
index a526c85..ed4b73f 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
@@ -29,7 +29,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
/* Source triple: s owl:sameAs o */
long olKey = 0;
long olValue = 0;
-		if (value.getSubject() > value.getObject()) { // the key always holds the larger value, the value the smaller one
+		if (value.getSubject() > value.getObject()) { // the key always holds the larger value, the value the smaller one
olKey = value.getSubject();
olValue = value.getObject();
} else {
@@ -37,18 +37,21 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
olValue = value.getSubject();
}
-		// Identify each group by its smallest value
+		// Identify each group by its smallest value
oKey.set(olKey);
bValue[0] = 0;
NumberUtils.encodeLong(bValue, 1, olValue);
oValue.set(bValue, 0, bValue.length);
-		context.write(oKey, oValue); // key is the larger value, value the smaller: this tells us which group each resource belongs to
+		context.write(oKey, oValue); // key is the larger value, value the smaller: this tells us which group each resource belongs to
oKey.set(olValue);
bValue[0] = 1;
NumberUtils.encodeLong(bValue, 1, olKey);
oValue.set(bValue, 0, bValue.length);
-		context.write(oKey, oValue); // key is the smaller value, value the larger: this tells us which resources each group contains
+		context.write(oKey, oValue); // key is the smaller value, value the larger: this tells us which resources each group contains
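+		// (note) Emitting each sameAs pair under both endpoints lets the reducer iteratively contract
+		// every equivalence group onto its smallest member, in the style of a MapReduce union-find pass.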
+ }
+ public void setup(Context context) throws IOException{
+
}
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
index 827e360..887503b 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
@@ -8,6 +8,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
import cn.edu.neu.mitt.mrj.utils.NumberUtils;
public class OWLSameAsReconstructMapper extends Mapper<Long, Row, BytesWritable, BytesWritable> {
@@ -17,25 +18,26 @@ public class OWLSameAsReconstructMapper extends Mapper
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructReducer.java
@@ ... @@ public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context) throws IOException, InterruptedException {
// System.out.println("In OWLSameAsReconstructReducer!!!");
@@ -28,31 +28,31 @@ public void reduce(BytesWritable key, Iterable values, Context co
oKey.setDerivation(bKey[12]);
int elements = 0;
- Iterator itr = values.iterator(); // bytes from offset 1 hold the owlsameas resource; byte 0 marks the position being replaced, with possible values 0,1,2,3,4 (0 = subject, 1 = predicate, 2 and 3 = object, 4 = the predicate is owl:sameas and stays owl:sameas)
+ Iterator itr = values.iterator(); // bytes from offset 1 hold the owlsameas resource; byte 0 marks the position being replaced, with possible values 0,1,2,3,4 (0 = subject, 1 = predicate, 2 and 3 = object, 4 = the predicate is owl:sameas and stays owl:sameas)
while (itr.hasNext()) {
elements++;
byte[] bValue = itr.next().getBytes();
- long resource = NumberUtils.decodeLong(bValue, 1); // the owlsameas resource; below it replaces the original one
- long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, the resource before replacement
+ long resource = NumberUtils.decodeLong(bValue, 1); // the owlsameas resource; below it replaces the original one
+ long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, the resource before replacement
switch (bValue[0]) {
case 0:
- oValue.setSubject(resource); // replace the subject
- oValue.setRsubject(originalResource); // Added by Wugang, the original subject
+ oValue.setSubject(resource); // replace the subject
+ oValue.setRsubject(originalResource); // Added by Wugang, the original subject
// System.out.println("Replacing subject: " + resource);
break;
case 1:
- oValue.setPredicate(resource); // replace the predicate
- oValue.setRpredicate(originalResource); // Added by Wugang, the original predicate
+ oValue.setPredicate(resource); // replace the predicate
+ oValue.setRpredicate(originalResource); // Added by Wugang, the original predicate
// System.out.println("Replacing predicate: " + resource);
break;
- case 2: // replace the object
- case 3: // replace the object
+ case 2: // replace the object
+ case 3: // replace the object
if (bValue[0] == 2)
oValue.setObjectLiteral(false);
else
oValue.setObjectLiteral(true);
oValue.setObject(resource);
- oValue.setRobject(originalResource); // Added by Wugang, the original object
+ oValue.setRobject(originalResource); // Added by Wugang, the original object
// System.out.println("Replacing object: " + resource);
break;
default:
@@ -61,24 +61,24 @@ public void reduce(BytesWritable key, Iterable values, Context co
}
if (elements == 3){
- // Added by WuGang, check for rule 11
+ // Added by WuGang, check for rule 11
// oValue.setRsubject(rsubject)
if ((oValue.getSubject() == oValue.getRsubject())
&& (oValue.getPredicate() == oValue.getRpredicate())
&& (oValue.getObject() == oValue.getRobject()))
- oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this does not count as a sameas rule
+ oValue.setType(TriplesUtils.OWL_HORST_NA); // nothing changed by the replacement, so this does not count as a sameas rule
else {
if ((oValue.getPredicate() == TriplesUtils.OWL_SAME_AS)
&& (oValue.getRpredicate() == TriplesUtils.OWL_SAME_AS))
- oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7
+ oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7
else
- oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL
- // Horst rule 11
+ oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL
+ // Horst rule 11
}
// System.out.println("Find a complete replacment of triple: " + oValue);
- CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
+ CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
// context.write(oKey, oValue);
}
}
@@ -86,5 +86,13 @@ public void reduce(BytesWritable key, Iterable values, Context co
@Override
public void setup(Context context) throws IOException {
CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
+
+ }
+
+ @Override
+ protected void cleanup(
+ Reducer<BytesWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
}
}
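The switch in the reducer above dispatches on a one-byte position flag, with the replacement resource at offset 1 and the original at offset 9. A minimal, self-contained sketch of that scheme; the Triple class and its fields here are illustrative stand-ins for the project's types:

import java.nio.ByteBuffer;

public class PositionFlagDemo {
    static class Triple {
        long subject, predicate, object;
        long rsubject, rpredicate, robject; // pre-replacement values
        boolean objectLiteral;
    }

    static long decodeLong(byte[] b, int off) {
        return ByteBuffer.wrap(b, off, 8).getLong();
    }

    // Applies one 17-byte value: [flag][replacement:8][original:8].
    static void apply(byte[] v, Triple t) {
        long resource = decodeLong(v, 1); // canonical replacement resource
        long original = decodeLong(v, 9); // resource before replacement
        switch (v[0]) {
        case 0: t.subject = resource;   t.rsubject = original;   break; // subject slot
        case 1: t.predicate = resource; t.rpredicate = original; break; // predicate slot
        case 2: t.objectLiteral = false; t.object = resource; t.robject = original; break;
        case 3: t.objectLiteral = true;  t.object = resource; t.robject = original; break;
        default: throw new IllegalArgumentException("unknown flag " + v[0]);
        }
    }
}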
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java
index a7988da..83fbdf7 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java
@@ -25,7 +25,7 @@ public class OWLSameAsReducer extends Reducer<LongWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>
private Set<Long> duplicates = new HashSet<Long>();
private List<Long> storage = new LinkedList<Long>();
-
+
@Override
public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException {
@@ -44,12 +44,12 @@ public void reduce(LongWritable key, Iterable values, Context con
BytesWritable value = itr.next();
long lValue = NumberUtils.decodeLong(value.getBytes(), 1);
// System.out.println("processing " + lValue + " with the first byte is: " + value.getBytes()[0]);
- if (value.getBytes()[0] != 0) { // 1: this value is a member of the group
+ if (value.getBytes()[0] != 0) { // 1: this value is a member of the group
//Store in-memory
storage.add(lValue);
// System.out.println("Storage size is: " + storage.size());
//}
- } else { // 0: this value is the group the resource belongs to
+ } else { // 0: this value is the group the resource belongs to
// System.out.println("Prepare to repalce: lValue is " + lValue + " and oValue.getSubject() is " + oValue.getSubject());
if (lValue < oValue.getSubject()) {
// System.out.println("Hahahahah, I'm here!");
@@ -65,7 +65,7 @@ public void reduce(LongWritable key, Iterable values, Context con
long lValue = itr2.next();
if (!duplicates.contains(lValue)) {
oValue.setObject(lValue);
- CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
+ CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
duplicates.add(lValue);
}
}
@@ -91,4 +91,11 @@ public void setup(Context context) {
oKey.setDerivation(TripleSource.OWL_DERIVED);
oKey.setStep(context.getConfiguration().getInt("reasoner.step", 0));
}
+
+ @Override
+ protected void cleanup(
+ Reducer<LongWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
}
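The reducer above emits each group member at most once per reduce call, guarded by the duplicates set. A stripped-down sketch of that guard; emitOnce and the returned write count are illustrative, standing in for the context.write loop:

import java.util.List;
import java.util.Set;

public class DedupEmit {
    public static int emitOnce(List<Long> storage, Set<Long> duplicates) {
        int written = 0;
        for (long member : storage) {
            if (duplicates.add(member)) { // add() is false for values already seen
                written++;                // stand-in for context.write(oKey, oValue)
            }
        }
        return written;
    }
}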
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java
index ace1796..2c8aa57 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java
@@ -45,6 +45,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
}
public void setup(Context context) {
+
threshold = context.getConfiguration().getInt("reasoner.samplingPercentage", 0);
}
}
\ No newline at end of file
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java
index d2c658e..50dfe04 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java
@@ -6,6 +6,8 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
+
public class OWLSampleResourcesReducer extends Reducer {
//private static Logger log = LoggerFactory.getLogger(OWLSampleResourcesReducer.class);
@@ -34,6 +36,8 @@ public void reduce(LongWritable key, Iterable values, Context cont
@Override
public void setup(Context context) {
+ CassandraDB.setConfigLocation();
+
threshold = context.getConfiguration().getInt("reasoner.threshold", 0);
}
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java
index b2b04bd..09232eb 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java
@@ -28,7 +28,9 @@ public class OWLTransitivityMapper extends Mapper
- if (step > minLevel) {
+ if (level > minLevel) {
NumberUtils.encodeLong(keys,0,value.getPredicate());
NumberUtils.encodeLong(keys,8,value.getSubject());
oKey.set(keys, 0, 16);
@@ -63,19 +65,20 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
values[0] = 3;
else
values[0] = 2;
- NumberUtils.encodeLong(values, 1, step);
+ NumberUtils.encodeLong(values, 1, level);
NumberUtils.encodeLong(values, 9, value.getObject());
oValue.set(values, 0, 17);
context.write(oKey, oValue);
}
- // for u p w and w p v, the key written here is (p, w) and the value is (value[0], key.getStep(), value.getObject)
+ // for u p w and w p v, the key written here is (p, w) and the value is (value[0], key.getStep(), value.getObject)
}
}
@Override
public void setup(Context context) {
+
level = context.getConfiguration().getInt("reasoning.transitivityLevel", 0);
baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 0) - 1;
minLevel = Math.max(1, (int)Math.pow(2,level - 2)) + baseLevel;
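The setup above computes minLevel = Math.max(1, (int) Math.pow(2, level - 2)) + baseLevel, the cutoff that map() uses so each transitivity round only joins chains that earlier rounds could not already have produced. A small sketch of the cutoff per round, assuming baseLevel = 0 as an example value:

public class TransitivityLevels {
    public static void main(String[] args) {
        int baseLevel = 0; // illustrative value
        for (int level = 1; level <= 5; level++) {
            int minLevel = Math.max(1, (int) Math.pow(2, level - 2)) + baseLevel;
            System.out.println("round " + level + ": join only chains above level " + minLevel);
        }
        // cutoffs come out as 1, 1, 2, 4, 8: each round can double the chain length
    }
}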
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java
index 7ad71eb..beb7b8d 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java
@@ -69,9 +69,9 @@ public void reduce(BytesWritable key, Iterable values, Context co
triple.setPredicate(NumberUtils.decodeLong(key.getBytes(),0));
- // Added by WuGang, handle the extended triple of the form u p w, where w is a key resource, in order to reconstruct the original rule premises
+ // Added by WuGang, handle the extended triple of the form u p w, where w is a key resource, in order to reconstruct the original rule premises
triple.setType(TriplesUtils.OWL_HORST_4);
-// triple.setRsubject(rsubject); // this is set further down, see the code below
+// triple.setRsubject(rsubject); // this is set further down, see the code below
triple.setRpredicate(NumberUtils.decodeLong(key.getBytes(),0));
triple.setRobject(NumberUtils.decodeLong(key.getBytes(), 8));
@@ -87,13 +87,15 @@ public void reduce(BytesWritable key, Iterable values, Context co
triple.setSubject(entry.getKey());
triple.setObject(entry2.getKey());
- // Added by Wugang, for this extended triple it does not really matter whether rsubject is set, but set it anyway for consistency
- triple.setRsubject(triple.getSubject()); // since u p w is chosen as this triple, its u is the subject of the final derived consequent
+ // Added by Wugang, for this extended triple it does not really matter whether rsubject is set, but set it anyway for consistency
+ triple.setRsubject(triple.getSubject()); // since u p w is chosen as this triple, its u is the subject of the final derived consequent
- source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel));
+ // Modified by WuGang, 2015-07-15
+ //source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel));
+ source.setTransitiveLevel((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel));
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
// System.out.println("In OWLTransitivityReducer: " + triple);
}
@@ -104,12 +106,19 @@ public void reduce(BytesWritable key, Iterable values, Context co
@Override
public void setup(Context context) {
CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
-
baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 1) - 1;
level = context.getConfiguration().getInt("reasoning.transitivityLevel", -1);
// Modified by WuGang 2015-01-28
//source.setDerivation(TripleSource.OWL_DERIVED);
+ source.setStep(baseLevel + 1); // Added by WuGang, 2015-07-15
source.setDerivation(TripleSource.TRANSITIVE_ENABLED);
triple.setObjectLiteral(false);
}
+
+ @Override
+ protected void cleanup(
+ Reducer<BytesWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
}
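In the reducer above, the transitive level of a newly derived triple is the sum of the two joined entries' absolute levels minus baseLevel; the Math.abs suggests a level's sign is used as a marker, which is a reading of the code, not something the diff states. A tiny sketch of the combination:

public class CombineLevels {
    public static int combinedLevel(long a, long b, int baseLevel) {
        return (int) (Math.abs(a) + Math.abs(b) - baseLevel);
    }

    public static void main(String[] args) {
        // joining a level-2 chain with a (marked) level -2 chain at baseLevel 1 gives level 3
        System.out.println(combinedLevel(2, -2, 1)); // 3
    }
}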
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java
index b112445..d709301 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java
@@ -2,9 +2,20 @@
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.HashSet;
+import java.util.List;
import java.util.Set;
+import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat;
+import org.apache.cassandra.hadoop.ConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat;
+import org.apache.cassandra.hadoop.cql3.CqlConfigHelper;
+import org.apache.cassandra.hadoop.cql3.CqlOutputFormat;
+import org.apache.cassandra.thrift.InvalidRequestException;
+import org.apache.cassandra.thrift.SchemaDisagreementException;
+import org.apache.cassandra.thrift.TimedOutException;
+import org.apache.cassandra.thrift.UnavailableException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.BytesWritable;
@@ -13,11 +24,14 @@
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig;
import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
+//import org.apache.hadoop.mapred.lib.MultipleOutputs;
public class RDFSReasoner extends Configured implements Tool {
@@ -26,7 +40,7 @@ public class RDFSReasoner extends Configured implements Tool {
private int numReduceTasks = -1;
public static int step = 0;
private int lastExecutionPropInheritance = -1;
- private int lastExecutionDomRange = -1;
+ private int lastExecutionDomRange = -1;
private void parseArgs(String[] args) {
@@ -69,22 +83,25 @@ public static void main(String[] args) {
// The derivation will be launched in run()
- public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
+ public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException {
long time = System.currentTimeMillis();
-
parseArgs(args);
Job job = null;
long derivation = 0;
-
+
+
// RDFS subproperty inheritance reasoning
// job = createNewJob("RDFS subproperty inheritance reasoning", "FILTER_ONLY_HIDDEN");
job = MapReduceReasonerJobConfig.createNewJob(
RDFSReasoner.class,
"RDFS subproperty inheritance reasoning",
new HashSet(),
+ new HashSet(), // Added by WuGang, 2015-07-13
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 1);
+
job.setMapperClass(RDFSSubPropInheritMapper.class);
job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(LongWritable.class);
@@ -93,10 +110,12 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep
job.getConfiguration().setInt("lastExecution.step", lastExecutionPropInheritance);
lastExecutionPropInheritance = step;
//TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-inherit");
+
job.waitForCompletion(true);
long propInheritanceDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
derivation += propInheritanceDerivation;
+
// RDFS subproperty domain and range reasoning
// job = createNewJob("RDFS subproperty domain and range reasoning", "FILTER_ONLY_HIDDEN");
@@ -104,14 +123,17 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep
RDFSReasoner.class,
"RDFS subproperty domain and range reasoning",
new HashSet(),
+ new HashSet(), // Added by WuGang, 2015-07-13
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 2);
job.setMapperClass(RDFSSubPropDomRangeMapper.class);
job.setMapOutputKeyClass(BytesWritable.class); // Modified by WuGang, 2010-08-26
job.setMapOutputValueClass(LongWritable.class);
//job.setPartitionerClass(MyHashPartitioner.class); // Is this ok? seems not necessary
job.setReducerClass(RDFSSubpropDomRangeReducer.class);
job.getConfiguration().setInt("reasoner.step", ++step);
+
job.getConfiguration().setInt("lastExecution.step", lastExecutionDomRange);
lastExecutionDomRange = step;
//TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-domain-range");
@@ -122,7 +144,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep
// RDFS cleaning up subprop duplicates
// We remove it for simplicity. That means we will not support stop and restart from breakpoints
-
+
//RDFS subclass reasoning
// job = createNewJob("RDFS subclass reasoning", "FILTER_ONLY_TYPE_SUBCLASS");
@@ -132,13 +154,16 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep
RDFSReasoner.class,
"RDFS subclass reasoning",
filters,
+ new HashSet(), // Added by WuGang, 2015-07-13
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 3);
job.setMapperClass(RDFSSubclasMapper.class);
job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(LongWritable.class);
job.setReducerClass(RDFSSubclasReducer.class);
job.getConfiguration().setInt("reasoner.step", ++step);
+
// configureOutputJob(job, args[0], "dir-rdfs-output/dir-subclass-" + step);
job.waitForCompletion(true);
derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
@@ -163,14 +188,16 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep
RDFSReasoner.class,
"RDFS special properties reasoning",
filters,
+ new HashSet(), // Added by WuGang, 2015-07-13
+ step, // not used here
numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 4);
job.setMapperClass(RDFSSpecialPropsMapper.class);
job.setMapOutputKeyClass(BytesWritable.class);
job.setMapOutputValueClass(LongWritable.class);
job.setReducerClass(RDFSSpecialPropsReducer.class);
job.getConfiguration().setInt("reasoner.step", ++step);
-
+
// configureOutputJob(job, args[0], "dir-rdfs-output/dir-special-props-" + step);
job.waitForCompletion(true);
derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue();
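The driver above applies the same pattern to all four jobs: configure, run synchronously with waitForCompletion, then add the job's REDUCE_OUTPUT_RECORDS counter to the running derivation total. A reduced sketch of that loop; the jobs list is assumed to be configured elsewhere:

import java.util.List;
import org.apache.hadoop.mapreduce.Job;

public class DerivationCount {
    // Runs already-configured jobs in order and sums their reduce-output counters.
    static long run(List<Job> jobs) throws Exception {
        long derivation = 0;
        for (Job job : jobs) {
            job.waitForCompletion(true);
            derivation += job.getCounters()
                    .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS")
                    .getValue();
        }
        return derivation;
    }
}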
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java
index 065a112..9a8e1b4 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java
@@ -87,8 +87,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
}
@Override
- public void setup(Context context) throws IOException {
-
+ public void setup(Context context) throws IOException {
try{
CassandraDB db = new CassandraDB();
if (memberProperties == null) {
@@ -111,6 +110,9 @@ public void setup(Context context) throws IOException {
filters.add(TriplesUtils.SCHEMA_TRIPLE_LITERAL_SUBCLASS);
db.loadSetIntoMemory(literalSubclasses, filters, -1);
}
+
+ db.CassandraDBClose();
+
} catch(TTransportException tte){
tte.printStackTrace();
} catch (InvalidRequestException e) {
@@ -124,5 +126,16 @@ public void setup(Context context) throws IOException {
} catch (TException e) {
e.printStackTrace();
}
+
}
+
+// protected void cleanup(Context context) throws IOException, InterruptedException{
+// try {
+// CassandraDB db = new CassandraDB();
+// db.UnIndex();
+// db.CassandraDBClose();
+// } catch (Exception e) {
+// // TODO: handle exception
+// }
+// }
}
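setup() above loads small schema subsets (member properties, literal subclasses) from CassandraDB into task-local sets exactly once, then closes the connection. A generic sketch of that load-once pattern; SchemaLoader is a hypothetical stand-in for the CassandraDB API:

import java.util.Set;

public class SchemaCache {
    interface SchemaLoader { Set<Long> load(Set<Integer> filters); } // hypothetical

    private static Set<Long> memberProperties; // shared by every map() call in this task

    static void setup(SchemaLoader db, Set<Integer> filters) {
        if (memberProperties == null) { // load once per task, reuse afterwards
            memberProperties = db.load(filters);
        }
    }
}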
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java
index dd87d22..34913b0 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java
@@ -2,10 +2,13 @@
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import org.apache.cassandra.thrift.Cassandra;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
@@ -20,11 +23,19 @@ public class RDFSSpecialPropsReducer extends Reducer
private Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
+ private Map<String, ByteBuffer> allkeys = new LinkedHashMap<String, ByteBuffer>();
+ private List<ByteBuffer> allvariables = new ArrayList