diff --git a/mrj-0.1/.classpath b/mrj-0.1/.classpath
index 569943f..b1b26f7 100644
--- a/mrj-0.1/.classpath
+++ b/mrj-0.1/.classpath
@@ -1,10 +1,12 @@
[classpath entry XML not preserved in this excerpt: 5 entries removed, 7 added]
diff --git a/mrj-0.1/.gitignore b/mrj-0.1/.gitignore
new file mode 100644
index 0000000..f8d886b
--- /dev/null
+++ b/mrj-0.1/.gitignore
@@ -0,0 +1,2 @@
+/bin
+/bin/
diff --git a/mrj-0.1/.idea/.name b/mrj-0.1/.idea/.name
new file mode 100644
index 0000000..bb04bae
--- /dev/null
+++ b/mrj-0.1/.idea/.name
@@ -0,0 +1 @@
+mrj-0.1
\ No newline at end of file
diff --git a/mrj-0.1/.idea/compiler.xml b/mrj-0.1/.idea/compiler.xml
new file mode 100644
index 0000000..a852314
--- /dev/null
+++ b/mrj-0.1/.idea/compiler.xml
@@ -0,0 +1,23 @@
[IDE-generated XML content not preserved in this excerpt]
\ No newline at end of file
diff --git a/mrj-0.1/.idea/copyright/profiles_settings.xml b/mrj-0.1/.idea/copyright/profiles_settings.xml
new file mode 100644
index 0000000..e7bedf3
--- /dev/null
+++ b/mrj-0.1/.idea/copyright/profiles_settings.xml
@@ -0,0 +1,3 @@
[IDE-generated XML content not preserved in this excerpt]
\ No newline at end of file
diff --git a/mrj-0.1/.idea/encodings.xml b/mrj-0.1/.idea/encodings.xml
new file mode 100644
index 0000000..d821048
--- /dev/null
+++ b/mrj-0.1/.idea/encodings.xml
@@ -0,0 +1,4 @@
[IDE-generated XML content not preserved in this excerpt]
\ No newline at end of file
diff --git a/mrj-0.1/.idea/misc.xml b/mrj-0.1/.idea/misc.xml
new file mode 100644
index 0000000..1a5ae83
--- /dev/null
+++ b/mrj-0.1/.idea/misc.xml
@@ -0,0 +1,178 @@
[IDE-generated XML content not preserved in this excerpt; the surviving fragment "1.8" is the project language level]
\ No newline at end of file
diff --git a/mrj-0.1/.idea/modules.xml b/mrj-0.1/.idea/modules.xml
new file mode 100644
index 0000000..39bb12e
--- /dev/null
+++ b/mrj-0.1/.idea/modules.xml
@@ -0,0 +1,8 @@
[IDE-generated XML content not preserved in this excerpt]
\ No newline at end of file
diff --git a/mrj-0.1/.idea/scopes/scope_settings.xml b/mrj-0.1/.idea/scopes/scope_settings.xml
new file mode 100644
index 0000000..922003b
--- /dev/null
+++ b/mrj-0.1/.idea/scopes/scope_settings.xml
@@ -0,0 +1,5 @@
[IDE-generated XML content not preserved in this excerpt]
\ No newline at end of file
diff --git a/mrj-0.1/.idea/vcs.xml b/mrj-0.1/.idea/vcs.xml
new file mode 100644
index 0000000..6564d52
--- /dev/null
+++ b/mrj-0.1/.idea/vcs.xml
@@ -0,0 +1,6 @@
[IDE-generated XML content not preserved in this excerpt]
\ No newline at end of file
diff --git a/mrj-0.1/.idea/workspace.xml b/mrj-0.1/.idea/workspace.xml
new file mode 100644
index 0000000..a30b4df
--- /dev/null
+++ b/mrj-0.1/.idea/workspace.xml
@@ -0,0 +1,341 @@
[IDE-generated XML content not preserved in this excerpt; the surviving fragment "1426120853528" is an IDE timestamp]
\ No newline at end of file
diff --git a/mrj-0.1/.settings/org.eclipse.core.resources.prefs b/mrj-0.1/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..83eb0de
--- /dev/null
+++ b/mrj-0.1/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,3 @@
+eclipse.preferences.version=1
+encoding//src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java=UTF-8
+encoding//src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java=UTF-8
diff --git a/mrj-0.1/.settings/org.eclipse.jdt.core.prefs b/mrj-0.1/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..7341ab1
--- /dev/null
+++ b/mrj-0.1/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/mrj-0.1/mrj-0.1.iml b/mrj-0.1/mrj-0.1.iml
new file mode 100644
index 0000000..017bf6e
--- /dev/null
+++ b/mrj-0.1/mrj-0.1.iml
@@ -0,0 +1,533 @@
[IDE-generated module XML content not preserved in this excerpt]
\ No newline at end of file
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
index 8d5c320..afbc721 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/data/TripleSource.java
@@ -16,17 +16,20 @@ public class TripleSource implements WritableComparable {
 	byte derivation = 0;
 	int step = 0;
+	int transitive_level = 0;
 
 	@Override
 	public void readFields(DataInput in) throws IOException {
 		derivation = in.readByte();
 		step = in.readInt();
+		transitive_level = in.readInt();
 	}
 
 	@Override
 	public void write(DataOutput out) throws IOException {
 		out.write(derivation);
 		out.writeInt(step);
+		out.writeInt(transitive_level);
 	}
 
 	@Override
@@ -47,6 +50,14 @@ public void setStep(int step) {
 		this.step = step;
 	}
 
+	public int getTransitiveLevel() {
+		return transitive_level;
+	}
+
+	public void setTransitiveLevel(int level) {
+		this.transitive_level = level;
+	}
+
 	public void setDerivation(byte ruleset) {
 		derivation = ruleset;
 	}
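Note on the TripleSource change above: Hadoop's Writable contract requires readFields() to consume fields in exactly the order write() produced them, which is why transitive_level is appended symmetrically to both methods. A minimal round-trip sketch of the resulting byte layout, using plain JDK streams (the harness is illustrative only, not project code):

    import java.io.*;

    // Illustrative round-trip: write the three TripleSource fields, read them back.
    // If readFields() consumed fields in a different order than write() emitted
    // them, every later field would be shifted and silently corrupted.
    public class TripleSourceRoundTrip {
        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream buf = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(buf);
            out.write(1);        // derivation (single byte)
            out.writeInt(3);     // step
            out.writeInt(2);     // transitive_level (the new field)

            DataInputStream in = new DataInputStream(
                    new ByteArrayInputStream(buf.toByteArray()));
            byte derivation = in.readByte();
            int step = in.readInt();
            int transitiveLevel = in.readInt();
            System.out.println(derivation + " " + step + " " + transitiveLevel);
        }
    }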
resources"); //Input @@ -127,7 +127,7 @@ public void sampleCommonResources(String[] args) throws Exception { } public void assignIdsToNodes(String[] args) throws Exception { -// System.out.println("��assignIdsToNodes�����С�"); +// System.out.println("��assignIdsToNodes�����С�"); Job job = createNewJob("Deconstruct statements"); job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1); @@ -156,7 +156,7 @@ public void assignIdsToNodes(String[] args) throws Exception { } private void rewriteTriples(String[] args) throws Exception { -// System.out.println("��rewriteTriples�����С�"); +// System.out.println("��rewriteTriples�����С�"); Job job = createNewJob("Reconstruct statements"); @@ -188,13 +188,15 @@ private void rewriteTriples(String[] args) throws Exception { // is it useful below line? //job.getConfiguration().set(CASSANDRA_PRIMARY_KEY, "(sub, pre, obj)"); + /* + * 这个地方设置成了0, map那个地方add的时候就应该少加一列元素。 + */ String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; - + " SET " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "= null" + ","+ CassandraDB.COLUMN_INFERRED_STEPS + "=0"; CqlConfigHelper.setOutputCql(job.getConfiguration(), query); - ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); - ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); //Launch long time = System.currentTimeMillis(); @@ -223,6 +225,22 @@ public static void main(String[] args) throws Exception { long time = System.currentTimeMillis(); int res = ToolRunner.run(new Configuration(), new FilesImportTriples(), args); // log.info("Import time: " + (System.currentTimeMillis() - time)); +// +// //Modified by LiYang 2015/4/10 +// CassandraDB db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); +// db.init(); +// // Modified +// db.createIndexOnTripleType(); +// //db.createIndexOnRule(); +// +// /* +// * Add by LiYang +// * 2015.7.19 +// */ +// //db.createIndexOnInferredSteps(); +// //db.createIndexOnTransitiveLevel(); +// db.CassandraDBClose(); + System.out.println("Import time: " + (System.currentTimeMillis() - time)); System.exit(res); } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java index b2d64d0..6aca38d 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java @@ -68,7 +68,7 @@ public void reduce(Text key, Iterable values, Context context)thr protected void setup(Context context) throws IOException, InterruptedException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. 
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
index b2d64d0..6aca38d 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesDeconstructReducer.java
@@ -68,7 +68,7 @@ public void reduce(Text key, Iterable values, Context context)thr
 	protected void setup(Context context) throws IOException, InterruptedException {
 		CassandraDB.setConfigLocation();	// 2014-12-11, Very strange, this works around.
 		try {
-			db = new CassandraDB("localhost", 9160);
+			db = new CassandraDB();
 			db.init();
 		} catch (InvalidRequestException e) {
 			e.printStackTrace();
@@ -89,4 +89,9 @@ protected void setup(Context context) throws IOException, InterruptedException {
 		counter = (Long.valueOf(taskId) + 1) << 32;
 		log.debug("Start counter " + (Long.valueOf(taskId) + 1));
 	}
+
+	protected void cleanup(Context context) throws IOException, InterruptedException{
+		db.CassandraDBClose();
+	}
+
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
index 4b7acc3..955693f 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesReconstructReducerToCassandra.java
@@ -2,7 +2,7 @@
  * Project Name: mrj-0.1
  * File Name: ImportTriplesReconstructReducerToCassandra.java
  * @author Gang Wu
- * Oct. 28, 2014, 10:35:24 AM
+ * Oct. 28, 2014, 10:35:24 AM
  *
  * Description: 
  * 	Send reducer output to Cassandra DB by representing triples with ids
@@ -16,8 +16,11 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.UUID;
 
+import org.apache.cassandra.cli.CliParser.rowKey_return;
 import org.apache.cassandra.utils.ByteBufferUtil;
+import org.apache.cassandra.utils.UUIDGen;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.slf4j.Logger;
@@ -28,6 +31,7 @@
 import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
 import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
 
+
 /**
  * @author gibeo_000
  *
@@ -78,7 +82,7 @@ protected void reduce(LongWritable key, Iterable values, Context c
 		}
 
 		if (counter != 3) {
-			// Modified by WuGang 2010-12-3, found a case that is not a 3-element triple; it needs to be reported
+			// Modified by WuGang 2010-12-3, found a case that is not a 3-element triple; it needs to be reported
 			log.error("Found a non-triple when reconstructing. The count num is " + counter + ", and triple is " + oValue);
 //			throw new IOException("Triple is not reconstructed!");
 		}
@@ -89,6 +93,35 @@ protected void reduce(LongWritable key, Iterable values, Context c
 		byte one = 1;
 		byte zero = 0;
 
+//		/*
+//		keys.put("sub", ByteBufferUtil.bytes(oValue.getSubject()));
+//		keys.put("pre", ByteBufferUtil.bytes(oValue.getPredicate()));
+//		keys.put("obj", ByteBufferUtil.bytes(oValue.getObject()));
+//		// the length of boolean type in cassandra is one byte!!!!!!!!
+// keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); +//// keys.put("id", ByteBufferUtil.bytes(UUIDGen.getTimeUUID())); +// */ +// +// keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject())); +// keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate())); +// keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(oValue.getObject())); +// keys.put(CassandraDB.COLUMN_IS_LITERAL, oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); +// +// +// // Prepare variables, here is a boolean value for CassandraDB.COLUMN_IS_LITERAL +// List variables = new ArrayList(); +//// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); +// // the length of boolean type in cassandra is one byte!!!!!!!! +// // For column inferred, init it as false i.e. zero +//// variables.add(ByteBuffer.wrap(new byte[]{zero})); +// variables.add(oValue.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// variables.add(ByteBufferUtil.bytes(TriplesUtils.getTripleType(source, oValue.getSubject(), oValue.getPredicate(), oValue.getObject()))); +// +// context.write(keys, variables); + + // Prepare composite key (sub, pre, obj) keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(oValue.getSubject())); keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(oValue.getPredicate())); @@ -107,7 +140,8 @@ protected void reduce(LongWritable key, Iterable values, Context c // the length of boolean type in cassandra is one byte!!!!!!!! // For column inferred, init it as false i.e. 
zero
+//		variables.add(ByteBuffer.wrap(new byte[]{zero}));
-		variables.add(ByteBufferUtil.bytes(0));	// It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+//		variables.add(ByteBufferUtil.bytes(0));	// It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple
+//		variables.add(ByteBufferUtil.bytes(0));	// Added by WuGang, 2015-07-15, to support transitive level
 
 		context.write(keys, variables);
 	}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
index c1153f9..8614816 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleMapper.java
@@ -19,7 +19,7 @@ public class ImportTriplesSampleMapper extends Mapper preloadedURIs = TriplesUtils.getInstance().getPreloadedURIs();
 
 	protected void map(Text key, Text value, Context context) {
-		System.out.println("[in ImportTriplesSampleMapper]");
+		//System.out.println("[in ImportTriplesSampleMapper]");
 		try {
 			String[] uris = TriplesUtils.parseTriple(value.toString(), key.toString());
 			for(String uri : uris) {
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java
index 56f33a1..bb81c8a 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/importtriples/ImportTriplesSampleReducer.java
@@ -55,7 +55,7 @@ public void reduce(Text key, Iterable values, Context context) th
 	protected void setup(Context context) throws IOException, InterruptedException {
 		CassandraDB.setConfigLocation();	// 2014-12-11, Very strange, this works around.
try { - db = new CassandraDB("localhost", 9160); + db = new CassandraDB(); db.init(); } catch (InvalidRequestException e) { e.printStackTrace(); @@ -77,4 +77,7 @@ protected void setup(Context context) throws IOException, InterruptedException { counter = (Long.valueOf(taskId)) << 13; if (counter == 0) { counter +=100; } } + protected void cleanup(Context context) throws IOException, InterruptedException{ + db.CassandraDBClose(); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java index 00877c1..c017711 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CassandraDB.java @@ -10,6 +10,7 @@ package cn.edu.neu.mitt.mrj.io.dbs; +import java.awt.print.Printable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; @@ -23,9 +24,8 @@ import java.util.Map; import java.util.Set; -import org.apache.cassandra.cql3.QueryProcessor; -import org.apache.cassandra.cql3.UntypedResultSet; -import org.apache.cassandra.db.marshal.TupleType; +import org.apache.cassandra.cli.CliParser.rowKey_return; +import org.apache.cassandra.cql3.statements.MultiColumnRestriction.EQ; import org.apache.cassandra.exceptions.RequestExecutionException; import org.apache.cassandra.thrift.Cassandra; import org.apache.cassandra.thrift.Column; @@ -42,6 +42,7 @@ import org.apache.cassandra.thrift.UnavailableException; import org.apache.cassandra.utils.ByteBufferUtil; import org.apache.hadoop.mapreduce.Reducer.Context; +//import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.protocol.TProtocol; @@ -49,17 +50,28 @@ import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; +import org.hsqldb.ResultBase.ResultIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; +import cn.edu.neu.mitt.mrj.justification.OWLHorstJustification; +import cn.edu.neu.mitt.mrj.reasoner.Experiments; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Cluster.Builder; +import com.datastax.driver.core.ResultSet; import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SimpleStatement; +import com.datastax.driver.core.SocketOptions; import com.datastax.driver.core.Statement; +//modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; import com.datastax.driver.core.querybuilder.QueryBuilder; +//modified /** @@ -71,11 +83,12 @@ public class CassandraDB { public static final String KEYSPACE = "mrjks"; // mr.j keyspace public static final String COLUMNFAMILY_JUSTIFICATIONS = "justifications"; // mr.j keyspace public static final String COLUMNFAMILY_RESOURCES = "resources"; // mr.j keyspace - public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace + public static final String COLUMNFAMILY_RESULTS = "results"; // mr.j keyspace +// public static final String COLUMNFAMILY_ALLTRIPLES = "alltriples"; public static final String COLUMN_SUB = "sub"; // mrjks.justifications.sub public static final String COLUMN_PRE = "pre"; // mrjks.justifications.pre public static final String COLUMN_OBJ = "obj"; // 
mrjks.justifications.obj - public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype + public static final String COLUMN_TRIPLE_TYPE = "tripletype" ; // mrjks.justifications.tripletype public static final String COLUMN_IS_LITERAL = "isliteral" ; // mrjks.justifications.isliteral public static final String COLUMN_INFERRED_STEPS = "inferredsteps" ; // mrjks.justifications.inferredsteps public static final String COLUMN_RULE = "rule"; // mrjks.justifications.rule @@ -85,16 +98,38 @@ public class CassandraDB { public static final String COLUMN_ID = "id"; // mrjks.resources.id public static final String COLUMN_LABEL = "label"; // mrjks.resources.label public static final String COLUMN_JUSTIFICATION = "justification"; //mrjks.results.justification - public static final String COLUMN_STEP = "step"; // mrjks.results.step + public static final String COLUMN_TRANSITIVE_LEVELS = "transitivelevel"; // mrjks.results.step - public static final String DEFAULT_HOST = "localhost"; + public static final String DEFAULT_HOST = cn.edu.neu.mitt.mrj.utils.Cassandraconf.host; public static final String DEFAULT_PORT = "9160"; // in version 2.1.2, cql3 port is 9042 - public static final String CQL_PAGE_ROW_SIZE = "10"; //3 + public static final String CQL_PAGE_ROW_SIZE = "10000"; //3 modified by liyang + + // Added by WuGang 20160203 + public static Set domainSchemaTriples = null; + public static Set rangeSchemaTriples = null; + public static Set memberProperties = null; + public static Set resourceSubclasses = null; + public static Set literalSubclasses = null; + public static Set schemaFunctionalProperties = null; + public static Set schemaInverseFunctionalProperties = null; + public static Set schemaSymmetricProperties = null; + public static Set schemaInverseOfProperties = null; + public static Set schemaTransitiveProperties = null; + public static Set subclassSchemaTriples = null; + public static Set subpropSchemaTriples = null; + public static Set hasValue = null; + public static Set hasValueInverted = null; + public static Set onProperty = null; + public static Set onPropertyInverted = null; + + public static Map> subclassSchemaTriplesMap = null; + public static Map> domainSchemaTriplesMap = null; + public static Map> rangeSchemaTriplesMap = null; + public static Map> subpropSchemaTriplesMap = null; - // 2014-12-11, Very strange, this works around. 
- public static final String CONFIG_LOCATION = "file:///home/gibeon/Software/apache-cassandra-2.1.2/conf/cassandra.yaml"; + public static final String CONFIG_LOCATION = cn.edu.neu.mitt.mrj.utils.Cassandraconf.CassandraConfFile; public static void setConfigLocation(){ setConfigLocation(CONFIG_LOCATION); } @@ -108,17 +143,36 @@ private static Cassandra.Iface createConnection() throws TTransportException{ if (System.getProperty("cassandra.host") == null || System.getProperty("cassandra.port") == null){ logger.warn("cassandra.host or cassandra.port is not defined, using default"); } + System.out.println("Port : " + System.getProperty("cassandra.port", DEFAULT_PORT)); return createConnection(System.getProperty("cassandra.host", DEFAULT_HOST), Integer.valueOf(System.getProperty("cassandra.port", DEFAULT_PORT))); } + + + private static TSocket socket = null; + private static TTransport trans = null; + private static Cassandra.Client c1 = null; private static Cassandra.Client createConnection(String host, Integer port) throws TTransportException { - TSocket socket = new TSocket(host, port); - TTransport trans = new TFramedTransport(socket); + if (c1 != null) { + return c1; + } + socket = new TSocket(host, port); + trans = new TFramedTransport(socket); trans.open(); TProtocol protocol = new TBinaryProtocol(trans); - - return new Cassandra.Client(protocol); + + c1 = new Cassandra.Client(protocol); + //Modified 2015/5/25 + return c1; + } + + private static void close(){ + if(trans != null) + trans.close(); + if(socket != null) + socket.close(); + return; } @@ -135,12 +189,12 @@ private static void setupKeyspace(Cassandra.Iface client) } catch(NotFoundException e){ logger.info("set up keyspace " + KEYSPACE); String query = "CREATE KEYSPACE " + KEYSPACE + - " WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 1}"; + " WITH replication = {'class': 'SimpleStrategy', 'replication_factor' : 2}"; - client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ANY); String verifyQuery = "select count(*) from system.peers"; - CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(verifyQuery), Compression.NONE, ConsistencyLevel.ONE); + CqlResult result = client.execute_cql3_query(ByteBufferUtil.bytes(verifyQuery), Compression.NONE, ConsistencyLevel.ANY); long magnitude = ByteBufferUtil.toLong(result.rows.get(0).columns.get(0).value); try { @@ -151,12 +205,110 @@ private static void setupKeyspace(Cassandra.Iface client) } } + public static String getJustificationsSchema(){ + String schemaString = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " ( " + + COLUMN_SUB + " bigint, " + // partition key + COLUMN_PRE + " bigint, " + // partition key + COLUMN_OBJ + " bigint, " + // partition key + COLUMN_IS_LITERAL + " boolean, " + // partition key + COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_INFERRED_STEPS + " int, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE + + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + return schemaString; + } + + /* + * ?? 
+ */ + public static String getJustificationseStatement(){ + return ("INSERT INTO " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " (isliteral, rule, sub, tripletype, pre, obj, v1, v2, v3, inferredsteps, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )"); + } + + +// public static String getAlltripleSchema(){ +// String ALLTRIPLE_SCHEMA = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + +// " ( " + +// COLUMN_SUB + " bigint, " + // partition key +// COLUMN_PRE + " bigint, " + // partition key +// COLUMN_OBJ + " bigint, " + // partition key +// COLUMN_IS_LITERAL + " boolean, " + // partition key +// COLUMN_TRIPLE_TYPE + " int, " + +// COLUMN_INFERRED_STEPS + " int, " + +// "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + +// ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; +// return ALLTRIPLE_SCHEMA; +// } + + /* + public static String getStepsSchema(Integer step){ + String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + ".step" + step + + " ( " + + COLUMN_SUB + " bigint, " + + COLUMN_PRE + " bigint, " + + COLUMN_OBJ + " bigint, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; + return STEPS_SCHEMA; + } + + public static String getStepsSchema(String cfName){ + String STEPS_SCHEMA = "CREATE TABLE " + CassandraDB.KEYSPACE + "." + cfName + + " ( " + + COLUMN_SUB + " bigint, " + + COLUMN_PRE + " bigint, " + + COLUMN_OBJ + " bigint, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + + COLUMN_TRANSITIVE_LEVELS + " int, " + + "PRIMARY KEY((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_RULE + + "), " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + ")) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; + return STEPS_SCHEMA; + } + + public static String getStepsStatement(int step){ + String query = "INSERT INTO " + CassandraDB.KEYSPACE + ".step" + step + + " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; + return query; + } + + public static String getStepsStatement(String cfName){ + String query = "INSERT INTO " + CassandraDB.KEYSPACE + "." + cfName + + " (sub, pre, obj, rule, v1, v2, v3, transitivelevel) VALUES(?, ?, ?, ?, ?, ?, ?, ?)"; + return query; + } + + public static String getAlltripleStatement(){ + return ("INSERT INTO " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + " (sub, pre, obj, isliteral, tripletype, inferredsteps) VALUES(?, ?, ?, ?, ?, ?)"); + } + */ + private static void setupTables(Cassandra.Iface client) throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { + // Create justifications table String query = "CREATE TABLE " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + @@ -171,9 +323,10 @@ private static void setupTables(Cassandra.Iface client) COLUMN_V2 + " bigint, " + COLUMN_V3 + " bigint, " + // COLUMN_TRIPLE_TYPE + " int, " + - COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key - " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + - COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + COLUMN_INFERRED_STEPS + " int, " + // from this line, the fields are non-primary key + COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + COLUMN_IS_LITERAL + ", " + COLUMN_RULE + ", " + COLUMN_SUB + "), " + COLUMN_TRIPLE_TYPE + + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + //", " + COLUMN_TRIPLE_TYPE + " ) ) "; @@ -184,6 +337,7 @@ private static void setupTables(Cassandra.Iface client) logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS, e); } + // Create resources table query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES + " ( " + @@ -199,12 +353,13 @@ private static void setupTables(Cassandra.Iface client) logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESOURCES, e); } + // Create results table query = "CREATE TABLE " + KEYSPACE + "." + COLUMNFAMILY_RESULTS + " ( " + - COLUMN_ID + " uuid, " + + "id" + " int, " + COLUMN_JUSTIFICATION + " set>>, " + - " PRIMARY KEY (" + COLUMN_ID + ") ) "; + " PRIMARY KEY (" + "id" + ") ) "; try { logger.info("set up table " + COLUMNFAMILY_RESULTS); @@ -213,7 +368,67 @@ private static void setupTables(Cassandra.Iface client) catch (InvalidRequestException e) { logger.error("failed to create table " + KEYSPACE + "." + COLUMNFAMILY_RESULTS, e); } + + + //Create resultrow table + String cquery = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." 
+ "resultrows" + + " ( " + + CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key + CassandraDB.COLUMN_RULE + " int, " + + CassandraDB.COLUMN_SUB + " bigint, " + // partition key + CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_PRE + " bigint, " + // partition key + CassandraDB.COLUMN_OBJ + " bigint, " + // partition key + CassandraDB.COLUMN_V1 + " bigint, " + + CassandraDB.COLUMN_V2 + " bigint, " + + CassandraDB.COLUMN_V3 + " bigint, " + +// COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + CassandraDB.COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + CassandraDB.COLUMN_IS_LITERAL + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_SUB + "), " + + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); + + /* + * 建立索引可能失败 + */ + +// String indexQuery = "CREATE INDEX on resultrows (sub) ;"; +// CqlPreparedResult indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); +// indexQuery = "CREATE INDEX on resultrows (obj) ;"; +// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); +// indexQuery = "CREATE INDEX on resultrows (pre) ;"; +// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); +// indexQuery = "CREATE INDEX on resultrows (isliteral) ;"; +// indexPreparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(indexQuery), Compression.NONE); + + + /* + //创建所有三元组的表 + cquery = "CREATE TABLE " + KEYSPACE + "." + CassandraDB.COLUMNFAMILY_ALLTRIPLES + + " ( " + + COLUMN_SUB + " bigint, " + // partition key + COLUMN_PRE + " bigint, " + // partition key + COLUMN_OBJ + " bigint, " + // partition key + COLUMN_IS_LITERAL + " boolean, " + // partition key + COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_INFERRED_STEPS + " int, " + + "PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + "," + COLUMN_OBJ + + ")) ) WITH compaction = {'class': 'LeveledCompactionStrategy'}"; + + try { + logger.info("set up table " + "all triples"); + client.execute_cql3_query(ByteBufferUtil.bytes(cquery), Compression.NONE, ConsistencyLevel.ONE); + } catch (InvalidRequestException e) { + logger.error("failed to create table " + KEYSPACE + "." + "AllTriples", e); + } + + query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_ALLTRIPLES + "(" + COLUMN_TRIPLE_TYPE + ")"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + */ } @@ -223,20 +438,59 @@ public CassandraDB() throws TTransportException { } - public CassandraDB(String host, Integer port) throws TTransportException { - client = createConnection(host, port); + public void CassandraDBClose(){ + this.close(); } public void init() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ setupKeyspace(client); client.set_keyspace(KEYSPACE); setupTables(client); + + createIndexOnTripleType(); + createIndexOnresultrows(); + } public Cassandra.Iface getDBClient(){ return client; } + + /** + * Get the row count according to the COLUMN_INFERRED_STEPS. + * @return row count. 
+ */ + + /* + * Need to change + */ + + public long getRowCountAccordingInferredSteps(int level){ + //ALLOW FILTERING + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_INFERRED_STEPS + " = " + level + " ALLOW FILTERING"; + + long num = 0; + try { + CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); + } catch (InvalidRequestException e) { + e.printStackTrace(); + } catch (UnavailableException e) { + e.printStackTrace(); + } catch (TimedOutException e) { + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + e.printStackTrace(); + } catch (TException e) { + e.printStackTrace(); + } + + return num; + } + + //TriplesUtils.SYNONYMS_TABLE //TriplesUtils.TRANSITIVE_TRIPLE //TriplesUtils.DATA_TRIPLE_SAME_AS @@ -245,9 +499,11 @@ public Cassandra.Iface getDBClient(){ * @return row count. */ public long getRowCountAccordingTripleType(int tripletype){ + //ALLOW FILTERING + String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype; - + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; +// System.out.println(query); long num = 0; try { CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); @@ -267,14 +523,51 @@ public long getRowCountAccordingTripleType(int tripletype){ return num; } + + /** + * Get the row count according to the triple type. + * @return row count. + */ + public long getRowCountAccordingTripleTypeWithLimitation(int tripletype, int limit){ + //ALLOW FILTERING + String query = ""; + if (limit <= 0) + query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " ALLOW FILTERING"; + else + query = "SELECT COUNT(*) FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + + " WHERE " + COLUMN_TRIPLE_TYPE + " = " + tripletype + " LIMIT " + limit + " ALLOW FILTERING "; + + long num = 0; + try { + CqlResult cqlresult = client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + num = ByteBufferUtil.toLong(cqlresult.getRows().get(0).getColumns().get(0).value); + } catch (InvalidRequestException e) { + e.printStackTrace(); + } catch (UnavailableException e) { + e.printStackTrace(); + } catch (TimedOutException e) { + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + e.printStackTrace(); + } catch (TException e) { + e.printStackTrace(); + } + + return num; + } + /** * Get the row count according to the type of rule. * @return row count. */ + //modified + /* public long getRowCountAccordingRule(int rule){ String query = "SELECT COUNT(*) FROM " + KEYSPACE + "." 
+			COLUMNFAMILY_JUSTIFICATIONS +
+			" WHERE " + COLUMN_RULE + " = " + rule + " ALLOW FILTERING";	// must use ALLOW FILTERING
+	//modified
 		long num = 0;
 		try {
@@ -294,7 +587,7 @@ public long getRowCountAccordingRule(int rule){
 		return num;
 	}
-
+*/
 
 	public void insertResources(long id, String label) throws InvalidRequestException, TException{
 		String query = "INSERT INTO " + COLUMNFAMILY_RESOURCES +
 		args.add(ByteBufferUtil.bytes(label));
 		CqlPreparedResult p_result = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
 		CqlResult result = client.execute_prepared_cql3_query(p_result.itemId, args, ConsistencyLevel.ANY);
-		logger.info("Number of results: " + result.getNum());
+		//logger.info("Number of results: " + result.getNum());
 	}
 
 	// TODO it's wrong!!!!!!!!!!
@@ -327,10 +620,10 @@ public static Triple readJustificationFromMapReduceRow(Row row){
 		long pre = row.getLong(CassandraDB.COLUMN_PRE);
 		long obj = row.getLong(CassandraDB.COLUMN_OBJ);
 		boolean isObjectLiteral = row.getBool(CassandraDB.COLUMN_IS_LITERAL);
-		long v1 = row.getLong(CassandraDB.COLUMN_V1);
-		long v2 = row.getLong(CassandraDB.COLUMN_V2);
-		long v3 = row.getLong(CassandraDB.COLUMN_V3);
-		int rule = row.getInt(CassandraDB.COLUMN_RULE);
+		long v1 = -1;
+		long v2 = -2;
+		long v3 = -3;
+		int rule = -4;
 
 		result.setObject(obj);
 		result.setObjectLiteral(isObjectLiteral);
@@ -348,7 +641,193 @@ public static int readStepFromMapReduceRow(Row row){
 		return step;
 	}
 
+	/*
+	public static void writeJustificationToMapReduceMultipleOutputsLessObjects(
+			Triple triple,
+			TripleSource source,
+			MultipleOutputs output,
+			Map keys,
+			Map allkeys,
+			List stepsValues,
+			List allTValues,
+			String stepname) throws IOException, InterruptedException{
+
+		keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+		keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+		keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+		keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType()));	// int
+		keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject()));	// long
+		keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate()));	// long
+		keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject()));	// long
+
+		allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject()));
+		allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate()));
+		allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject()));
+
+		allTValues.add(ByteBufferUtil.bytes(triple.getSubject()));
+		allTValues.add(ByteBufferUtil.bytes(triple.getPredicate()));
+		allTValues.add(ByteBufferUtil.bytes(triple.getObject()));
+		// Substitute the numeric value directly.
+		allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{1}):ByteBuffer.wrap(new byte[]{0}));
+		allTValues.add(ByteBufferUtil.bytes(
+				TriplesUtils.getTripleType(
+						source, triple.getSubject(),
+						triple.getPredicate(),
+						triple.getObject())));
+		allTValues.add(ByteBufferUtil.bytes((int)source.getStep()));
+
+		stepsValues.add(ByteBufferUtil.bytes(triple.getSubject()));
+		stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate()));
+		stepsValues.add(ByteBufferUtil.bytes(triple.getObject()));
+		stepsValues.add(ByteBufferUtil.bytes((int)triple.getType()));
+		stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject()));
stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); + + output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); + output.write(stepname, null, stepsValues); + + + keys.clear(); + allkeys.clear(); + allTValues.clear(); + stepsValues.clear(); + + } + */ + + /* + public static void writeJustificationToMapReduceMultipleOutputs( + Triple triple, + TripleSource source, + MultipleOutputs output, + String stepname) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + Map allkeys = new LinkedHashMap(); + List allvariables = new ArrayList(); +// long time = System.currentTimeMillis(); + + byte one = 1; + byte zero = 0; + // Prepare composite key (sub, pre, obj) + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + allkeys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + allkeys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + allkeys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + + allvariables.add(ByteBufferUtil.bytes(source.getStep())); + allvariables.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allvariables.add(ByteBufferUtil.bytes((int)triple.getType())); + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero + //variables.add(ByteBuffer.wrap(new byte[]{zero})); + + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); + + + + // Keys are not used for + // CqlBulkRecordWriter.write(Object key, List values), + // so it can be set to null. 
+ // Only values are used there where the value correspond to + // the insert statement set in CqlBulkOutputFormat.setColumnFamilyInsertStatement() + // All triples columnfamily: + // sub, pre, obj, isliteral, tripletype, inferredsteps + // Steps columnfamily: + // sub, pre, obj, rule, v1, v2, v3, transitivelevel + + List allTValues = new ArrayList(); + allTValues.add(ByteBufferUtil.bytes(triple.getSubject())); + allTValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + allTValues.add(ByteBufferUtil.bytes(triple.getObject())); + allTValues.add(triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + allTValues.add(ByteBufferUtil.bytes( + TriplesUtils.getTripleType( + source, triple.getSubject(), + triple.getPredicate(), + triple.getObject()))); + allTValues.add(ByteBufferUtil.bytes((int)source.getStep())); + + List stepsValues = new ArrayList(); + stepsValues.add(ByteBufferUtil.bytes(triple.getSubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getPredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getObject())); + stepsValues.add(ByteBufferUtil.bytes((int)triple.getType())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRsubject())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRpredicate())); + stepsValues.add(ByteBufferUtil.bytes(triple.getRobject())); + stepsValues.add(ByteBufferUtil.bytes((int)source.getTransitiveLevel())); + +// time = System.currentTimeMillis(); + output.write(CassandraDB.COLUMNFAMILY_ALLTRIPLES, null, allTValues); +// System.out.println("wrote all " + (System.currentTimeMillis() - time)); +// System.out.println("write all " + (System.currentTimeMillis() - time));// _output.write(stepname, keys, variables); +// time = System.currentTimeMillis(); + output.write(stepname, null, stepsValues); +// System.out.println("wrote steps" + (System.currentTimeMillis() - time)); + + + } + */ +/* public static void writeJustificationToMapReduceContext( + Triple triple, + TripleSource source, + Context context, + String stepname) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + Map allkeys = new LinkedHashMap(); + List allvariables = new ArrayList(); + long time = System.currentTimeMillis(); + + byte one = 1; + byte zero = 0; + + // Prepare composite key (sub, pre, obj) + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); + // the length of boolean type in cassandra is one byte!!!!!!!! 
+ keys.put(CassandraDB.COLUMN_IS_LITERAL, + triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); + int tripletype = TriplesUtils.DATA_TRIPLE; + if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ + tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table + }else{ + tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); + } + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero +// variables.add(ByteBuffer.wrap(new byte[]{zero})); + variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive + context.write(keys, variables); + } +*/ + /* + public static void writealltripleToMapReduceContext( Triple triple, TripleSource source, Context context) throws IOException, InterruptedException{ @@ -383,8 +862,73 @@ public static void writeJustificationToMapReduceContext( // For column inferred, init it as false i.e. zero // variables.add(ByteBuffer.wrap(new byte[]{zero})); variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); // It corresponds to COLUMN_TRANSITIVE_LEVEL, only useful in owl's transitive context.write(keys, variables); + } + */ + + public static void writeJustificationToMapReduceContext( + Triple triple, + TripleSource source, + Context context) throws IOException, InterruptedException{ + Map keys = new LinkedHashMap(); + + byte one = 1; + byte zero = 0; + + // Prepare composite key (sub, pre, obj) +// keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(triple.getSubject())); +// keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(triple.getPredicate())); +// keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(triple.getObject())); +// // the length of boolean type in cassandra is one byte!!!!!!!! 
+// keys.put(CassandraDB.COLUMN_IS_LITERAL, +// triple.isObjectLiteral()?ByteBuffer.wrap(new byte[]{one}):ByteBuffer.wrap(new byte[]{zero})); +// int tripletype = TriplesUtils.DATA_TRIPLE; +// if (triple.getType()==TriplesUtils.OWL_HORST_SYNONYMS_TABLE){ +// tripletype = TriplesUtils.SYNONYMS_TABLE; // In this way, we can provide a special triple type for triples in synonyms table +// }else{ +// tripletype = TriplesUtils.getTripleType(source, triple.getSubject(), triple.getPredicate(), triple.getObject()); +// } +// keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(tripletype)); // Modified by WuGang 20150109 +// keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes((int)triple.getType())); // int +// keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(triple.getRsubject())); // long +// keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(triple.getRpredicate())); // long +// keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(triple.getRobject())); // long + + // Prepare variables + List variables = new ArrayList(); +// variables.add(ByteBufferUtil.bytes(oValue.getSubject())); + // the length of boolean type in cassandra is one byte!!!!!!!! + // For column inferred, init it as false i.e. zero +// variables.add(ByteBuffer.wrap(new byte[]{zero})); + + int tripletype = TriplesUtils.DATA_TRIPLE; + if (triple.getType() == TriplesUtils.OWL_HORST_SYNONYMS_TABLE) { + tripletype = TriplesUtils.SYNONYMS_TABLE; + } else { + tripletype = TriplesUtils.getTripleType(source, + triple.getSubject(), triple.getPredicate(), + triple.getObject()); + } + + + variables.add(triple.isObjectLiteral() ? ByteBuffer + .wrap(new byte[] { one }) : ByteBuffer + .wrap(new byte[] { zero })); + variables.add(ByteBufferUtil.bytes((int) triple.getType())); + variables.add(ByteBufferUtil.bytes(triple.getSubject())); + + variables.add(ByteBufferUtil.bytes(tripletype)); + variables.add(ByteBufferUtil.bytes(triple.getPredicate())); + variables.add(ByteBufferUtil.bytes(triple.getObject())); + variables.add(ByteBufferUtil.bytes(triple.getRsubject())); + variables.add(ByteBufferUtil.bytes(triple.getRpredicate())); + variables.add(ByteBufferUtil.bytes(triple.getRobject())); + + variables.add(ByteBufferUtil.bytes(source.getStep())); // It corresponds to COLUMN_INFERRED_STEPS where steps = 0 means an original triple + variables.add(ByteBufferUtil.bytes(source.getTransitiveLevel())); + context.write(null, variables); } public boolean loadSetIntoMemory(Set schemaTriples, Set filters, int previousStep) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { @@ -409,11 +953,15 @@ public static Set> getJustifications() throws InvalidRequestExce // String query = "SELECT " + COLUMN_JUSTIFICATION + " FROM " + KEYSPACE + "." 
+ COLUMNFAMILY_RESULTS;
 		SimpleClientDataStax scds = new SimpleClientDataStax();
 		scds.connect(DEFAULT_HOST);
-		Statement statement = QueryBuilder.select().all().from(KEYSPACE, COLUMNFAMILY_RESULTS);
+
+		//Modified 2015-6-25
+		//From COLUMNFAMILY_RESULTS to justifications ??\\
+		Statement statement = QueryBuilder.select().all().from(KEYSPACE, "results").where(QueryBuilder.eq("id", OWLHorstJustification.id));
 
 		List rows = scds.getSession().execute(statement).all();
 		for (Row row : rows){
-			Set testResult = row.getSet(COLUMN_JUSTIFICATION, TupleValue.class);
+			//modified
+			Set testResult = row.getSet("justification", TupleValue.class);
 			Set> toBeDeletedFromResults = new HashSet>();	// Perform delete these from the results
 			boolean beAdded = true;
 			for (Set currentResult : results){
 				else if (currentResult.containsAll(testResult)){
 					toBeDeletedFromResults.add(currentResult);
 				}
 			}
-			if (beAdded)	// The testResult is a candidate justification
+			if (beAdded)	// The testResult is a candidate justification
[intervening lines lost in this excerpt; the diff resumes inside getTracingEntries(Triple triple)]
 		byte zero = 0;
 		Set tracingEntries = new HashSet();
-		String query = "SELECT * FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + " WHERE " +
-			COLUMN_SUB + "=? AND " + COLUMN_PRE + "=? AND " + COLUMN_OBJ + "=? AND " + COLUMN_IS_LITERAL + "=?";
+		//Fixed 2016/4/13
+
+		String query = "SELECT * FROM " + KEYSPACE + "." + "resultrows" + " WHERE " +
+			COLUMN_SUB + "=? AND " + COLUMN_PRE + "=? AND " + COLUMN_OBJ + "=? AND " + COLUMN_IS_LITERAL + "=? ALLOW FILTERING";
 		CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE);
 		List list = new ArrayList();
 		list.add(ByteBufferUtil.bytes(triple.getSubject()));
@@ -507,20 +1057,32 @@ public boolean loadSetIntoMemory(
 		logger.info("In CassandraDB's loadSetIntoMemory");
 
 		// Require an index created on COLUMN_TRIPLE_TYPE column
-		String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
+		/*
+		 * Note: ALLOW FILTERING added
+		 * 2015/6/12
+		 */
+
+		String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS +
 			" FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS +
-			" WHERE " + COLUMN_TRIPLE_TYPE + " = ? ";
-//		System.out.println(query);
+			" WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING";
ALLOW FILTERING"; + System.out.println(query); CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); for (int filter : filters){ List list = new ArrayList(); list.add(ByteBufferUtil.bytes(filter)); +// System.out.println("filter " + filter); CqlResult result = client.execute_prepared_cql3_query(preparedResult.itemId, list, ConsistencyLevel.ONE); - for(CqlRow row : result.rows){ + Iterator it =result.getRowsIterator(); + while(it.hasNext() ){ + CqlRow row = it.next(); +// for(CqlRow row : result.rows){ Iterator columnsIt = row.getColumnsIterator(); Long sub = null, obj = null; +// System.out.println("row : " + row); while (columnsIt.hasNext()) { Column column = columnsIt.next(); if (new String(column.getName()).equals(COLUMN_SUB)) @@ -534,9 +1096,11 @@ public boolean loadSetIntoMemory( } } if (!inverted) - schemaTriples.add(sub); + schemaTriples.add(sub); else schemaTriples.add(obj); + + System.out.println("schema : " + schemaTriples); } } @@ -550,7 +1114,12 @@ public Map> loadMapIntoMemory(Set filters) throw return loadMapIntoMemory(filters, false); } - // ���ص�key����triple��subject��value��object + // ���ص�key����triple��subject��value��object + /* + * Be Attention + * add ALLOW FILTERING + * 2015/6/12 + */ public Map> loadMapIntoMemory(Set filters, boolean inverted) throws IOException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { long startTime = System.currentTimeMillis(); @@ -561,7 +1130,7 @@ public Map> loadMapIntoMemory(Set filters, boole // Require an index created on COLUMN_TRIPLE_TYPE column String query = "SELECT " + COLUMN_SUB + ", " + COLUMN_OBJ + ", " + COLUMN_INFERRED_STEPS + " FROM " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + - " WHERE " + COLUMN_TRIPLE_TYPE + " = ? "; + " WHERE " + COLUMN_TRIPLE_TYPE + " = ? ALLOW FILTERING"; //partitonkey CqlPreparedResult preparedResult = client.prepare_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE); @@ -599,7 +1168,7 @@ public Map> loadMapIntoMemory(Set filters, boole } } - logger.debug("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime)); + logger.info("Time for CassandraDB's loadMapIntoMemory " + (System.currentTimeMillis() - startTime)); return schemaTriples; } @@ -611,26 +1180,246 @@ public void createIndexOnTripleType() throws InvalidRequestException, Unavailabl client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); } - public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ - String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; + public void createIndexOnInferredSteps() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "CREATE INDEX ON " + KEYSPACE + "." 
+ COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_INFERRED_STEPS + ")"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void createIndexOnresultrows() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + + String query = "CREATE INDEX on resultrows (sub) ;"; client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + query = "CREATE INDEX on resultrows (obj) ;"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + query = "CREATE INDEX on resultrows (pre) ;"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + query = "CREATE INDEX on resultrows (isliteral) ;"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } +// public void createIndexOnRule() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ +// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_RULE + ")"; +// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); +// } +// +// +// public void createIndexOnTransitiveLevel() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ +// String query = "CREATE INDEX ON " + KEYSPACE + "." + COLUMNFAMILY_JUSTIFICATIONS + "(" + COLUMN_TRANSITIVE_LEVELS + ")"; +// client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); +// } + + /* + + public void Index() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + //createIndexOnInferredSteps(); + createIndexOnRule(); + createIndexOnTransitiveLevel(); + createIndexOnTripleType(); + System.out.println("IndexED"); + } + + public void DropTripleTypeIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_tripletype_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropRuleIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_rule_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropInferredStepsIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_inferredSteps_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void DropTransitiveLevelIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + String query = "DROP INDEX mrjks.justifications_transitiveLevel_idx"; + client.execute_cql3_query(ByteBufferUtil.bytes(query), Compression.NONE, ConsistencyLevel.ONE); + } + + public void UnIndex() throws InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException{ + + this.DropInferredStepsIndex(); + this.DropRuleIndex(); + this.DropTransitiveLevelIndex(); + this.DropTripleTypeIndex(); + } + 
*/ + // Added by WuGang 2015-06-08 + + public static ResultSet getRows(){ + Builder builder = Cluster.builder(); + builder.addContactPoint(DEFAULT_HOST); + SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); + Cluster clu = builder.build(); + Session session = clu.connect(); + SimpleStatement statement = new SimpleStatement("SELECT sub, obj, pre, isliteral FROM mrjks.justifications where inferredsteps = 0"); + statement.setFetchSize(100); + ResultSet results = session.execute(statement); + System.out.println("------------------" + results + "--------------"); + return results; + } + + public static boolean delornot = false; +/* + public static void removeOriginalTriples(){ + if (delornot == true) + return; + delornot = true; + //ִ�в�Ӧ�жϡ� + Builder builder = Cluster.builder(); + builder.addContactPoint(DEFAULT_HOST); + SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); + Cluster clu = builder.build(); + Session session = clu.connect(); + + String cquery1 = "CREATE TABLE IF NOT EXISTS " + KEYSPACE + "." + "ruleiszero" + + " ( " + + COLUMN_SUB + " bigint, " + // partition key + COLUMN_PRE + " bigint, " + // partition key + COLUMN_OBJ + " bigint, " + // partition key + COLUMN_IS_LITERAL + " boolean, " + // partition key + COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_RULE + " int, " + + COLUMN_V1 + " bigint, " + + COLUMN_V2 + " bigint, " + + COLUMN_V3 + " bigint, " + +// COLUMN_TRIPLE_TYPE + " int, " + + COLUMN_INFERRED_STEPS + " int, " + // from this line is non-primary key + COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + COLUMN_SUB + ", " + COLUMN_PRE + ", " + COLUMN_OBJ + ", " + COLUMN_IS_LITERAL + "), " + + COLUMN_TRIPLE_TYPE + ", " + COLUMN_RULE + ", " + COLUMN_V1 + ", " + COLUMN_V2 + ", " + COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + session.execute(cquery1); + + //SELECT ALL AND DEL ALL + SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications"); + statement.setFetchSize(100); + ResultSet results = session.execute(statement); + + System.out.println("--------DEL ALL----------"); + for (Row row : results){ + + if(row.getInt(COLUMN_RULE) != 0){ + session.execute("INSERT INTO mrjks.ruleiszero(sub, pre, obj, isliteral, tripletype, rule, v1, v2, v3, inferredsteps)" + + "VALUES (" + + row.getLong(COLUMN_SUB) + "," + + row.getLong(COLUMN_PRE) + "," + + row.getLong(COLUMN_OBJ) + "," + + row.getBool(COLUMN_IS_LITERAL) + "," + + row.getInt(COLUMN_TRIPLE_TYPE) + "," + + row.getInt(COLUMN_RULE) + "," + + row.getLong(COLUMN_V1) + "," + + row.getLong(COLUMN_V2) + "," + + row.getLong(COLUMN_V3) + "," + + row.getInt(COLUMN_INFERRED_STEPS) + ");"); + System.out.println("-------Insert ----------"); + System.out.println(row); + } + + Statement delete = QueryBuilder.delete() + .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) + .where(QueryBuilder.eq(COLUMN_SUB, row.getLong(CassandraDB.COLUMN_SUB))) + .and(QueryBuilder.eq(COLUMN_PRE, row.getLong(CassandraDB.COLUMN_PRE))) + .and(QueryBuilder.eq(COLUMN_OBJ, row.getLong(CassandraDB.COLUMN_OBJ))) + .and(QueryBuilder.eq(COLUMN_IS_LITERAL, row.getBool(COLUMN_IS_LITERAL))); + session.execute(delete); + System.out.println(row); + } + */ +// SimpleClientDataStax scds = new SimpleClientDataStax(); +// scds.connect(DEFAULT_HOST); +// +// System.out.println("Select Primary Key"); +// //modified select partition key and delete using partition key +// 
Statement select = QueryBuilder.select() +// .all() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .where(QueryBuilder.eq(COLUMN_INFERRED_STEPS, 0)); +// select.setFetchSize(100); +// ResultSet result = scds.getSession().execute(select); +// //List rows = scds.getSession().executeAsync(statement); +// //List rows = scds.getSession().execute(select).all(); +// +// while(true){ +// Row delrow = result.one(); +// if(delrow == null) +// break; +// Where dQuery = QueryBuilder.delete() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .where(QueryBuilder.eq(COLUMN_SUB, delrow.getLong(CassandraDB.COLUMN_SUB))) +// .and(QueryBuilder.eq(COLUMN_PRE, delrow.getLong(CassandraDB.COLUMN_PRE))) +// .and(QueryBuilder.eq(COLUMN_OBJ, delrow.getLong(CassandraDB.COLUMN_OBJ))) +// .and(QueryBuilder.eq(COLUMN_IS_LITERAL, delrow.getBool(COLUMN_IS_LITERAL))); +// System.out.println(delrow); +// session.execute(dQuery); +// } + +// Where dQuery = QueryBuilder.delete() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .where(QueryBuilder.eq(COLUMN_RULE, ByteBufferUtil.bytes(0))); +// scds.getSession().execute(dQuery); + +// scds.close(); + +// } + + //create by LiYang +// public static void createReasonTable(){ +// SimpleClientDataStax scds = new SimpleClientDataStax(); +// scds.connect(DEFAULT_HOST); +// //Statement st = QueryBuilder +// +// for (int i = 1; i <= 7; i++ ){ +// System.out.println("Select Primary Key"); +// //modified select partition key and delete using partition key +// Statement select = QueryBuilder.select() +// .all() +// .from(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .where(QueryBuilder.eq(COLUMN_INFERRED_STEPS, i)); +// select.setFetchSize(100); +// ResultSet result = scds.getSession().execute(select); +// +// Session session = scds.getSession(); +// while(true){ +// Row insertrow = result.one(); +// if(insertrow == null) +// break; +// Insert insert = QueryBuilder +// .insertInto(KEYSPACE, COLUMNFAMILY_JUSTIFICATIONS) +// .value(COLUMN_SUB, insertrow.getLong(CassandraDB.COLUMN_SUB)) +// .value(COLUMN_PRE, insertrow.getLong(CassandraDB.COLUMN_PRE)) +// .value(COLUMN_OBJ, insertrow.getLong(CassandraDB.COLUMN_OBJ)) +// .value(COLUMN_IS_LITERAL, insertrow.getBool(COLUMN_IS_LITERAL)) +// .value(COLUMN_TRIPLE_TYPE, insertrow.getLong(CassandraDB.COLUMN_TRIPLE_TYPE)) +// .value(COLUMN_SUB, insertrow.getLong(CassandraDB.COLUMN_SUB)); +// +// } +// } +// } public static void main(String[] args) { try { - CassandraDB db = new CassandraDB("localhost", 9160); - db.init(); - db.createIndexOnTripleType(); - db.createIndexOnRule(); + CassandraDB db = new CassandraDB(); + db.init(); +// db.createIndexOnTripleType(); +// db.createIndexOnRule(); +// db.createIndexOnInferredSteps(); +// db.createIndexOnTransitiveLevel(); // db.insertResources(100, "Hello World!"); Set schemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); db.loadSetIntoMemory(schemaTriples, filters, 0); + //db.loadMapIntoMemory(filters, inverted) + System.out.println(schemaTriples); + //modified 2015/5/19 System.out.println("Transitive: " + db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE)); System.exit(0); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java new file mode 100644 index 0000000..8280f5f --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/CreateTables.java @@ -0,0 +1,66 @@ +package cn.edu.neu.mitt.mrj.io.dbs; + +import 
org.apache.cassandra.transport.SimpleClient; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Host; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.Session; + +public class CreateTables { + private Cluster cluster; + private Session session; + + public Session getSession(){ + return this.session; + } + + public void connect(String node){ + cluster = Cluster.builder() + .addContactPoint(node) + .build(); + Metadata metadata = cluster.getMetadata(); + System.out.printf("Connected to cluster: %s\n", + metadata.getClusterName()); + for(Host host : metadata.getAllHosts()){ + System.out.printf("Datacenter: %s; Host: %s; Rack: %s\n", + host.getDatacenter(), host.getAddress(), host.getRack()); + } + session = cluster.connect(); + } + + // DataStax Java Driver 2.1 + public void createSchema(Integer step){ + session.execute("CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + ".step" + step + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + "transitivelevel int" + + ", primary key((sub, pre, obj, rule) ,v1, v2, v3 )) WITH compaction = {'class': 'LeveledCompactionStrategy'}"); + } + + public void close(){ + session.close(); + cluster.close(); + } + + public static void main(String args[]){ + CreateTables client = new CreateTables(); + client.connect(CassandraDB.DEFAULT_HOST); + for (int i = 1; i < 14; i++) { + client.createSchema(i); + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + client.close(); + } +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java new file mode 100644 index 0000000..9bf3734 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/io/dbs/MrjMultioutput.java @@ -0,0 +1,146 @@ +/** + * + */ +package cn.edu.neu.mitt.mrj.io.dbs; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; +import org.apache.hadoop.util.ReflectionUtils; + +/** + * @author L + * + */ + +public class MrjMultioutput extends MultipleOutputs { + + private Map<String, TaskAttemptContext> taskContexts = new HashMap<String, TaskAttemptContext>(); + + public MrjMultioutput(TaskInputOutputContext context) { + super(context); + } + + + + // This is copied from Hadoop 0.23.11; + // it may avoid constructing the Job redundantly + @Override + protected TaskAttemptContext getContext(String nameOutput) + throws IOException { + TaskAttemptContext taskContext = taskContexts.get(nameOutput); + + if (taskContext != null) { + return taskContext; + } + + // The following trick leverages the instantiation of a record writer via + // the job thus supporting arbitrary output formats. 
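+ // Creating a throwaway Job from the current configuration clones it, so the + // named output's format/key/value classes set below cannot leak into the real + // job's configuration; the resulting TaskAttemptContext is cached in taskContexts + // per nameOutput, so this construction happens only once per named output.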
+ Job job = new Job(context.getConfiguration()); + job.setOutputFormatClass(getNamedOutputFormatClass(context, nameOutput)); + job.setOutputKeyClass(getNamedOutputKeyClass(context, nameOutput)); + job.setOutputValueClass(getNamedOutputValueClass(context, nameOutput)); + + taskContext = new TaskAttemptContext( + job.getConfiguration(), context.getTaskAttemptID()); + + taskContexts.put(nameOutput, taskContext); + + return taskContext; + } + + + + @Override + protected synchronized RecordWriter getRecordWriter( + TaskAttemptContext taskContext, String columnFamilyName) + throws IOException, InterruptedException { + + + // look for record-writer in the cache + RecordWriter writer = recordWriters.get(columnFamilyName); + +// System.out.println("get Record Writer"); + + // If not in cache, create a new one + if (writer == null) { + // get the record writer from context output format +// FileOutputFormat.setOutputName(taskContext, baseFileName); +// System.out.println("Before ConfigHelper.setOutputColumnFamily"); +// System.out.println(ConfigHelper.getOutputColumnFamily(taskContext.getConfiguration())); + + + ConfigHelper.setOutputColumnFamily(taskContext.getConfiguration(), columnFamilyName); +// CqlConfigHelper.setOutputCql(taskContext.getConfiguration(), getCql(columnFamilyNameName)); + +// CqlBulkOutputFormat.setColumnFamilySchema( +// taskContext.getConfiguration(), +// columnFamilyName, +// getSchema(columnFamilyName)); +// +// CqlBulkOutputFormat.setColumnFamilyInsertStatement( +// taskContext.getConfiguration(), +// columnFamilyName, +// getInsertStatement(columnFamilyName)); + + + + try { +// System.out.println(taskContext.getOutputFormatClass()); + writer = ((OutputFormat) ReflectionUtils.newInstance( + taskContext.getOutputFormatClass(), taskContext.getConfiguration())) + .getRecordWriter(taskContext); + +// System.out.println(writer.getClass()); + } catch (ClassNotFoundException e) { + throw new IOException(e); + } + + // if counters are enabled, wrap the writer with context + // to increment counters + if (countersEnabled) { + writer = new MultipleOutputs.RecordWriterWithCounter(writer, columnFamilyName, context); + } + + // add the record-writer to the cache + recordWriters.put(columnFamilyName, writer); + } + return writer; + } + + + String getCql(String columnFamilyNameName){ + if (columnFamilyNameName == "alltriples") { + System.out.println("get cql allt"); + return ("UPDATE alltriples SET inferredsteps =? , isliteral =? , tripletype =?"); + } + System.out.println("get cql step"); + return("UPDATE " + columnFamilyNameName + " SET transitivelevel =? 
"); + } + +// String getSchema(String columnFamilyNameName){ +//// System.out.println(columnFamilyNameName + " schema"); +// if (columnFamilyNameName == "alltriples") { +// return CassandraDB.getAlltripleSchema(); +// } +// return CassandraDB.getStepsSchema(columnFamilyNameName); +// } +// +// String getInsertStatement(String columnFamilyNameName){ +//// System.out.println(columnFamilyNameName + " insert statement"); +// if (columnFamilyNameName == "alltriples") { +// return CassandraDB.getAlltripleStatement(); +// } +// return CassandraDB.getStepsStatement(columnFamilyNameName); +// } + +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java index 0b848cd..c8a6157 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustification.java @@ -2,17 +2,22 @@ * Project Name: mrj-0.1 * File Name: OWLHorstJustification.java * @author Gang Wu - * 2015��2��5�� ����4:58:08 + * 2015��2��5�� ����4:58:08 * * Description: * TODO */ package cn.edu.neu.mitt.mrj.justification; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.net.URI; import java.util.Set; +import jdk.internal.dynalink.beans.StaticClass; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; @@ -29,13 +34,23 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.omg.CORBA.PUBLIC_MEMBER; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import ch.qos.logback.core.Context; import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.reasoner.Experiments; import cn.edu.neu.mitt.mrj.utils.TripleKeyMapComparator; + + + + + + +//modified cassandra java 2.0.5 import com.datastax.driver.core.TupleValue; /** @@ -53,6 +68,12 @@ public class OWLHorstJustification extends Configured implements Tool { public static long pre = -1; public static long obj = -1; public static Path justificationsDirBase = new Path("/justification"); + + public static long totaltriples; + private static int tripleamount = 0; + public static int id; //?? 
+ + private boolean bClearOriginals = false; /** * @@ -79,6 +100,10 @@ public void parseArgs(String[] args) { numMapTasks = Integer.valueOf(args[++i]); if (args[i].equalsIgnoreCase("--reducetasks")) numReduceTasks = Integer.valueOf(args[++i]); + + // Added by WuGang 2015-06-08 + if (args[i].equalsIgnoreCase("--clearoriginals")) + bClearOriginals = true; } } @@ -93,7 +118,7 @@ public static void prepareInput(long sub, long pre, long obj, boolean literal) { Configuration conf = new Configuration(); try { int step = 0; - Path justificationsDir = new Path(justificationsDirBase, String.valueOf(step)); // Under this directory, create a file named "original" to store the initial justification triples + Path justificationsDir = new Path(justificationsDirBase, String.valueOf(step)); // Under this directory, create a file named "original" to store the initial justification triples FileSystem fs = FileSystem.get(URI.create(justificationsDir.toString()), conf); if (!fs.exists(justificationsDir)) { SequenceFile.Writer writer = SequenceFile.createWriter(fs, @@ -115,6 +140,9 @@ private Job createJustificationJob(int step) throws IOException { // Job Configuration conf = new JobConf(); conf.setInt("maptasks", numMapTasks); + + conf.setInt("id", id); + Job job = new Job(conf); job.setJobName("OWL Horst Justification - Step " + step); job.setJarByClass(OWLHorstJustification.class); @@ -142,61 +170,114 @@ job.setOutputKeyClass(Triple.class); // reduce output key (the next loop will try to expand it) job.setOutputValueClass(MapWritable.class); // reduce output value is an explanation job.setOutputFormatClass(SequenceFileOutputFormat.class); - + return job; } public long launchClosure(String[] args) throws IOException, InterruptedException, ClassNotFoundException { + parseArgs(args); + + // Added by WuGang 2015-06-08 +// if (bClearOriginals) +// CassandraDB.removeOriginalTriples(); + + long total = 0; // Total justifications long newExpanded = -1; // count of explanations that expanded in this loop long startTime = System.currentTimeMillis(); int step = 0; - parseArgs(args); + id = Experiments.id + 200; + System.out.println("id : " + id); + + prepareInput(sub, pre, obj, false); // By default it is not a literal. 
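+ // The do/while below runs one MapReduce job per expansion step and reads the + // job's REDUCE_OUTPUT_RECORDS counter back as newExpanded, so the search is a + // fixpoint iteration: it stops at the first step that expands no explanation.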
+ File outputFile = new File("output"); + outputFile.createNewFile(); + BufferedWriter out = new BufferedWriter(new FileWriter(outputFile, true)); + +// out.write("id : " + id + "\r\n"); +// System.out.println(sub + " " + pre + " " + obj); + out.write("id : " + id + "\r\n"); + out.write("sub : " + sub + " pre : " + pre + " obj : " + obj + "\r\n"); + + // find justifications do{ log.info(">>>>>>>>>>>>>>>>>>>> Processing justification in step - " + step + " <<<<<<<<<<<<<<<<<<<<<<<<<"); + + out.write("step : " + step + "\r\n"); + +// out.write("total : " + totaltriples + "\r\n"); + Job job = createJustificationJob(step); - + job.waitForCompletion(true); +// int Retotal = 0; +// Retotal = conf.getInt("id", 111); + //需要在 job.waitForCompletion(true); 之后。 + Long result = job.getCounters().findCounter("Triples", "Triples").getValue(); + out.write("Reduce triples : " + result + "\r\n"); + + newExpanded = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); Counter counterToProcess = job.getCounters().findCounter("OWL Horst Justifications Job", "ExplanationOutputs"); total += counterToProcess.getValue(); + + step++; }while (newExpanded > 0); + //modified cassandra java 2.0.5 CassandraDB db = null; + try{ - db = new CassandraDB("localhost", 9160); + db = new CassandraDB(); db.getDBClient().set_keyspace(CassandraDB.KEYSPACE); Set> justifications = db.getJustifications(); int count = 0; + for (Set justification : justifications){ - System.out.println(">>>Justification - " + ++count + ":"); +// int tripleamount = 0; +// System.out.println(">>>Justification - " + ++count + ":"); +// out.write(">>>Justification - " + ++count + ":" + "\r\n"); for(TupleValue triple : justification){ long sub = triple.getLong(0); long pre = triple.getLong(1); long obj = triple.getLong(2); - System.out.println("\t<" + sub + ", " + pre + ", " + obj + ">" + - " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">"); +// System.out.println("\t<" + sub + ", " + pre + ", " + obj + ">" + +// " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">"); +// out.write("\t<" + sub + ", " + pre + ", " + obj + ">" + +// " - <" + db.idToLabel(sub) + ", " + db.idToLabel(pre) + ", " + db.idToLabel(obj) + ">" + "\r\n"); + tripleamount++; } +// System.out.println(tripleamount); + out.write("tripleamount : " + tripleamount + "\r\n"); } + + db.CassandraDBClose(); + }catch(Exception e){ System.err.println(e.getMessage()); } - - - + System.out.println("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000); System.out.println("Number justifications: " + total); +// out.write("tripleamount : " + tripleamount + "\r\n"); + + out.write("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000 + "\r\n"); + out.write("Number justifications: " + total + "\r\n\r\n"); + out.flush(); + out.close(); + + return total; } @@ -214,7 +295,7 @@ public int run(String[] args) throws Exception { public static void main(String[] args) { if (args.length < 2) { - System.out.println("USAGE: OWLHorstJustification [DerivedTriples base path] [Justifications base path] [options]"); + System.out.println("USAGE: OWLHorstJustification [options]"); return; } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java index df05ca2..d7a4bf0 100644 --- 
a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationMapper.java @@ -13,6 +13,7 @@ import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer.Context; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -393,5 +394,9 @@ protected void setup(Context context) throws IOException, InterruptedException { te.printStackTrace(); } } + + protected void cleanup(Context context) throws IOException, InterruptedException{ + db.CassandraDBClose(); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java index d477f56..b80060a 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/justification/OWLHorstJustificationReducer.java @@ -3,10 +3,17 @@ */ package cn.edu.neu.mitt.mrj.justification; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; +import java.io.OutputStreamWriter; import java.util.HashSet; import java.util.Set; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Writable; @@ -15,8 +22,10 @@ import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.io.dbs.SimpleClientDataStax; +import cn.edu.neu.mitt.mrj.reasoner.Experiments; import com.datastax.driver.core.DataType; +//modified cassandra java 2.0.5 import com.datastax.driver.core.TupleType; import com.datastax.driver.core.TupleValue; import com.datastax.driver.core.querybuilder.Insert; @@ -33,15 +42,52 @@ public class OWLHorstJustificationReducer extends // private static Logger log = LoggerFactory.getLogger(OWLHorstJustificationReducer.class); private static SimpleClientDataStax sClient = null; + private long triplenum = 0; + @Override protected void reduce(MapWritable key, Iterable values, Context context) throws IOException, InterruptedException { long total = 0; + int id = 0; + Configuration reduceconf = context.getConfiguration(); + id = reduceconf.getInt("id", 2); + for (LongWritable count:values){ total += count.get(); } -// System.out.println("Total count is: " + total); + triplenum = total; + System.out.println("Reduce total count is: " + total); + //modified cassandra java 2.0.5 + +// reduceconf.setInt("id", (int)total); + + + + +// File outputFile = new File("output"); +// outputFile.createNewFile(); +// BufferedWriter out = new BufferedWriter(new FileWriter(outputFile, true)); +// out.write("Total count is: " + total); +// out.flush(); +// out.close(); + +// try{ +// Path pt=new Path("./result"); +// FileSystem fs = FileSystem.get(new Configuration()); +// BufferedWriter br=new BufferedWriter(new OutputStreamWriter(fs.create(pt,true))); +// // TO append data to a file, use fs.append(Path f) +// String line; +// line="Total count is: " + total; +// System.out.println(line); +// br.write(line); +// br.close(); +// }catch(Exception e){ +// System.out.println("File not found"); +// } + +// System.out.println("Reduce id : " + Experiments.id); //均是0 + if (total == key.size()){ // Find a candidate 
justification, output it to the database Set resultJustification = new HashSet(); @@ -52,15 +98,19 @@ protected void reduce(MapWritable key, Iterable values, Context co theValue.setLong(1, ((Triple)triple).getPredicate()); theValue.setLong(2, ((Triple)triple).getObject()); resultJustification.add(theValue); + System.out.println(" _______ " + ((Triple)triple).getSubject()); } + System.out.println("Write a candidate justification to database=========== "); + System.out.println(resultJustification.toString()); // log.info("Write a candidate justification to database=========== "); // log.info(resultJustification.toString()); + System.out.println(" REDUCE id : " + id); Insert insert = QueryBuilder .insertInto(CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_RESULTS) .value(CassandraDB.COLUMN_JUSTIFICATION, resultJustification) - .value(CassandraDB.COLUMN_ID, UUIDs.timeBased()); + .value("id", id); sClient.getSession().execute(insert); // Added by WuGang 2015-02-14 @@ -72,6 +122,8 @@ protected void reduce(MapWritable key, Iterable values, Context co } } // else do nothing. +// OWLHorstJustification.totaltriples = total; + } @Override @@ -84,6 +136,8 @@ protected void setup(Context context) @Override protected void cleanup(Context context) throws IOException, InterruptedException { + context.getCounter("Triples", "Triples").increment(triplenum); + sClient.close(); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java new file mode 100644 index 0000000..d856550 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/Experiments.java @@ -0,0 +1,90 @@ +package cn.edu.neu.mitt.mrj.reasoner; + +import java.io.IOException; +import java.net.InetAddress; +import java.util.Random; +import java.util.concurrent.ThreadLocalRandom; + +import jdk.internal.dynalink.beans.StaticClass; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ToolRunner; +import org.omg.CORBA.PUBLIC_MEMBER; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.justification.OWLHorstJustification; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Host; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SocketOptions; +import com.datastax.driver.core.Cluster.Builder; +public class Experiments { + + public static int id; + + public static void main(String[] args){ + Builder builder = Cluster.builder(); + builder.addContactPoint(CassandraDB.DEFAULT_HOST); + SocketOptions socketoptions = new SocketOptions().setKeepAlive(true).setConnectTimeoutMillis(5 * 10000).setReadTimeoutMillis(100000); + builder.withSocketOptions(socketoptions); + Cluster cluster = builder.build(); + Metadata metadata = cluster.getMetadata(); + Session session = cluster.connect(); + +// Random r = new Random(System.currentTimeMillis()) ; +// int random = 0; +// if (r.nextBoolean()) { +// random = r.nextInt(101) ; +// } else { +// random = -r.nextInt(101) ; +// } + for (id = 0; id < 10; id++) { + long random = ThreadLocalRandom.current().nextLong(-9223372036854775808L, 9223372036854775807L); +// long startTime = System.currentTimeMillis(); + ResultSet results = session.execute("SELECT sub ,pre ,obj FROM 
mrjks.resultrows WHERE TOKEN(isliteral , rule , sub ) > " + random + " LIMIT 1;"); +// System.out.println(results); + for (Row row : results){ + Configuration conf = new Configuration(); + try { + FileSystem hdfs = FileSystem.get(conf); + Path deledir= new Path("/justification"); + boolean isDeleted=hdfs.delete(deledir,true); + } catch (IOException e1) { + e1.printStackTrace(); + } + +// System.out.println("id : " + id); + + Long sub, pre, obj; + sub = row.getLong("sub"); + pre = row.getLong("pre"); + obj = row.getLong("obj"); + System.out.println("sub : " + sub + " pre : " + pre + " obj : " + obj); + // The argument strings must not contain extra spaces + String[] argStrings = {"--maptasks" , "8" , "--reducetasks" , "8" , "--subject" , sub.toString() , "--predicate" , pre.toString() , "--object" , obj.toString() ,"--clearoriginals"}; +// OWLHorstJustification OWJ = new OWLHorstJustification(); + System.out.println(java.util.Arrays.toString(argStrings)); + OWLHorstJustification.main(argStrings); + +// try { +// OWJ.launchClosure(argStrings); +// } catch (ClassNotFoundException | IOException +// | InterruptedException e) { +// System.out.println("launchClosure error"); +// e.printStackTrace(); +// } + + } +// System.out.println("Time (in seconds): " + (System.currentTimeMillis() - startTime) / 1000); + } + cluster.close(); + } +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java index 03a6e64..dcc2125 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/MapReduceReasonerJobConfig.java @@ -2,23 +2,27 @@ * Project Name: mrj-0.1 * File Name: MapReduceJobConfig.java * @author Gang Wu - * 2014-12-28, 10:44:16 AM + * 2014-12-28, 10:44:16 AM * * Description: * TODO */ package cn.edu.neu.mitt.mrj.reasoner; + import java.io.IOException; +import java.nio.ByteBuffer; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; import org.apache.cassandra.hadoop.cql3.CqlInputFormat; import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; @@ -31,72 +35,115 @@ public class MapReduceReasonerJobConfig { // Input from CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraInput(Job job, Set<Integer> filters) { + private static void configureCassandraInput(Job job, Set<Integer> typeFilters, Set<Integer> transitiveLevelFilters, int certainStep) { //Set the input - ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost"); + ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); // Should not use 9160 port in cassandra 2.1.2 because new cql3 port is 9042, please refer to conf/cassandra.yaml //ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160"); - ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); + ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - if (filters.size() == 0){ - CqlConfigHelper.setInputCql(job.getConfiguration(), - "SELECT * FROM " + 
CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? ALLOW FILTERING"); + if (typeFilters.size() == 0){ + + if (transitiveLevelFilters.size() == 0) + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? ALLOW FILTERING"); +// "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " WHERE TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") > ? AND TOKEN(" + +// CassandraDB.COLUMN_SUB + ", " + +// CassandraDB.COLUMN_PRE + ", " + +// CassandraDB.COLUMN_OBJ + ", " + +// CassandraDB.COLUMN_IS_LITERAL + +// ") <= ? ALLOW FILTERING"); + else{ + Integer max = java.util.Collections.max(transitiveLevelFilters); + Integer min = java.util.Collections.min(transitiveLevelFilters); + + + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? " + +// CassandraDB.COLUMN_INFERRED_STEPS + " = " + certainStep + " AND " + +// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " >= " + min + " AND " + +// CassandraDB.COLUMN_TRANSITIVE_LEVELS + " <= " + max + + " ALLOW FILTERING"); + } + } - else if (filters.size() == 1){ + else if (typeFilters.size() == 1){ + if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property + System.err.println("This is not supported!!!"); + return; + } + CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " = " + filters.toArray()[0] + - " ALLOW FILTERING"); + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? "); +// ") <= ? AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " = " + typeFilters.toArray()[0] + +// " ALLOW FILTERING"); }else{ + if (transitiveLevelFilters.size() != 0){ // stepFilter is only for handling transitive property + System.err.println("This is not supported!!!"); + return; + } + + // The support of IN clause in cassandra db's SELECT is restricted. 
// So we have to try to manually cluster the values in the filters. // see http://www.datastax.com/documentation/cql/3.1/cql/cql_reference/select_r.html#reference_ds_d35_v2q_xj__selectIN System.out.println("<<<<<<<>>>>>>>>"); System.out.println("<<<<<<<>>>>>>>>"); - Integer max = java.util.Collections.max(filters); - Integer min = java.util.Collections.min(filters); + Integer max = java.util.Collections.max(typeFilters); + Integer min = java.util.Collections.min(typeFilters); CqlConfigHelper.setInputCql(job.getConfiguration(), "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " WHERE TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + ") > ? AND TOKEN(" + - CassandraDB.COLUMN_SUB + ", " + - CassandraDB.COLUMN_PRE + ", " + - CassandraDB.COLUMN_OBJ + ", " + - CassandraDB.COLUMN_IS_LITERAL + - ") <= ? AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + - CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + - " ALLOW FILTERING"); + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + ") <= ? "); +// + "AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " >= " + min + " AND " + +// CassandraDB.COLUMN_TRIPLE_TYPE + " <= " + max + +// " ALLOW FILTERING"); // String strFilter = filters.toString(); // String strInFilterClause = strFilter.substring(1, strFilter.length()-1); // remove "[" and "]" characters of Set.toString() @@ -120,7 +167,8 @@ else if (filters.size() == 1){ } CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); - ConfigHelper.setInputSplitSize(job.getConfiguration(), 180); + //Modifide by LiYang + ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); job.setInputFormatClass(CqlInputFormat.class); System.out.println("ConfigHelper.getInputSplitSize - input: " + ConfigHelper.getInputSplitSize(job.getConfiguration())); System.out.println("CqlConfigHelper.getInputPageRowSize - input: " + CqlConfigHelper.getInputPageRowSize(job.getConfiguration())); @@ -129,38 +177,53 @@ else if (filters.size() == 1){ // Output to CassandraDB.COLUMNFAMILY_JUSTIFICATIONS - private static void configureCassandraOutput(Job job) { + private static void configureCassandraOutput(Job job, int step) { //Set the output job.setOutputKeyClass(Map.class); job.setOutputValueClass(List.class); - job.setOutputFormatClass(CqlOutputFormat.class); - ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost"); - ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); - ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); - String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + - " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? 
"; - CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + + job.setOutputFormatClass(CqlBulkOutputFormat.class); + CqlBulkOutputFormat.setColumnFamilySchema(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS, CassandraDB.getJustificationsSchema()); + CqlBulkOutputFormat.setColumnFamilyInsertStatement(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS, CassandraDB.getJustificationseStatement()); + CqlBulkOutputFormat.setDeleteSourceOnSuccess(job.getConfiguration(), true); + + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + + ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE); + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + +// MrjMultioutput.addNamedOutput(job, CassandraDB.COLUMNFAMILY_ALLTRIPLES, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); +// MrjMultioutput.addNamedOutput(job, "step" + step, CqlBulkOutputFormat.class, ByteBuffer.class, List.class); +// CqlConfigHelper.setOutputCql(conf, "select * from step1"); } - // In each derivation, we may create a set of jobs + // In each derivation, we may create a set of jobs + // certainStep is optional, if it is specified then we can use it to filter transitiveLevel with non-equal operator + // (see cql specification) public static Job createNewJob(Class classJar, String jobName, - Set filters, int numMapTasks, int numReduceTasks, - boolean bConfigCassandraInput, boolean bConfigCassandraOutput) + Set typeFilters, Set transitiveLevelFilters, int certainStep, int numMapTasks, int numReduceTasks, + boolean bConfigCassandraInput, boolean bConfigCassandraOutput, Integer step) throws IOException { Configuration conf = new Configuration(); conf.setInt("maptasks", numMapTasks); - conf.set("input.filter", filters.toString()); - + conf.set("input.filter", typeFilters.toString()); + + conf.set("mapreduce.output.bulkoutputformat.streamthrottlembits", "400"); + Job job = new Job(conf); job.setJobName(jobName); job.setJarByClass(classJar); job.setNumReduceTasks(numReduceTasks); + job.setNumReduceTasks(8); + if (bConfigCassandraInput) - configureCassandraInput(job, filters); + configureCassandraInput(job, typeFilters, transitiveLevelFilters, certainStep); if (bConfigCassandraOutput) - configureCassandraOutput(job); + configureCassandraOutput(job, step); + // Added by WuGang 2010-05-25 System.out.println("Create a job - " + jobName); @@ -169,6 +232,44 @@ public static Job createNewJob(Class classJar, String jobName, return job; } - - +/* + public static void CreateTables(String jobname){ + Builder builder = Cluster.builder(); + builder.addContactPoint(CassandraDB.DEFAULT_HOST); + SocketOptions socketoptions= new SocketOptions().setKeepAlive(true).setReadTimeoutMillis(10 * 10000).setConnectTimeoutMillis(5 * 10000); + Cluster clu = builder.build(); + Session session = clu.connect(); + + String query = ""; + if(jobname == "RDFS special properties reasoning"){ + query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." + jobname + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + "transitiveleves int" + + ", primary key((sub, pre, obj, rule) ,v1, v2, v3 ))"; + } + else { + query = "CREATE TABLE IF NOT EXISTS " + "mrjks" + "." 
+ jobname + + " ( " + + "sub" + " bigint, " + + "pre" + " bigint, " + + "obj" + " bigint, " + + "rule int, " + + "v1" + " bigint, " + + "v2" + " bigint, " + + "v3" + " bigint, " + + ", primary key((id, rule) ,v1, v2, v3))"; + } + + session.execute(query); + System.out.println(query); + System.out.println("--------Create Table----------"); + } + */ } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java new file mode 100644 index 0000000..64ffe76 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/RDFSOWLReasoner.java @@ -0,0 +1,81 @@ +package cn.edu.neu.mitt.mrj.reasoner; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import cn.edu.neu.mitt.mrj.reasoner.owl.OWLReasoner; +import cn.edu.neu.mitt.mrj.reasoner.rdfs.RDFSReasoner; + +public class RDFSOWLReasoner { + + protected static Logger log = LoggerFactory.getLogger(RDFSOWLReasoner.class); + + static int step = 0; + + private static void parseArgs(String[] args) { + + for(int i=0;i 0; + firstLoop = false; + } +// log.info("Number triples derived: " + totalDerivation); +// log.info("Time derivation: " + (System.currentTimeMillis() - startTime)); + System.out.println("Number triples derived: " + totalDerivation); + System.out.println("Time derivation: " + (System.currentTimeMillis() - startTime)); + } catch (Exception e) { + log.error(e.getMessage()); + e.printStackTrace(); + } + } + +} \ No newline at end of file diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java new file mode 100644 index 0000000..08fba3e --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustifications.java @@ -0,0 +1,79 @@ +package cn.edu.neu.mitt.mrj.reasoner; + + +import java.util.List; +import java.util.Map; + +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlInputFormat; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +public class ReasonedJustifications extends Configured implements Tool{ + public int run(String[] args) throws Exception{ + + Configuration conf = new Configuration(); + + Job job = new Job(conf); + job.setJobName(" Test "); + job.setJarByClass(ReasonedJustifications.class); + job.setNumReduceTasks(8); + + ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + //CassandraDB.COLUMN_IS_LITERAL + + ") > ? 
AND TOKEN(" + + CassandraDB.COLUMN_IS_LITERAL + ", " + + CassandraDB.COLUMN_RULE + ", " + + CassandraDB.COLUMN_SUB + + //CassandraDB.COLUMN_IS_LITERAL + + ") <= ? ALLOW FILTERING"); + CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); + //Modified by LiYang + ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); + job.setInputFormatClass(CqlInputFormat.class); + job.setOutputKeyClass(Map.class); + job.setOutputValueClass(List.class); + job.setOutputFormatClass(CqlOutputFormat.class); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + "resultrows" + + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=?, " + CassandraDB.COLUMN_TRANSITIVE_LEVELS + "=?"; + CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + + job.setMapperClass(ReasonedJustificationsMapper.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + job.setReducerClass(ReasonedJustificationsReducer.class); + + + + job.waitForCompletion(true); + + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new ReasonedJustifications(), args); + System.exit(res); + } + +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java new file mode 100644 index 0000000..e2142fc --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsMapper.java @@ -0,0 +1,91 @@ +package cn.edu.neu.mitt.mrj.reasoner; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; + +import org.apache.cassandra.thrift.Compression; +import org.apache.cassandra.thrift.CqlPreparedResult; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SimpleStatement; + +public class ReasonedJustificationsMapper extends Mapper{ + private Cluster cluster; + private Session session; + //** + public void map(Long keys, Row rows, Context context) throws IOException, InterruptedException{ + + Integer inferredsteps; + Integer transitivelevel; + // for (Row rows : row){ + if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { + + String conKey; + //***** + conKey = rows.getLong(CassandraDB.COLUMN_SUB) //��ʹ��ByteBufferUtil�� + + "_" + rows.getLong(CassandraDB.COLUMN_PRE) + + "_" + rows.getLong(CassandraDB.COLUMN_OBJ) + + "_" + rows.getBool(CassandraDB.COLUMN_IS_LITERAL) + + "_" + rows.getInt(CassandraDB.COLUMN_TRIPLE_TYPE) + + "_" + rows.getInt(CassandraDB.COLUMN_RULE) + + "_" + rows.getLong(CassandraDB.COLUMN_V1) + + "_" + rows.getLong(CassandraDB.COLUMN_V2) + + "_" + rows.getLong(CassandraDB.COLUMN_V3) + + "_" + 
rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS); // Modified by WuGang, 2015-07-15 + transitivelevel = rows.getInt(CassandraDB.COLUMN_TRANSITIVE_LEVELS); // Added by WuGang, 2015-07-15 + + context.write(new Text(conKey), new IntWritable(transitivelevel)); + } + //} + + } + + public void setup(Context context) throws IOException, InterruptedException{ + cluster = Cluster.builder().addContactPoint(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host).build(); + Metadata metadata = cluster.getMetadata(); + System.out.printf("-------Connected to cluster: %s\n", metadata.getClusterName()); + session = cluster.connect(); + + String query = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "resultrows" + + " ( " + + CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key + CassandraDB.COLUMN_RULE + " int, " + + CassandraDB.COLUMN_SUB + " bigint, " + // partition key + CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_PRE + " bigint, " + // partition key + CassandraDB.COLUMN_OBJ + " bigint, " + // partition key + CassandraDB.COLUMN_V1 + " bigint, " + + CassandraDB.COLUMN_V2 + " bigint, " + + CassandraDB.COLUMN_V3 + " bigint, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + CassandraDB.COLUMN_TRANSITIVE_LEVELS + " int, " + + " PRIMARY KEY ((" + CassandraDB.COLUMN_IS_LITERAL + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_SUB + "), " + + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + + //", " + COLUMN_TRIPLE_TYPE + + " ) ) "; + +// session.execute(query); +// query = "CREATE INDEX on mrjks.resultrows (sub) ;"; +// session.execute(query); +// query = "CREATE INDEX on mrjks.resultrows (obj) ;"; +// session.execute(query); +// query = "CREATE INDEX on mrjks.resultrows (pre) ;"; +// session.execute(query); +// query = "CREATE INDEX on mrjks.resultrows (isliteral) ;"; +// session.execute(query); + + } +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java new file mode 100644 index 0000000..ca30fc7 --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/ReasonedJustificationsReducer.java @@ -0,0 +1,46 @@ +package cn.edu.neu.mitt.mrj.reasoner; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +public class ReasonedJustificationsReducer extends Reducer, List>{ + public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException{ + + for (IntWritable value : values) { + //Prepare the insert keys collection + String[] splitkeys = key.toString().split("_"); + Map keys = new LinkedHashMap(); + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(Long.parseLong(splitkeys[0]))); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(Long.parseLong(splitkeys[1]))); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(Long.parseLong(splitkeys[2]))); + //bool + keys.put(CassandraDB.COLUMN_IS_LITERAL, Boolean.valueOf(splitkeys[3])?ByteBuffer.wrap(new 
byte[]{1}):ByteBuffer.wrap(new byte[]{0})); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(Integer.parseInt(splitkeys[4]))); + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(Integer.parseInt(splitkeys[5]))); + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(Long.parseLong(splitkeys[6]))); + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(Long.parseLong(splitkeys[7]))); + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(Long.parseLong(splitkeys[8]))); + + //prepare the insert variables collection + List variables = new ArrayList(); + int var_inferredsteps = Integer.parseInt(value.toString()); + variables.add(ByteBufferUtil.bytes(var_inferredsteps)); + int var_transitivelevel = Integer.parseInt(splitkeys[9]); + variables.add(ByteBufferUtil.bytes(var_transitivelevel)); + context.write(keys, variables); + } + + } + +} diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java index cb27ef0..8cad3d8 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java @@ -16,6 +16,7 @@ import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer.Context; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -35,7 +36,7 @@ public class OWLAllSomeValuesMapper extends Mapper= previousDerivation) { - - log.info("And I met a triple with RDF_TYPE as predicate: " + value); + //DEL +// log.info("And I met a triple with RDF_TYPE as predicate: " + value); - // ��Ҫ���⴫��һ��w - if (someValues.containsKey(value.getObject())) { //�ҵ���һ��(x,rdf:type,w)��������Ԫ�飬����w����v owl:someValuesFrom w + // ��Ҫ���⴫��һ��w + if (someValues.containsKey(value.getObject())) { //�ҵ���һ��(x,rdf:type,w)��������Ԫ�飬����w����v owl:someValuesFrom w log.info("I met someValuesFrom: " + value); Collection values = someValues.get(value.getObject()); Iterator itr = values.iterator(); bKey[0] = 2; bValue[0] = 1; - bValue[17] = 0; // ��������һ��someValues + bValue[17] = 0; // ��������һ��someValues NumberUtils.encodeLong(bKey, 9, value.getSubject()); - NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, ��wд��value�� + NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, ��wд��value�� while (itr.hasNext()) { byte[] bytes = itr.next(); System.arraycopy(bytes, 0, bKey, 1, 8); System.arraycopy(bytes, 8, bValue, 1, 8); - context.write(oKey, oValue); //�������((p,x),v) -> ((p,x),(v,w,0)) + context.write(oKey, oValue); //�������((p,x),v) -> ((p,x),(v,w,0)) } } - // ��Ҫ���⴫��һ��v - if (allValues.containsKey(value.getObject())) { //�ҵ���һ��(w,rdf:type,v)��������Ԫ�飬����v����v owl:allValuesFrom u + // ��Ҫ���⴫��һ��v + if (allValues.containsKey(value.getObject())) { //�ҵ���һ��(w,rdf:type,v)��������Ԫ�飬����v����v owl:allValuesFrom u log.info("I met allValuesFrom: " + value); Collection values = allValues.get(value.getObject()); Iterator itr = values.iterator(); bKey[0] = 1; bValue[0] = 1; - bValue[17] = 1; // ��������һ��allValues + bValue[17] = 1; // ��������һ��allValues NumberUtils.encodeLong(bKey, 9, value.getSubject()); - NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, ��vд��value�� + NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, 
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java
index cb27ef0..8cad3d8 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesMapper.java
@@ -16,6 +16,7 @@
 import org.apache.cassandra.thrift.UnavailableException;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer.Context;
 import org.apache.thrift.TException;
 import org.apache.thrift.transport.TTransportException;
 import org.slf4j.Logger;
@@ -35,7 +36,7 @@ public class OWLAllSomeValuesMapper extends Mapper= previousDerivation) {
-
- log.info("And I met a triple with RDF_TYPE as predicate: " + value);
+ //DEL
+// log.info("And I met a triple with RDF_TYPE as predicate: " + value);

- // an extra w has to be passed along
- if (someValues.containsKey(value.getObject())) { // found a triple (x, rdf:type, w) where w satisfies v owl:someValuesFrom w
+ // an extra w has to be passed along
+ if (someValues.containsKey(value.getObject())) { // found a triple (x, rdf:type, w) where w satisfies v owl:someValuesFrom w
 log.info("I met someValuesFrom: " + value);
 Collection<byte[]> values = someValues.get(value.getObject());
 Iterator<byte[]> itr = values.iterator();
 bKey[0] = 2;
 bValue[0] = 1;
- bValue[17] = 0; // mark this as a someValues case
+ bValue[17] = 0; // mark this as a someValues case
 NumberUtils.encodeLong(bKey, 9, value.getSubject());
- NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write w into the value
+ NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write w into the value
 while (itr.hasNext()) {
 byte[] bytes = itr.next();
 System.arraycopy(bytes, 0, bKey, 1, 8);
 System.arraycopy(bytes, 8, bValue, 1, 8);
- context.write(oKey, oValue); // emits ((p,x),v) -> ((p,x),(v,w,0))
+ context.write(oKey, oValue); // emits ((p,x),v) -> ((p,x),(v,w,0))
 }
 }

- // an extra v has to be passed along
- if (allValues.containsKey(value.getObject())) { // found a triple (w, rdf:type, v) where v satisfies v owl:allValuesFrom u
+ // an extra v has to be passed along
+ if (allValues.containsKey(value.getObject())) { // found a triple (w, rdf:type, v) where v satisfies v owl:allValuesFrom u
 log.info("I met allValuesFrom: " + value);
 Collection<byte[]> values = allValues.get(value.getObject());
 Iterator<byte[]> itr = values.iterator();
 bKey[0] = 1;
 bValue[0] = 1;
- bValue[17] = 1; // mark this as an allValues case
+ bValue[17] = 1; // mark this as an allValues case
 NumberUtils.encodeLong(bKey, 9, value.getSubject());
- NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write v into the value
+ NumberUtils.encodeLong(bValue, 9, value.getObject()); //Added by WuGang, write v into the value
 while (itr.hasNext()) {
 byte[] bytes = itr.next();
 System.arraycopy(bytes, 0, bKey, 1, 8);
 System.arraycopy(bytes, 8, bValue, 1, 8);
- context.write(oKey, oValue); // emits ((p,w),u) -> ((p,w),(u,v,1))
+ context.write(oKey, oValue); // emits ((p,w),u) -> ((p,w),(u,v,1))
 }
 }
 } else {
- // onPropertySome holds the properties referenced by onProperty restrictions that carry someValuesFrom, i.e. the restriction of every v owl:someValuesFrom w triple
- if (onPropertySome.contains(value.getPredicate())) {// the p of some triple (u p x) is a property referenced by onPropertySome
+ // onPropertySome holds the properties referenced by onProperty restrictions that carry someValuesFrom, i.e. the restriction of every v owl:someValuesFrom w triple
+ if (onPropertySome.contains(value.getPredicate())) {// the p of some triple (u p x) is a property referenced by onPropertySome
 //Rule 15 - someValuesFrom
 log.info("I met onPropertySome: " + value);
 bKey[0] = 2;
@@ -102,19 +103,19 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 NumberUtils.encodeLong(bKey, 1, value.getPredicate());
 NumberUtils.encodeLong(bKey, 9, value.getObject());
 NumberUtils.encodeLong(bValue, 1, value.getSubject());
- context.write(oKey, oValue); // emits ((p,x),(u,,)); the last two fields of the value are left unset
+ context.write(oKey, oValue); // emits ((p,x),(u,,)); the last two fields of the value are left unset
 }

- // onPropertyAll holds the properties referenced by onProperty restrictions that carry allValuesFrom, i.e. the restriction of every v owl:allValuesFrom u triple
- if (onPropertyAll.contains(value.getPredicate())) {// the p of some triple (w p x) is a property referenced by onPropertyAll
+ // onPropertyAll holds the properties referenced by onProperty restrictions that carry allValuesFrom, i.e. the restriction of every v owl:allValuesFrom u triple
+ if (onPropertyAll.contains(value.getPredicate())) {// the p of some triple (w p x) is a property referenced by onPropertyAll
 //Rule 16 - allValuesFrom
 log.info("I met onPropertyAll: " + value);
 bKey[0] = 1;
- bValue[0] = 0; // Added by WuGang, this line was missing in the original code; with several matching triples its absence caused errors in the reduce phase
+ bValue[0] = 0; // Added by WuGang, this line was missing in the original code; with several matching triples its absence caused errors in the reduce phase
 NumberUtils.encodeLong(bKey, 1, value.getPredicate());
 NumberUtils.encodeLong(bKey, 9, value.getSubject());
 NumberUtils.encodeLong(bValue, 1, value.getObject());
- context.write(oKey, oValue); // emits ((p,w),(x,,)); the last two fields of the value are left unset
+ context.write(oKey, oValue); // emits ((p,w),(x,,)); the last two fields of the value are left unset
 }
 }
 }
@@ -122,7 +123,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 @Override
 public void setup(Context context) throws IOException {
 previousDerivation = context.getConfiguration().getInt("reasoner.previousDerivation", -1);
-
+
// List filesProperty = MultiFilesReader.recursiveListStatus(context, "FILTER_ONLY_OWL_ON_PROPERTY");
// Map> allValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_ALL_VALUES", context);
// Map> someValuesTmp = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_OWL_SOME_VALUES", context);
@@ -150,6 +151,11 @@ public void setup(Context context) throws IOException {
 makeJoin(onPropertyTmp, context, someValuesTmp, allValuesTmp,
 someValues, allValues, onPropertySome, onPropertyAll);
+
+
+ db.CassandraDBClose();
+
+
 }catch (TTransportException e) {
 e.printStackTrace();
 } catch (InvalidRequestException e) {
@@ -206,7 +212,7 @@ protected void makeJoin(Map<Long, Collection<byte[]>> onPropertyTmp, Context contex
 }

 if (allValuesTmp.containsKey(sub)) {
- // col holds, for this subject, all matching objects, where subject and object satisfy (subject, owl:allValuesFrom, object)
+ // col holds, for this subject, all matching objects, where subject and object satisfy (subject, owl:allValuesFrom, object)
 Collection<byte[]> col = allValuesTmp.get(sub);
 if (col != null) {
 Iterator<byte[]> itr = col.iterator();
@@ -231,4 +237,5 @@ protected void makeJoin(Map<Long, Collection<byte[]>> onPropertyTmp, Context contex
 }
 }
+
 }
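The mapper's key and value layouts are fixed-width byte arrays: one tag byte followed by 8-byte longs (and, for values, one trailing marker byte at offset 17 distinguishing someValues from allValues). A self-contained sketch of that packing, with assumed helper names that mirror the encodeLong offsets above:

    import java.nio.ByteBuffer;

    // Sketch (assumed layout, inferred from the offsets in OWLAllSomeValuesMapper):
    // key = tag byte + p + x (17 bytes); value = tag byte + v + w + some/all marker (18 bytes).
    public final class SomeAllEncoding {
        static byte[] packKey(byte tag, long p, long x) {
            return ByteBuffer.allocate(17).put(tag).putLong(p).putLong(x).array();
        }
        static byte[] packValue(byte tag, long v, long w, byte someOrAll) {
            return ByteBuffer.allocate(18).put(tag).putLong(v).putLong(w).put(someOrAll).array();
        }
    }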
CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 }
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
index 9d56f78..0161e11 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLAllSomeValuesReducer.java
@@ -28,18 +28,18 @@ public class OWLAllSomeValuesReducer extends Reducer resources = new LinkedList(); // Added by WuGang
- private LinkedList<Long> others = new LinkedList<Long>(); // kept in step with types
- private LinkedList<Byte> s_a_types = new LinkedList<Byte>(); // kept in step with types; records whether an entry is someValues (0) or allValues (1)
+ private LinkedList<Long> others = new LinkedList<Long>(); // kept in step with types
+ private LinkedList<Byte> s_a_types = new LinkedList<Byte>(); // kept in step with types; records whether an entry is someValues (0) or allValues (1)

 @Override
 public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context) throws IOException, InterruptedException {
- log.info("I'm in OWLAllSomeValuesReducer");
+ //log.info("I'm in OWLAllSomeValuesReducer");
 types.clear();
 resources.clear();

 byte[] bKey = key.getBytes();
- long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject is the second long in the key, starting at offset 9 (there is one leading byte)
+ long rSubject = NumberUtils.decodeLong(bKey, 9); // rSubject is the second long in the key, starting at offset 9 (there is one leading byte)
 long predicate = NumberUtils.decodeLong(bKey, 1); // Added by WuGang 2010-07-14

 Iterator<BytesWritable> itr = values.iterator();
@@ -48,7 +48,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
 byte[] bValue = value.getBytes();
 if (bValue[0] == 1) { //Type triple
 types.add(NumberUtils.decodeLong(bValue, 1));
- others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, along with each type an extra long (plus one byte) is passed
+ others.add(NumberUtils.decodeLong(bValue, 9)); // Added by WuGang, along with each type an extra long (plus one byte) is passed
 s_a_types.add(bValue[17]);
 } else { //Resource triple
 resources.add(NumberUtils.decodeLong(bValue, 1));
@@ -66,7 +66,7 @@ public void reduce(BytesWritable key, Iterable values, Context co
 while (itrResource.hasNext()) {
 long resource = itrResource.next();
 triple.setSubject(resource);
- // for Type values: a someValues entry has the form ((p,x),(v,w)), an allValues entry ((p,w),(u,v))
+ // for Type values: a someValues entry has the form ((p,x),(v,w)), an allValues entry ((p,w),(u,v))
 Iterator<Long> itrTypes = types.listIterator();
 Iterator<Long> itrOthers = others.listIterator();
 Iterator<Byte> itrSATypes = s_a_types.listIterator();
@@ -74,14 +74,14 @@ public void reduce(BytesWritable key, Iterable values, Context co
 long type = itrTypes.next();
 triple.setObject(type);

- // Added by WuGang, populate the triple
+ // Added by WuGang, populate the triple
 long other = itrOthers.next();
 byte s_a_type = itrSATypes.next();
- triple.setRsubject(rSubject); // x for someValues, w for allValues
+ triple.setRsubject(rSubject); // x for someValues, w for allValues
 // Modified by WuGang 2010-07-14
// triple.setRpredicate(TriplesUtils.RDF_TYPE); //rdf:type
 triple.setRpredicate(predicate);
- triple.setRobject(other); // w for someValues, v for allValues
+ triple.setRobject(other); // w for someValues, v for allValues
 switch (s_a_type) {
 case 0:
 triple.setType(TriplesUtils.OWL_HORST_15);
@@ -95,7 +95,7 @@ public void reduce(BytesWritable key, Iterable values, Context co

// System.out.println("Generate an extended triple for OWLAllSomeValues: " + triple);
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 }
 }
 }
@@ -110,4 +110,12 @@ public void setup(Context context) {
 triple.setObjectLiteral(false);
 triple.setPredicate(TriplesUtils.RDF_TYPE);
 }
+
+ @Override
+ protected void cleanup(
+ Reducer<BytesWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
+
}
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
index a4afd43..3323bd6 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPMapper.java
@@ -85,7 +85,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 @Override
 public void setup(Context context) throws IOException {
-
+
 CassandraDB db;
 try {
 db = new CassandraDB();
@@ -94,6 +94,7 @@ public void setup(Context context) throws IOException {
 subpropSchemaTriples = new HashSet();
 Set filters = new HashSet();
 filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
+ //modified 2015/5/31
 db.loadSetIntoMemory(subpropSchemaTriples, filters, -1);
 }
@@ -101,18 +102,21 @@ public void setup(Context context) throws IOException {
 subclassSchemaTriples = new HashSet();
 Set filters = new HashSet();
 filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
+ //modified 2015/5/31
 db.loadSetIntoMemory(subclassSchemaTriples, filters, -1);
 }
- } catch (TTransportException e) {
- e.printStackTrace();
- } catch (InvalidRequestException e) {
- e.printStackTrace();
- } catch (UnavailableException e) {
- e.printStackTrace();
- } catch (TimedOutException e) {
- e.printStackTrace();
- } catch (SchemaDisagreementException e) {
- e.printStackTrace();
+ db.CassandraDBClose();
+ //modified 2015/5/31
+// } catch (TTransportException e) {
+// e.printStackTrace();
+// } catch (InvalidRequestException e) {
+// e.printStackTrace();
+// } catch (UnavailableException e) {
+// e.printStackTrace();
+// } catch (TimedOutException e) {
+// e.printStackTrace();
+// } catch (SchemaDisagreementException e) {
+// e.printStackTrace();
 } catch (TException e) {
 e.printStackTrace();
 }
@@ -120,4 +124,5 @@ public void setup(Context context) throws IOException {
 }
+
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
index e98f5ba..731fb98 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLEquivalenceSCSPReducer.java
@@ -33,7 +33,7 @@ public class OWLEquivalenceSCSPReducer extends Reducer> subpropSchemaTriples = null;
 public static Map> subclassSchemaTriples = null;
 public static Map> equivalenceClassesSchemaTriples = null; // Added by WuGang
@@ -90,7 +90,7 @@ public void reduce(LongWritable key, Iterable values, Context con
 }
 }
- if (!found) { // this is a newly derived result
+ if (!found) { // this is a newly derived result
 triple.setObject(resource);
 triple.setSubject(key.get());
 triple.setPredicate(TriplesUtils.RDFS_SUBCLASS);
@@ -107,9 +107,8 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setRpredicate(TriplesUtils.OWL_EQUIVALENT_CLASS);
 triple.setRobject(triple.getSubject());
 }
-
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 }
 }
@@ -146,12 +145,12 @@ public void reduce(LongWritable key, Iterable values, Context con
 }
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 }
 }

 //Subproperties
- // Modified by WuGang, it seems this should be superProperties
+ // Modified by WuGang, it seems this should be superProperties
// itr2 = equivalenceProperties.iterator();
 itr2 = superProperties.iterator();
 while (itr2.hasNext()) {
@@ -180,12 +179,12 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setRobject(triple.getObject());
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 }
 }

 //Subclasses
- // Modified by WuGang, it seems this should be superClasses
+ // Modified by WuGang, it seems this should be superClasses
// itr2 = equivalenceClasses.iterator();
 itr2 = superClasses.iterator();
 while (itr2.hasNext()) {
@@ -213,9 +212,8 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setRsubject(triple.getSubject());
 triple.setRpredicate(TriplesUtils.RDFS_SUBCLASS);
 triple.setRobject(triple.getObject());
-
// context.write(source, triple);
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
 }
 }
 }
@@ -255,7 +253,8 @@ public void setup(Context context) throws IOException {
 Set filters = new HashSet();
 filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
 equivalencePropertiesSchemaTriples = db.loadMapIntoMemory(filters);
- }
+ }
+ db.CassandraDBClose();
 }catch (TTransportException e) {
 e.printStackTrace();
 } catch (InvalidRequestException e) {
@@ -271,4 +270,11 @@ public void setup(Context context) throws IOException {
 }
 }
+
+ @Override
+ protected void cleanup(
+ Reducer<LongWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
}
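Every rule reducer in this patch funnels its conclusions through CassandraDB.writeJustificationToMapReduceContext, passing the derived triple together with its antecedent (rsubject/rpredicate/robject) and a rule id. The method itself lives in cn.edu.neu.mitt.mrj.io.dbs.CassandraDB and is not shown in this diff; a hypothetical helper sketching the record shape it persists:

    // Illustrative only (assumed shape, not the project's API): a justification
    // pairs the derived triple with the antecedent triple and the rule that fired,
    // so a derivation can later be traced back to the statement that produced it.
    public final class JustificationSketch {
        static String describe(long s, long p, long o,
                               long rs, long rp, long ro, int rule) {
            return String.format("(%d %d %d) <- rule %d <- (%d %d %d)",
                    s, p, o, rule, rs, rp, ro);
        }
    }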
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
index b78a782..2ca8a07 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueMapper.java
@@ -43,7 +43,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 }
 //TODO: check whether also the schema is modified
 oKey.set(value.getSubject());
- if (value.getPredicate() == TriplesUtils.RDF_TYPE && // for 14b the value here is (u rdf:type v); the goal is to produce (u p w) in the reducer, but 14b (v owl:hasValue w) still has to be checked
+ if (value.getPredicate() == TriplesUtils.RDF_TYPE && // for 14b the value here is (u rdf:type v); the goal is to produce (u p w) in the reducer, but 14b (v owl:hasValue w) still has to be checked
 hasValue.contains(value.getObject()) &&
 onProperty.contains(value.getObject())) {
// System.out.println("In OWLHasValueMapper for 14b: " + value); // Added by Wugang
@@ -52,7 +52,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 oValue.set(values, 0, 9);
 context.write(oKey, oValue);
- } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // for 14a the value here is (u p w); the goal is to produce (u rdf:type v) in the reducer, but 14a (v owl:hasValue w) still has to be checked
+ } else if (value.getPredicate() != TriplesUtils.RDF_TYPE // for 14a the value here is (u p w); the goal is to produce (u rdf:type v) in the reducer, but 14a (v owl:hasValue w) still has to be checked
 && hasValueInverted.contains(value.getObject())
 && onPropertyInverted.contains(value.getPredicate())) {
// System.out.println("In OWLHasValueMapper for 14a: " + value); // Added by Wugang
@@ -62,6 +62,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 oValue.set(values, 0, 17);
 context.write(oKey, oValue);
+
 }
 // Moved into if-else by WuGang, 20150203
@@ -70,7 +71,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 public void setup(Context context) throws IOException {
 previousStep = context.getConfiguration().getInt("reasoner.previousStep", -1);
-
+
 try{
 CassandraDB db = new CassandraDB();
@@ -95,6 +96,7 @@ public void setup(Context context) throws IOException {
 onPropertyInverted = new HashSet();
 db.loadSetIntoMemory(onPropertyInverted, filters, -1, true);
 }
+ db.CassandraDBClose();
 }catch(TException te){
 te.printStackTrace();
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
index cdae522..c85b693 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLHasValueReducer.java
@@ -44,8 +44,10 @@ public class OWLHasValueReducer extends Reducer values, Context context) throws IOException, InterruptedException {
 Iterator<BytesWritable> itr = values.iterator();
+ System.out.println("step 6");
 while (itr.hasNext()) {
 byte[] v = itr.next().getBytes();
+ System.out.println("step6 has values reduce");
 if (v.length > 0) {
 if (v[0] == 0) { //Rule 14b
// System.out.println("In OWLHasValueReducer for 14b: "); // Added by Wugang
@@ -69,9 +71,8 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setRsubject(object); // v
 triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue
 triple.setRobject(triple.getObject()); // w
-// System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang
-
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ System.out.println("In OWLHasValueReducer for 14b output: "+triple); // Added by Wugang
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
// context.write(source, triple);
 }
 }
@@ -97,11 +98,11 @@ public void reduce(LongWritable key, Iterable values, Context con
 triple.setType(TriplesUtils.OWL_HORST_14a);
 triple.setRsubject(triple.getObject()); // v
// triple.setRpredicate(TriplesUtils.OWL_HAS_VALUE); // owl:hasValue
- triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26, this information is restored here
+ triple.setRpredicate(predicate); // p // Modified by WuGang, 2010-08-26, this information is restored here
 triple.setRobject(object); // w
// System.out.println("In OWLHasValueReducer for 14a output: "+triple); // Added by Wugang
- CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
+ CassandraDB.writeJustificationToMapReduceContext(triple, source, context);
// context.write(source, triple);
 }
 }
@@ -130,6 +131,7 @@ public void setup(Context context) throws IOException {
 onPropertyFilter.add(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY);
 onPropertyMap = db.loadMapIntoMemory(onPropertyFilter);
 onProperty2Map = db.loadMapIntoMemory(onPropertyFilter, true);
+ db.CassandraDBClose();
 }catch (TTransportException e) {
 e.printStackTrace();
 } catch (InvalidRequestException e) {
@@ -144,4 +146,11 @@ public void setup(Context context) throws IOException {
 e.printStackTrace();
 }
 }
+
+ @Override
+ protected void cleanup(
+ Reducer<LongWritable, BytesWritable, Map<String, ByteBuffer>, List<ByteBuffer>>.Context context)
+ throws IOException, InterruptedException {
+ super.cleanup(context);
+ }
}
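The mapper/reducer pair above implements the two OWL Horst hasValue rules that its comments name. A minimal sketch of the rule semantics, with assumed helper names (not the project's API):

    // OWL Horst hasValue rules, as reflected in the 14a/14b comments above:
    //   14a: v owl:hasValue w, v owl:onProperty p, u p w        =>  u rdf:type v
    //   14b: v owl:hasValue w, v owl:onProperty p, u rdf:type v =>  u p w
    public final class HasValueRules {
        static long[] conclude(boolean rule14b, long u, long v, long w, long p, long rdfType) {
            return rule14b
                    ? new long[]{u, p, w}        // 14b conclusion: u p w
                    : new long[]{u, rdfType, v}; // 14a conclusion: u rdf:type v
        }
    }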
System.out.println("Find a derive in functional and inverse functional property!" + triple); // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); outputSize++; } context.getCounter("OWL derived triples", "functional and inverse functional property").increment(outputSize); @@ -116,13 +117,13 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setRsubject(subject); triple.setRobject(object); triple.setType(TriplesUtils.OWL_HORST_3); - + itr = values.iterator(); while (itr.hasNext()) { triple.setPredicate(itr.next().get()); triple.setRpredicate(triple.getPredicate()); // Added by WuGang // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); context.getCounter("OWL derived triples", "simmetric property").increment(1); } @@ -144,7 +145,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setRsubject(subject); triple.setRobject(object); triple.setRpredicate(predicate); - + /* I only output the last key of the inverse */ Collection inverse = schemaInverseOfProperties.get(predicate); if (inverse != null) { @@ -154,7 +155,7 @@ else if (bytes[0] == 1){ //Inverse Functional triple.setPredicate(derivedPredicate); // Only one of the inverse, the others will be completed in outputInverseOf() //triple.setPredicate(itrInverse.next()); // Commented by WuGang 2015-01-27 // context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); context.getCounter("OWL derived triples", "inverse of").increment(1); // Moved to here by WuGang, 2015-01-27 @@ -171,7 +172,7 @@ else if (bytes[0] == 1){ //Inverse Functional break; case 4: case 5: - // �ⲿ���Ƿ�����inferTransitivityStatements�д������أ��˴���û���� + // �ⲿ���Ƿ�����inferTransitivityStatements�д�����أ��˴���û���� //Transitive property. 
I copy to a temporary directory setting a special triple source subject = NumberUtils.decodeLong(bytes, 1); object = NumberUtils.decodeLong(bytes, 9); @@ -191,7 +192,7 @@ else if (bytes[0] == 1){ //Inverse Functional transitiveSource.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setPredicate(Math.abs(predicate)); // context.write(transitiveSource, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, transitiveSource, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); context.getCounter("OWL derived triples", "transitive property input").increment(1); } default: @@ -213,7 +214,7 @@ private void outputInverseOf(long subject, long object, long predicate, Set> schemaInverseOfProperties_reverse = db.loadMapIntoMemory(filters, true); schemaInverseOfProperties.putAll(schemaInverseOfProperties_reverse); + + db.CassandraDBClose(); }catch (TTransportException e) { e.printStackTrace(); } catch (InvalidRequestException e) { @@ -255,4 +258,11 @@ public void setup(Context context) throws IOException { } } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java index ef48ffc..24381fd 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLReasoner.java @@ -1,9 +1,18 @@ package cn.edu.neu.mitt.mrj.reasoner.owl; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.HashSet; +import java.util.List; import java.util.Set; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.InvalidRequestException; +import org.apache.cassandra.thrift.SchemaDisagreementException; +import org.apache.cassandra.thrift.TimedOutException; +import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; @@ -17,13 +26,16 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import ch.qos.logback.classic.db.DBAppender; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.io.files.readers.FilesTriplesReader; import cn.edu.neu.mitt.mrj.partitioners.MyHashPartitioner; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; +import cn.edu.neu.mitt.mrj.utils.Cassandraconf; import cn.edu.neu.mitt.mrj.utils.FileUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; @@ -37,7 +49,7 @@ public class OWLReasoner extends Configured implements Tool { public static final String OWL_PROP_INHERITANCE_TMP = "/dir-tmp-prop-inheritance/"; public static final String OWL_PROP_INHERITANCE = "/dir-prop-inheritance/"; public static final String OWL_TRANSITIVITY_BASE = OWL_PROP_INHERITANCE_TMP + "dir-transitivity-base/"; - public static final String OWL_TRANSITIVITY = "dir-transitivity/"; // Added by WuGang 2010-08-25���¼ӵ�Ŀ¼ + public static final String OWL_TRANSITIVITY = "dir-transitivity/"; // Added by WuGang 2010-08-25���¼ӵ�Ŀ¼ public static final String OWL_SYNONYMS_TABLE = 
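The functional and inverse-functional cases above scan all values for a (key1, predicate) group, keep the smallest resource as the canonical one, and pair it with every other value to derive owl:sameAs conclusions. A self-contained sketch of that min-based pairing, with assumed names:

    import java.util.HashSet;
    import java.util.Set;

    // Sketch of the minimum/set logic in OWLNotRecursiveReducer's functional case:
    // the smallest candidate becomes canonical; every other value pairs with it
    // as a (min, owl:sameAs, v) conclusion.
    public final class FunctionalSameAs {
        static Set<long[]> sameAsPairs(Iterable<Long> candidates) {
            long min = Long.MAX_VALUE;
            Set<Long> rest = new HashSet<Long>();
            for (long v : candidates) {
                if (v < min) { if (min != Long.MAX_VALUE) rest.add(min); min = v; }
                else if (v != min) rest.add(v);
            }
            Set<long[]> out = new HashSet<long[]>();
            for (long v : rest) out.add(new long[]{min, v}); // (min, owl:sameAs, v)
            return out;
        }
    }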
"dir-table-synonyms/"; public static final String OWL_SYNONYMS_TABLE_NEW = "_table_synonyms_new/"; @@ -48,7 +60,7 @@ public class OWLReasoner extends Configured implements Tool { public static final String OWL_ALL_VALUE_TMP = "/dir-tmp-all-some-values/"; public static final String OWL_HAS_VALUE_TMP = "/dir-tmp-has-value/"; - private CassandraDB db; + public CassandraDB db; private int numMapTasks = -1; private int numReduceTasks = -1; @@ -100,10 +112,12 @@ public static void main(String[] args) { try { OWLReasoner owlreasoner = new OWLReasoner(); - owlreasoner.db = new CassandraDB("localhost", 9160); - owlreasoner.db.init(); +// owlreasoner.db = new CassandraDB(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host, 9160); +// owlreasoner.db.init(); ToolRunner.run(new Configuration(), owlreasoner, args); + +// owlreasoner.db.CassandraDBClose(); } catch (Exception e) { e.printStackTrace(); } @@ -119,7 +133,20 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio boolean firstCycle = true; int currentStep = 0; - int lastDerivationStep = 0; + int lastDerivationStep = 0; + + //Modified 2015/6/28 + try { + db = new CassandraDB(); +// db.init(); // 这不要init() 否则会出现 TTransportException: java.net.SocketException: 断开的管道 + /* + * getRowCountAccordingInferredSteps 类似的函数中出错。 + * 具体原因不确定,可能跟client使用有关。 + */ + } catch (Exception e) { + e.printStackTrace(); + } + do { if (!firstCycle && lastDerivationStep == (currentStep - 4)) @@ -128,6 +155,8 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio System.out.println(">>>>>>>>>>> Start new OWL Reasoner loop <<<<<<<<<<<"); long propDerivation = inferPropertiesInheritance(args); System.out.println("----------- End inferPropertiesInheritance"); + //Get Attention! + System.out.println("----------- Start inferTransitivityStatements"); derivedTriples = inferTransitivityStatements(args) + propDerivation; System.out.println("----------- End inferTransitivityStatements"); if (derivedTriples > 0) @@ -154,7 +183,7 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio break; currentStep++; long hasValueDerivation = inferHasValueStatements(args); - System.out.println("-----------inferHasValueStatements����"); + System.out.println("-----------inferHasValueStatements����"); derivedTriples += hasValueDerivation; if (hasValueDerivation > 0) lastDerivationStep = currentStep; @@ -162,7 +191,7 @@ public long launchClosure(String[] args) throws IOException, InterruptedExceptio break; currentStep++; long someAllDerivation = inferSomeAndAllValuesStatements(args); - System.out.println("-----------inferSomeAndAllValuesStatements����"); + System.out.println("-----------inferSomeAndAllValuesStatements����"); derivedTriples += someAllDerivation; if (someAllDerivation > 0) lastDerivationStep = currentStep; @@ -192,8 +221,10 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter OWLReasoner.class, "OWL reasoner: infer properties inherited statements (not recursive), step " + step, new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // not supported + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 5); job.getConfiguration().setInt("reasoner.step", step); job.getConfiguration().setInt("reasoner.previosTransitiveDerivation", previousTransitiveDerivation); job.getConfiguration().setInt("reasoner.previousDerivation", previousInferPropertiesDerivation); @@ -203,7 +234,7 @@ private long 
inferPropertiesInheritance(String[] args) throws IOException, Inter job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(OWLNotRecursiveReducer.class); - + job.waitForCompletion(true); @@ -230,23 +261,49 @@ private long inferPropertiesInheritance(String[] args) throws IOException, Inter private long inferTransitivityStatements(String[] args) throws IOException, InterruptedException, ClassNotFoundException { boolean derivedNewStatements = true; -// System.out.println("��inferTransitivityStatements��ͷ��"); +// System.out.println("��inferTransitivityStatements��ͷ��"); // We'll not use filesystem but db.getTransitiveStatementsCount() long derivation = 0; int level = 0; - long beforeInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); + + //modified 2015/5/19 + long beforeInferCount = db.getRowCountAccordingTripleTypeWithLimitation(TriplesUtils.TRANSITIVE_TRIPLE, 1); + while ((beforeInferCount > 0) && derivedNewStatements && shouldInferTransitivity) { -// System.out.println("��ʼ��inferTransitivityStatements��whileѭ����Ѱ�ҡ�"); +// System.out.println("��ʼ��inferTransitivityStatements��whileѭ����Ѱ�ҡ�"); level++; + Set levels = new HashSet(); + levels.add(new Integer(level-1)); + if (level > 1) + levels.add(new Integer(level-2)); + //Configure input. Take only the directories that are two levels below - Job job = MapReduceReasonerJobConfig.createNewJob( - OWLReasoner.class, - "OWL reasoner: transitivity rule. Level " + level, - new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), - numMapTasks, - numReduceTasks, true, true); + Job job = null; + + // for the first two level, we use the whole data in the database + if (level <= 2) + job = MapReduceReasonerJobConfig.createNewJob( + OWLReasoner.class, + "OWL reasoner: transitivity rule. Level " + level, + new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), + 0, + numMapTasks, + numReduceTasks, true, true, 6); + // for the level more than two, we only consider the last two level derived data in the current step + if (level > 2) + job = MapReduceReasonerJobConfig.createNewJob( + OWLReasoner.class, + "OWL reasoner: transitivity rule. Level " + level, + new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + levels, + step, + numMapTasks, + numReduceTasks, true, true ,7); + + job.getConfiguration().setInt("reasoning.baseLevel", step); job.getConfiguration().setInt("reasoning.transitivityLevel", level); job.getConfiguration().setInt("maptasks", Math.max(numMapTasks / 10, 1)); @@ -257,18 +314,35 @@ private long inferTransitivityStatements(String[] args) job.setReducerClass(OWLTransitivityReducer.class); job.waitForCompletion(true); - - // About duplication, we will modify the checkTransitivity to return transitive triple counts - // and then do subtraction. 
- - long afterInferCount = db.getRowCountAccordingTripleType(TriplesUtils.TRANSITIVE_TRIPLE); - derivation = afterInferCount - beforeInferCount; - derivedNewStatements = (derivation > 0); - beforeInferCount = afterInferCount; // Update beforeInferCount + long stepNotFilteredDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); + + long stepDerivation = 0; + if (stepNotFilteredDerivation > 0) { + try { + db.createIndexOnInferredSteps(); + } catch (InvalidRequestException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (UnavailableException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (TimedOutException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (SchemaDisagreementException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (TException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + stepDerivation = db.getRowCountAccordingInferredSteps(level); + } + derivation += stepDerivation; + derivedNewStatements = stepDerivation > 0; } previousTransitiveDerivation = step; - return derivation; } @@ -281,7 +355,8 @@ private long inferSameAsStatements(String[] args) { try { boolean derivedSynonyms = true; int derivationStep = 1; - long previousStepDerived = 0; // Added by WuGang 2015-01-30 +// long previousStepDerived = 0; // Added by WuGang 2015-01-30 + while (derivedSynonyms) { if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0) // We need not to infer on SameAs return 0; @@ -292,43 +367,56 @@ private long inferSameAsStatements(String[] args) { OWLReasoner.class, "OWL reasoner: build the synonyms table from same as triples - step " + derivationStep++, filters, // FileUtils.FILTER_ONLY_HIDDEN.getClass(), + new HashSet(), // Added by WuGang, 2015-07-12 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 8); job.setMapperClass(OWLSameAsMapper.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(OWLSameAsReducer.class); - + job.waitForCompletion(true); // System.out.println("In FilesOWLReasoner: " + job.getCounters().findCounter("synonyms", "replacements").getValue()); Counter cDerivedSynonyms = job.getCounters().findCounter("synonyms","replacements"); long currentStepDerived = cDerivedSynonyms.getValue(); // Added by WuGang 2015-01-30 - derivedTriples += currentStepDerived; - derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 + derivedSynonyms = currentStepDerived > 0; // Added by WuGang 2015-07-12 +// derivedTriples += currentStepDerived; +// derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30 //derivedSynonyms = currentStepDerived > 0; - previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 +// previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30 } //Filter the table. 
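The transitivity loop pairs with a level window computed in OWLTransitivityMapper.setup() (shown later in this diff): at iteration level, only statements above minLevel participate, which doubles the reachable path length per round, the usual logarithmic-rounds transitive-closure trick. A sketch of that window, copying the formula from the mapper:

    // Mirrors OWLTransitivityMapper.setup(): statements take part at iteration
    // `level` only if their own level exceeds this bound, so each round can
    // join paths of length up to 2^(level-1).
    public final class TransitivityWindow {
        static int minLevel(int level, int baseLevel) {
            return Math.max(1, (int) Math.pow(2, level - 2)) + baseLevel;
        }
    }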
@@ -281,7 +355,8 @@ private long inferSameAsStatements(String[] args) {
 try {
 boolean derivedSynonyms = true;
 int derivationStep = 1;
- long previousStepDerived = 0; // Added by WuGang 2015-01-30
+// long previousStepDerived = 0; // Added by WuGang 2015-01-30
+
 while (derivedSynonyms) {
 if (db.getRowCountAccordingTripleType(TriplesUtils.DATA_TRIPLE_SAME_AS)==0) // We need not to infer on SameAs
 return 0;
@@ -292,43 +367,56 @@ private long inferSameAsStatements(String[] args) {
 OWLReasoner.class,
 "OWL reasoner: build the synonyms table from same as triples - step " + derivationStep++,
 filters, // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(), // Added by WuGang, 2015-07-12
+ step, // not used here
 numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 8);
 job.setMapperClass(OWLSameAsMapper.class);
 job.setMapOutputKeyClass(LongWritable.class);
 job.setMapOutputValueClass(BytesWritable.class);
 job.setReducerClass(OWLSameAsReducer.class);
-
+
 job.waitForCompletion(true);
// System.out.println("In FilesOWLReasoner: " + job.getCounters().findCounter("synonyms", "replacements").getValue());
 Counter cDerivedSynonyms = job.getCounters().findCounter("synonyms","replacements");
 long currentStepDerived = cDerivedSynonyms.getValue(); // Added by WuGang 2015-01-30
- derivedTriples += currentStepDerived;
- derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30
+ derivedSynonyms = currentStepDerived > 0; // Added by WuGang 2015-07-12
+// derivedTriples += currentStepDerived;
+// derivedSynonyms = (currentStepDerived - previousStepDerived) > 0; // Modified by WuGang 2015-01-30
 //derivedSynonyms = currentStepDerived > 0;
- previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30
+// previousStepDerived = currentStepDerived; // Added by WuGang 2015-01-30
 }

 //Filter the table.
+
+ //modified 2015/5/19
 long tableSize = db.getRowCountAccordingTripleType(TriplesUtils.SYNONYMS_TABLE);
// System.out.println("tableSize is: " + tableSize);
// System.out.println("sizeDictionary is: " + sizeDictionary);
// System.out.println("derivedTriples is: " + derivedTriples);
- if (tableSize > sizeDictionary || derivedTriples > 0) {
+ //modified 2015/5/19
+ if (tableSize > sizeDictionary || derivedTriples > 0) {
+ //for(int j =0 ;j <= 3 ; j++){
 //1) Calculate the URIs distribution and get the first 2M.
 job = MapReduceReasonerJobConfig.createNewJob(
 OWLReasoner.class,
 "OWL reasoner: sampling more common resources",
 new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(), // Added by WuGang, 2015-07-12
+ step, // not used here
 numMapTasks,
- numReduceTasks, true, false); // input from cassandra, but output to hdfs
+ numReduceTasks, true, false, 9); // input from cassandra, but output to hdfs
 job.getConfiguration().setInt("reasoner.samplingPercentage", sampling); //Sampling at 10%
 job.getConfiguration().setInt("reasoner.threshold", resourceThreshold); //Threshold resources
+
+ /*
+ * output to hdfs
+ */
+
 job.setMapperClass(OWLSampleResourcesMapper.class);
 job.setMapOutputKeyClass(LongWritable.class);
 job.setMapOutputValueClass(LongWritable.class);
@@ -341,7 +429,7 @@ private long inferSameAsStatements(String[] args) {
 SequenceFileOutputFormat.setOutputPath(job, commonResourcesPath);
 job.setOutputFormatClass(SequenceFileOutputFormat.class);
 SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
-
+
 job.waitForCompletion(true);
@@ -378,8 +466,10 @@ private long inferSameAsStatements(String[] args) {
 OWLReasoner.class,
 "OWL reasoner: replace triples using the sameAs synonyms: reconstruct triples",
 new HashSet(), // FileUtils.FILTER_ONLY_HIDDEN.getClass(),
+ new HashSet(), // Added by WuGang, 2015-07-12
+ step, // not used here
 numMapTasks,
- numReduceTasks, false, true); // input from hdfs, but output to cassandra
+ numReduceTasks, false, true, 10); // input from hdfs, but output to cassandra
 SequenceFileInputFormat.addInputPath(job, tmpPath);
 job.setInputFormatClass(SequenceFileInputFormat.class);
@@ -388,6 +478,7 @@ private long inferSameAsStatements(String[] args) {
 job.setMapOutputKeyClass(BytesWritable.class);
 job.setMapOutputValueClass(BytesWritable.class);
 job.setReducerClass(OWLSameAsReconstructReducer.class);
+
 job.waitForCompletion(true);

 FileSystem fs = FileSystem.get(job.getConfiguration());
@@ -401,6 +492,7 @@ private long inferSameAsStatements(String[] args) {
 fs.rename(new Path(args[0] + "/dir-input"), new Path(args[0] + "/_dir-input"));
 }

+ //modified 2015/5/19
 sizeDictionary = tableSize;
 } catch (Exception e) {
@@ -418,17 +510,19 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter
 step++;

 Set filters = new HashSet();
- filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
- filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
- filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_CLASS);
- filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_CLASS);
+// filters.add(TriplesUtils.SCHEMA_TRIPLE_EQUIVALENT_PROPERTY);
 Job job = MapReduceReasonerJobConfig.createNewJob(
 OWLReasoner.class,
 "OWL reasoner: infer equivalence from subclass and subprop. step " + step,
 filters,
+ new HashSet(), // Added by WuGang, 20150712
+ step, // not used here
 numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 11);
 job.getConfiguration().setInt("maptasks", Math.max(job.getConfiguration().getInt("maptasks", 0) / 10, 1));
 job.getConfiguration().setInt("reasoner.step", step);
@@ -436,7 +530,7 @@ private long inferEquivalenceStatements(String[] args) throws IOException, Inter
 job.setMapOutputKeyClass(LongWritable.class);
 job.setMapOutputValueClass(BytesWritable.class);
 job.setReducerClass(OWLEquivalenceSCSPReducer.class);
-
+
 job.waitForCompletion(true);
 return job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue();
 }
@@ -448,19 +542,23 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException,
 boolean derivedNewStatements = true;
 long totalDerivation = 0;
 int previousSomeAllValuesDerivation = -1;
+ boolean firstCycle = true;

 // Added by Wugang 20150111
- long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15); // see OWLAllSomeValuesReducer
- long countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16); // see OWLAllSomeValuesReducer
+ //long countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15); // see OWLAllSomeValuesReducer
+ //long countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16); // see OWLAllSomeValuesReducer

 while (derivedNewStatements) {
 step++;
+
 Job job = MapReduceReasonerJobConfig.createNewJob(
 OWLReasoner.class,
 "OWL reasoner: some and all values rule. step " + step,
 new HashSet(),
+ new HashSet(),
+ step, // not used here
 numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 12);
 job.getConfiguration().setInt("reasoner.step", step);
 job.getConfiguration().setInt("reasoner.previousDerivation", previousSomeAllValuesDerivation);
 previousSomeAllValuesDerivation = step;
@@ -469,21 +567,48 @@ private long inferSomeAndAllValuesStatements(String[] args) throws IOException,
 job.setMapOutputKeyClass(BytesWritable.class);
 job.setMapOutputValueClass(BytesWritable.class);
 job.setReducerClass(OWLAllSomeValuesReducer.class);
-
+
 job.waitForCompletion(true);

 // Added by Wugang 20150111
- countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
- countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
- totalDerivation = countRule15 + countRule16;
+ // countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
+ // countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
+ // totalDerivation = countRule15 + countRule16;
- derivedNewStatements = (totalDerivation > 0);
+
+ Counter derivedTriples = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS");
+ long notFilteredDerivation = derivedTriples.getValue();
+ long stepDerivation = 0;
+ if (firstCycle)
+ notFilteredDerivation -= previousSomeAllValuesCycleDerivation;
+ if (notFilteredDerivation > 0) {
+ previousSomeAllValuesCycleDerivation += notFilteredDerivation;
+ //Modified by LiYang 2015/9/21
+// try {
+// db.createIndexOnInferredSteps();
+// } catch (TException e) {
+// // TODO Auto-generated catch block
+// e.printStackTrace();
+// }
+ try {
+ db.createIndexOnInferredSteps();
+ } catch (TException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ stepDerivation = db.getRowCountAccordingInferredSteps(step - 1);
+ totalDerivation += stepDerivation;
+ derivedNewStatements = stepDerivation > 0;
+ } else {
+ derivedNewStatements = false;
+ }
+ firstCycle = false;
 }

 // Added by Wugang 20150111
- countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
- countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
- totalDerivation = countRule15 + countRule16;
+ //countRule15 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_15) - countRule15; // see OWLAllSomeValuesReducer
+ //countRule16 = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_16) - countRule16; // see OWLAllSomeValuesReducer
+ //totalDerivation = countRule15 + countRule16;
 return totalDerivation;
 }
@@ -495,16 +620,18 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup
 step++;

 // Added by Wugang 20150111
- long countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a); // see OWLAllSomeValuesReducer
- long countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b); // see OWLAllSomeValuesReducer
+ //long countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a); // see OWLAllSomeValuesReducer
+ //long countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b); // see OWLAllSomeValuesReducer

 Job job = MapReduceReasonerJobConfig.createNewJob(
 OWLReasoner.class,
 "OWL reasoner: hasValue rule. step " + step,
 new HashSet(),
+ new HashSet(),
+ step, // not used here
 numMapTasks,
- numReduceTasks, true, true);
+ numReduceTasks, true, true, 13);

 long schemaOnPropertySize = db.getRowCountAccordingTripleType(TriplesUtils.SCHEMA_TRIPLE_ON_PROPERTY);
 if (schemaOnPropertySize == 0)
@@ -523,9 +650,29 @@ private long inferHasValueStatements(String[] args) throws IOException, Interrup
 // Get inferred count
 if (job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue() > 0) {
- countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer
- countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer
- return(countRule14a + countRule14b);
+ // countRule14a = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14a) - countRule14a; // see OWLAllSomeValuesReducer
+ // countRule14b = db.getRowCountAccordingRule((int)TriplesUtils.OWL_HORST_14b) - countRule14b; // see OWLAllSomeValuesReducer
+ // return(countRule14a + countRule14b);
+ try {
+ db.createIndexOnInferredSteps();
+ } catch (InvalidRequestException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (UnavailableException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (TimedOutException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (SchemaDisagreementException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (TException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ long stepDerivation = db.getRowCountAccordingInferredSteps(step - 1);
+ return stepDerivation;
 } else {
 return 0;
 }
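Several of the infer* methods above now share the same derivation-counting pattern: the reducer output counter is only a hint (Cassandra silently absorbs duplicate rows), so the code indexes the inferred-steps column and counts the rows actually tagged with the step just completed. A hedged sketch of that pattern, using the helper names this diff introduces (createIndexOnInferredSteps, getRowCountAccordingInferredSteps) in a hypothetical wrapper:

    // Sketch, not the project's code: count only rows genuinely written at the
    // completed step, so duplicates filtered by Cassandra don't inflate the
    // fixpoint test of the reasoning loop.
    static long countNewDerivations(CassandraDB db, long reduceOutputRecords, int step)
            throws Exception {
        if (reduceOutputRecords == 0) return 0;  // nothing was even attempted
        db.createIndexOnInferredSteps();         // make the step column queryable
        return db.getRowCountAccordingInferredSteps(step - 1);
    }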
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
index 5b02e6f..0462b42 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructMapper.java
@@ -17,9 +17,9 @@
 import cn.edu.neu.mitt.mrj.utils.FileUtils;
 import cn.edu.neu.mitt.mrj.utils.NumberUtils;
 import cn.edu.neu.mitt.mrj.utils.TriplesUtils;
-
 import cn.edu.neu.mitt.mrj.data.Triple;
 import cn.edu.neu.mitt.mrj.data.TripleSource;
+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;

 public class OWLSameAsDeconstructMapper extends Mapper {
@@ -82,8 +82,8 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept
 context.write(oKey, oValue);

- // the emitted oKey is a resource (it may appear as subject, predicate, or object; a sameAs triple is emitted once for each position)
- // the emitted oValue describes the owl:sameAs triple: tripleId + key.getStep() + key.getDerivation(), preceded by one byte
+ // the emitted oKey is a resource (it may appear as subject, predicate, or object; a sameAs triple is emitted once for each position)
+ // the emitted oValue describes the owl:sameAs triple: tripleId + key.getStep() + key.getDerivation(), preceded by one byte
 ++tripleId;
 }
@@ -92,7 +92,7 @@ public void map(TripleSource key, Triple value, Context context) throws IOExcept
 @Override
 public void setup(Context context) {
 oValue = new BytesWritable(bValue);
-
+
 try {
 String taskId = context.getConfiguration().get("mapred.task.id").substring(context.getConfiguration().get("mapred.task.id").indexOf("_m_") + 3);
 taskId = taskId.replaceAll("_", "");
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
index fa3135e..8d1a1a5 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsDeconstructReducer.java
@@ -10,6 +10,7 @@
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Reducer;

+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
 import cn.edu.neu.mitt.mrj.utils.NumberUtils;

 public class OWLSameAsDeconstructReducer extends Reducer {
@@ -17,7 +18,7 @@ public class OWLSameAsDeconstructReducer extends Reducer storage = new LinkedList();
@@ -38,9 +39,9 @@ public void reduce(LongWritable key, Iterable values, Context con
 byte[] bValue = iValue.getBytes();
// System.out.println("In processing things before storage, size of iValue is: " + iValue.getLength());
// System.out.println("In processing things before storage, size of bValue is: " + bValue.length);
- // since there is no guarantee when the value with bValue[0]==4 will arrive for this key,
- // resources awaiting replacement are buffered in storage until it has been seen; after that, replacement can happen directly, value by value;
- // finally (after this while loop) the values buffered in storage are replaced one by one
+ // since there is no guarantee when the value with bValue[0]==4 will arrive for this key,
+ // resources awaiting replacement are buffered in storage until it has been seen; after that, replacement can happen directly, value by value;
+ // finally (after this while loop) the values buffered in storage are replaced one by one
 if (bValue[0] == 4) {//Same as
 long resource = NumberUtils.decodeLong(bValue, 1);
 replacement = true;
@@ -54,14 +55,14 @@ public void reduce(LongWritable key, Iterable values, Context con
 byte[] bTempValue = new byte[15+8]; // Added by WuGang
 System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang
 System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang
- iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, append the pre-replacement resource at the end
+ iValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, append the pre-replacement resource at the end
 context.write(oKey, iValue);
 countOutput++;
 context.getCounter("reasoner", "substitutions").increment(1);
 }
 }
- Iterator<byte[]> itr2 = storage.iterator(); // if storage is empty, nothing here can be replaced via sameAs
+ Iterator<byte[]> itr2 = storage.iterator(); // if storage is empty, nothing here can be replaced via sameAs
 while (itr2.hasNext()) {
 byte[] bValue = itr2.next();
 oValue.set(bValue, 0, bValue.length);
@@ -70,15 +71,19 @@ public void reduce(LongWritable key, Iterable values, Context con
// System.out.println("In processing things in storage, size of bValue is: " + bValue.length);
 System.arraycopy(bValue, 0, bTempValue, 0, 15); // Added by WuGang
 System.arraycopy(bOriginalResource, 0, bTempValue, 15, 8); // Added by WuGang
- oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, append the pre-replacement resource at the end
+ oValue.set(bTempValue, 0, bTempValue.length); // Added by Wugang, append the pre-replacement resource at the end
 context.write(oKey, oValue);
 }
- // the emitted oKey is a resource whose occurrences have been replaced through sameAs (it has effectively passed through sameAs substitution)
- // the emitted oValue describes the owl:sameAs triple: tripleId + key.getStep() + key.getDerivation(), preceded by one byte
+ // the emitted oKey is a resource whose occurrences have been replaced through sameAs (it has effectively passed through sameAs substitution)
+ // the emitted oValue describes the owl:sameAs triple: tripleId + key.getStep() + key.getDerivation(), preceded by one byte
 if (replacement) {
 //Increment counter of replacements
 context.getCounter("reasoner", "substitutions").increment(countOutput + storage.size());
 }
 }
+ public void setup(Context context) throws IOException, InterruptedException{
+ CassandraDB.setConfigLocation();
+
+ }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
index a526c85..ed4b73f 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsMapper.java
@@ -29,7 +29,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 /* Source triple: s owl:sameAs o */
 long olKey = 0;
 long olValue = 0;
- if (value.getSubject() > value.getObject()) { // the key is always the larger value, the value the smaller
+ if (value.getSubject() > value.getObject()) { // the key is always the larger value, the value the smaller
 olKey = value.getSubject();
 olValue = value.getObject();
 } else {
@@ -37,18 +37,21 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup
 olValue = value.getSubject();
 }

- // the smallest value identifies each group
+ // the smallest value identifies each group
 oKey.set(olKey);
 bValue[0] = 0;
 NumberUtils.encodeLong(bValue, 1, olValue);
 oValue.set(bValue, 0, bValue.length);
- context.write(oKey, oValue); // here key is the larger value and value the smaller; this tells us which group each resource belongs to
+ context.write(oKey, oValue); // here key is the larger value and value the smaller; this tells us which group each resource belongs to

 oKey.set(olValue);
 bValue[0] = 1;
 NumberUtils.encodeLong(bValue, 1, olKey);
 oValue.set(bValue, 0, bValue.length);
- context.write(oKey, oValue); // here key is the smaller value and value the larger; this tells us which resources each group contains
+ context.write(oKey, oValue); // here key is the smaller value and value the larger; this tells us which resources each group contains
+ }
+ public void setup(Context context) throws IOException{
+
 }
 }
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
index 827e360..887503b 100644
--- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
+++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReconstructMapper.java
@@ -8,6 +8,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB;
 import cn.edu.neu.mitt.mrj.utils.NumberUtils;

 public class OWLSameAsReconstructMapper extends Mapper {
@@ -17,25 +18,26 @@ public class OWLSameAsReconstructMapper extends Mapper values, Context context) throws IOException, InterruptedException {
// System.out.println("In OWLSameAsReconstructReducer!!!");
@@ -28,31 +28,31 @@
 oKey.setDerivation(bKey[12]);
 int elements = 0;
- Iterator itr = values.iterator(); // byte 1 onward holds the owl:sameAs synonym; byte 0 marks the position being replaced, with possible values 0,1,2,3,4: 4 means the predicate is owl:sameAs (the triple itself is an owl:sameAs triple), 0 marks the subject, 1 the predicate, 2 and 3 the object
+ Iterator itr = values.iterator(); // byte 1 onward holds the owl:sameAs synonym; byte 0 marks the position being replaced, with possible values 0,1,2,3,4: 4 means the predicate is owl:sameAs (the triple itself is an owl:sameAs triple), 0 marks the subject, 1 the predicate, 2 and 3 the object
 while (itr.hasNext()) {
 elements++;
 byte[] bValue = itr.next().getBytes();
- long resource = NumberUtils.decodeLong(bValue, 1); // this is the owl:sameAs synonym; it is used below for the replacement
- long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, this is the pre-replacement resource
+ long resource = NumberUtils.decodeLong(bValue, 1); // this is the owl:sameAs synonym; it is used below for the replacement
+ long originalResource = NumberUtils.decodeLong(bValue, 9); // Added by Wugang, this is the pre-replacement resource
 switch (bValue[0]) {
 case 0:
- oValue.setSubject(resource); // replace the subject
- oValue.setRsubject(originalResource); // Added by Wugang, the original subject
+ oValue.setSubject(resource); // replace the subject
+ oValue.setRsubject(originalResource); // Added by Wugang, the original subject
// System.out.println("Replacing subject: " + resource);
 break;
 case 1:
- oValue.setPredicate(resource); // replace the predicate
- oValue.setRpredicate(originalResource); // Added by Wugang, the original predicate
+ oValue.setPredicate(resource); // replace the predicate
+ oValue.setRpredicate(originalResource); // Added by Wugang, the original predicate
// System.out.println("Replacing predicate: " + resource);
 break;
- case 2: // replace the object
- case 3: // replace the object
+ case 2: // replace the object
+ case 3: // replace the object
 if (bValue[0] == 2)
 oValue.setObjectLiteral(false);
 else
 oValue.setObjectLiteral(true);
 oValue.setObject(resource);
- oValue.setRobject(originalResource); // Added by Wugang, the original object
+ oValue.setRobject(originalResource); // Added by Wugang, the original object
// System.out.println("Replacing object: " + resource);
 break;
 default:
@@ -61,24 +61,24 @@
 }

 if (elements == 3){
- // Added by WuGang, check rule 11
+ // Added by WuGang, check rule 11
// oValue.setRsubject(rsubject)
 if ((oValue.getSubject() == oValue.getRsubject()) &&
 (oValue.getPredicate() == oValue.getRpredicate()) &&
 (oValue.getObject() == oValue.getRobject()))
- oValue.setType(TriplesUtils.OWL_HORST_NA); // no change before and after replacement, so this does not count as a sameAs rule
+ oValue.setType(TriplesUtils.OWL_HORST_NA); // no change before and after replacement, so this does not count as a sameAs rule
 else {
 if ((oValue.getPredicate() == TriplesUtils.OWL_SAME_AS) &&
 (oValue.getRpredicate() == TriplesUtils.OWL_SAME_AS))
- oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7
+ oValue.setType(TriplesUtils.OWL_HORST_7); // apply OWL Horst rule 7
 else
- oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL Horst rule 11
+ oValue.setType(TriplesUtils.OWL_HORST_11); // apply OWL Horst rule 11
 }
// System.out.println("Find a complete replacment of triple: " + oValue);
- CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
+ CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context);
// context.write(oKey, oValue);
 }
 }
@@ -86,5 +86,13 @@
 @Override
 public void 
@@ -86,5 +86,13 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + + } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java index a7988da..83fbdf7 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSameAsReducer.java @@ -25,7 +25,7 @@ public class OWLSameAsReducer extends Reducer duplicates = new HashSet(); private List storage = new LinkedList(); - + @Override public void reduce(LongWritable key, Iterable values, Context context) throws IOException, InterruptedException { @@ -44,12 +44,12 @@ public void reduce(LongWritable key, Iterable values, Context con BytesWritable value = itr.next(); long lValue = NumberUtils.decodeLong(value.getBytes(), 1); // System.out.println("processing " + lValue + " with the first byte is: " + value.getBytes()[0]); - if (value.getBytes()[0] != 0) { // 1: each value is a member of this group + if (value.getBytes()[0] != 0) { // 1: each value is a member of this group //Store in-memory storage.add(lValue); // System.out.println("Storage size is: " + storage.size()); //} - } else { // 0: records which group (the value) this resource belongs to + } else { // 0: records which group (the value) this resource belongs to // System.out.println("Prepare to replace: lValue is " + lValue + " and oValue.getSubject() is " + oValue.getSubject()); if (lValue < oValue.getSubject()) { // System.out.println("Hahahahah, I'm here!"); @@ -65,7 +65,7 @@ public void reduce(LongWritable key, Iterable values, Context con long lValue = itr2.next(); if (!duplicates.contains(lValue)) { oValue.setObject(lValue); - CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); + CassandraDB.writeJustificationToMapReduceContext(oValue, oKey, context); duplicates.add(lValue); } } @@ -91,4 +91,11 @@ public void setup(Context context) { oKey.setDerivation(TripleSource.OWL_DERIVED); oKey.setStep(context.getConfiguration().getInt("reasoner.step", 0)); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } }
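// A minimal sketch of the group-merge step in OWLSameAsReducer above
// (names and literal ids are illustrative): flag-1 values are the members
// of this key's group; a flag-0 value smaller than the current subject
// re-points the whole group at the smaller, canonical id, and a set guards
// against duplicate owl:sameAs emissions.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class GroupMergeSketch {
    public static void main(String[] args) {
        long key = 42;                            // current group id
        List<long[]> values = Arrays.asList(      // {flag, id} pairs
                new long[]{1, 77}, new long[]{1, 99}, new long[]{0, 7});
        long subject = key;
        List<Long> storage = new ArrayList<Long>();
        for (long[] v : values) {
            if (v[0] != 0) storage.add(v[1]);         // group member
            else subject = Math.min(subject, v[1]);   // smaller canonical id wins
        }
        Set<Long> duplicates = new HashSet<Long>();
        for (long member : storage)
            if (duplicates.add(member))
                System.out.println(subject + " owl:sameAs " + member);
    }
}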
diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java index ace1796..2c8aa57 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesMapper.java @@ -45,6 +45,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } public void setup(Context context) { + threshold = context.getConfiguration().getInt("reasoner.samplingPercentage", 0); } } \ No newline at end of file diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java index d2c658e..50dfe04 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLSampleResourcesReducer.java @@ -6,6 +6,8 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + public class OWLSampleResourcesReducer extends Reducer { //private static Logger log = LoggerFactory.getLogger(OWLSampleResourcesReducer.class); @@ -34,6 +36,8 @@ public void reduce(LongWritable key, Iterable values, Context cont @Override public void setup(Context context) { + CassandraDB.setConfigLocation(); + threshold = context.getConfiguration().getInt("reasoner.threshold", 0); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java index b2b04bd..09232eb 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityMapper.java @@ -28,7 +28,9 @@ public class OWLTransitivityMapper extends Mapper minLevel) { + if (level > minLevel) { NumberUtils.encodeLong(keys,0,value.getPredicate()); NumberUtils.encodeLong(keys,8,value.getSubject()); oKey.set(keys, 0, 16); @@ -63,19 +65,20 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup values[0] = 3; else values[0] = 2; - NumberUtils.encodeLong(values, 1, step); + NumberUtils.encodeLong(values, 1, level); NumberUtils.encodeLong(values, 9, value.getObject()); oValue.set(values, 0, 17); context.write(oKey, oValue); } - //for u p w and w p v, the emitted key is (p, w) and the emitted value is (value[0], key.getStep(), value.getObject) + //for u p w and w p v, the emitted key is (p, w) and the emitted value is (value[0], key.getStep(), value.getObject) } } @Override public void setup(Context context) { + level = context.getConfiguration().getInt("reasoning.transitivityLevel", 0); baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 0) - 1; minLevel = Math.max(1, (int)Math.pow(2,level - 2)) + baseLevel; diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java index 7ad71eb..beb7b8d 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/owl/OWLTransitivityReducer.java @@ -69,9 +69,9 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setPredicate(NumberUtils.decodeLong(key.getBytes(),0)); - // Added by WuGang, this extended triple has the form u p w, where w is a key resource used to reconstruct the premises of the original rule + // Added by WuGang, this extended triple has the form u p w, where w is a key resource used to reconstruct the premises of the original rule triple.setType(TriplesUtils.OWL_HORST_4); -// triple.setRsubject(rsubject); // the subject is set later on, see the code below +// triple.setRsubject(rsubject); // the subject is set later on, see the code below triple.setRpredicate(NumberUtils.decodeLong(key.getBytes(),0)); triple.setRobject(NumberUtils.decodeLong(key.getBytes(), 8)); @@ -87,13 +87,15 @@ public void reduce(BytesWritable key, Iterable values, Context co triple.setSubject(entry.getKey()); triple.setObject(entry2.getKey()); - // Added by Wugang, for this extended triple it does not really matter whether rsubject is set, but set it anyway to be safe - triple.setRsubject(triple.getSubject()); // because u p w is selected as this triple, its subject u is recorded here + // Added by Wugang, for this extended triple it does not really matter whether rsubject is set, but set it anyway to be safe + triple.setRsubject(triple.getSubject()); // because u p w is selected as this triple, its subject u is recorded here - source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); + // Modified by WuGang, 2015-07-15 + //source.setStep((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel)); + source.setTransitiveLevel((int)(Math.abs(entry.getValue()) + Math.abs(entry2.getValue()) - baseLevel));
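// A small sketch of the level arithmetic behind these transitivity rounds
// (class name illustrative): joining two chains of levels l1 and l2 yields
// level l1 + l2 relative to baseLevel, so the longest derivable chain
// doubles each round and the closure of a chain of length n needs about
// log2(n) rounds. minLevel mirrors the OWLTransitivityMapper formula above.
public class TransitivityLevelSketch {
    public static void main(String[] args) {
        int baseLevel = 0;
        for (int level = 1; level <= 5; level++) {
            int minLevel = Math.max(1, (int) Math.pow(2, level - 2)) + baseLevel;
            System.out.println("round " + level + ": only join chains with level > " + minLevel);
        }
    }
}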
// context.write(source, triple); - CassandraDB.writeJustificationToMapReduceContext(triple, source, context); + CassandraDB.writeJustificationToMapReduceContext(triple, source, context); // System.out.println("In OWLTransitivityReducer: " + triple); } @@ -104,12 +106,19 @@ public void reduce(BytesWritable key, Iterable values, Context co @Override public void setup(Context context) { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. - baseLevel = context.getConfiguration().getInt("reasoning.baseLevel", 1) - 1; level = context.getConfiguration().getInt("reasoning.transitivityLevel", -1); // Modified by WuGang 2015-01-28 //source.setDerivation(TripleSource.OWL_DERIVED); + source.setStep(baseLevel + 1); // Added by WuGang, 2015-07-15 source.setDerivation(TripleSource.TRANSITIVE_ENABLED); triple.setObjectLiteral(false); } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); + } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java index b112445..d709301 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSReasoner.java @@ -2,9 +2,20 @@ import java.io.IOException; +import java.nio.ByteBuffer; import java.util.HashSet; +import java.util.List; import java.util.Set; +import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.InvalidRequestException; +import org.apache.cassandra.thrift.SchemaDisagreementException; +import org.apache.cassandra.thrift.TimedOutException; +import org.apache.cassandra.thrift.UnavailableException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.BytesWritable; @@ -13,11 +24,14 @@ import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; +//import org.apache.hadoop.mapred.lib.MultipleOutputs; public class RDFSReasoner extends Configured implements Tool { @@ -26,7 +40,7 @@ public class RDFSReasoner extends Configured implements Tool { private int numReduceTasks = -1; public static int step = 0; private int lastExecutionPropInheritance = -1; - private int lastExecutionDomRange = -1; + private int lastExecutionDomRange = -1; private void parseArgs(String[] args) { @@ -69,22 +83,25 @@ public static void main(String[] args) { // The derivation will be launched in run() - public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException { + public long launchDerivation(String[] args) throws IOException, InterruptedException, ClassNotFoundException, InvalidRequestException, UnavailableException, TimedOutException, SchemaDisagreementException, TException { long time = System.currentTimeMillis(); - parseArgs(args); Job job = null; long derivation = 0; - + + // RDFS subproperty inheritance reasoning //
job = createNewJob("RDFS subproperty inheritance reasoning", "FILTER_ONLY_HIDDEN"); job = MapReduceReasonerJobConfig.createNewJob( RDFSReasoner.class, "RDFS subproperty inheritance reasoning", new HashSet(), + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 1); + job.setMapperClass(RDFSSubPropInheritMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); @@ -93,10 +110,12 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep job.getConfiguration().setInt("lastExecution.step", lastExecutionPropInheritance); lastExecutionPropInheritance = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-inherit"); + job.waitForCompletion(true); long propInheritanceDerivation = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); derivation += propInheritanceDerivation; + // RDFS subproperty domain and range reasoning // job = createNewJob("RDFS subproperty domain and range reasoning", "FILTER_ONLY_HIDDEN"); @@ -104,14 +123,17 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subproperty domain and range reasoning", new HashSet(), + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 2); job.setMapperClass(RDFSSubPropDomRangeMapper.class); job.setMapOutputKeyClass(BytesWritable.class); // Modified by WuGang, 2010-08-26 job.setMapOutputValueClass(LongWritable.class); //job.setPartitionerClass(MyHashPartitioner.class); // Is this ok? seems not necessary job.setReducerClass(RDFSSubpropDomRangeReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); + job.getConfiguration().setInt("lastExecution.step", lastExecutionDomRange); lastExecutionDomRange = step; //TODO: configureOutputJob(job, args[0], "dir-rdfs-derivation/dir-subprop-domain-range"); @@ -122,7 +144,7 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep // RDFS cleaning up subprop duplicates // We remove it for simplicity. 
That means we will not support stop and restart from breakpoints - + //RDFS subclass reasoning // job = createNewJob("RDFS subclass reasoning", "FILTER_ONLY_TYPE_SUBCLASS"); @@ -132,13 +154,16 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS subclass reasoning", filters, + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 3); job.setMapperClass(RDFSSubclasMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSubclasReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-subclass-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); @@ -163,14 +188,16 @@ public long launchDerivation(String[] args) throws IOException, InterruptedExcep RDFSReasoner.class, "RDFS special properties reasoning", filters, + new HashSet(), // Added by WuGang, 2015-07-13 + step, // not used here numMapTasks, - numReduceTasks, true, true); + numReduceTasks, true, true, 4); job.setMapperClass(RDFSSpecialPropsMapper.class); job.setMapOutputKeyClass(BytesWritable.class); job.setMapOutputValueClass(LongWritable.class); job.setReducerClass(RDFSSpecialPropsReducer.class); job.getConfiguration().setInt("reasoner.step", ++step); - + // configureOutputJob(job, args[0], "dir-rdfs-output/dir-special-props-" + step); job.waitForCompletion(true); derivation += job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter","REDUCE_OUTPUT_RECORDS").getValue(); diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java index 065a112..9a8e1b4 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsMapper.java @@ -87,8 +87,7 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup } @Override - public void setup(Context context) throws IOException { - + public void setup(Context context) throws IOException { try{ CassandraDB db = new CassandraDB(); if (memberProperties == null) { @@ -111,6 +110,9 @@ public void setup(Context context) throws IOException { filters.add(TriplesUtils.SCHEMA_TRIPLE_LITERAL_SUBCLASS); db.loadSetIntoMemory(literalSubclasses, filters, -1); } + + db.CassandraDBClose(); + } catch(TTransportException tte){ tte.printStackTrace(); } catch (InvalidRequestException e) { @@ -124,5 +126,16 @@ public void setup(Context context) throws IOException { } catch (TException e) { e.printStackTrace(); } + } + +// protected void cleanup(Context context) throws IOException, InterruptedException{ +// try { +// CassandraDB db = new CassandraDB(); +// db.UnIndex(); +// db.CassandraDBClose(); +// } catch (Exception e) { +// // TODO: handle exception +// } +// } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java index dd87d22..34913b0 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSpecialPropsReducer.java @@ -2,10 +2,13 @@ import java.io.IOException; import java.nio.ByteBuffer; 
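// Before moving into the reducers, a compile-level sketch of the chaining
// pattern launchDerivation above repeats for each rule family: configure a
// job, run it to completion, then add its reducer-output count to the
// running derivation total. The helper name runStage is illustrative and
// not part of the patch; the counter group is the one the patch reads.
import org.apache.hadoop.mapreduce.Job;

public class ChainSketch {
    static long runStage(Job job) throws Exception {
        job.waitForCompletion(true);
        return job.getCounters()
                  .findCounter("org.apache.hadoop.mapred.Task$Counter",
                               "REDUCE_OUTPUT_RECORDS").getValue();
    }
    // usage: derivation += runStage(job); repeated for each configured rule job
}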
+import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import org.apache.cassandra.thrift.Cassandra; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; @@ -20,11 +23,19 @@ public class RDFSSpecialPropsReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { byte[] bKey = key.getBytes(); Iterator itr = values.iterator(); + + + while (itr.hasNext()) { long value = itr.next().get(); if (value == TriplesUtils.RDFS_LITERAL && (bKey[0] == 0 || bKey[0] == 2)) @@ -54,9 +65,9 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); break; case 2: // Rule 13 oTriple.setSubject(NumberUtils.decodeLong(bKey, 1)); @@ -69,8 +80,8 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); context.getCounter("RDFS derived triples", "subclass of literal").increment(1); break; case 3: // Rule 8 @@ -85,11 +96,11 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); context.getCounter("RDFS derived triples", "subclass of resource").increment(1); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + //context.write(source, oTriple); break; - case 4: // no corresponding rdfs rule here - case 5: // no corresponding rdfs rule here + case 4: // no corresponding rdfs rule here + case 5: // no corresponding rdfs rule here oTriple.setSubject(NumberUtils.decodeLong(bKey, 1)); oTriple.setPredicate(TriplesUtils.RDFS_MEMBER); // oTriple.setPredicate(NumberUtils.decodeLong(bKey, 9)); @@ -99,18 +110,27 @@ else if (value == TriplesUtils.RDFS_RESOURCE && bKey[0] == 3) else oTriple.setObjectLiteral(true); context.getCounter("RDFS derived triples", "subproperty inheritance of member").increment(1); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); // context.write(source, oTriple); default: break; } + } @Override public void setup(Context context) { - CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around.
source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + + } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java index 1fa8b13..7ca4151 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropDomRangeMapper.java @@ -4,6 +4,7 @@ import java.util.HashSet; import java.util.Set; +import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; @@ -11,6 +12,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.thrift.TException; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; @@ -60,15 +62,14 @@ public void map(Long key, Row row, Context context) throws IOException, Interru return; Triple value = CassandraDB.readJustificationFromMapReduceRow(row); - //Check if the predicate has a domain if (domainSchemaTriples.contains(value.getPredicate())) { NumberUtils.encodeLong(bKey,0,value.getSubject()); // Added by WuGang, 2010-08-26 NumberUtils.encodeLong(bKey,8,value.getObject()); // Added by WuGang, 2010-08-26 // oKey.set(value.getSubject()); oKey.set(bKey, 0, 16); // Modified by WuGang, 2010-08-26 - oValue.set(value.getPredicate() << 1); // the low bit of oValue is 0, marking that we are currently handling a domain - context.write(oKey, oValue); // write <(s,o), p> out, for rule 2 + oValue.set(value.getPredicate() << 1); // the low bit of oValue is 0, marking that we are currently handling a domain + context.write(oKey, oValue); // write <(s,o), p> out, for rule 2 } //Check if the predicate has a range @@ -78,8 +79,8 @@ public void map(Long key, Row row, Context context) throws IOException, Interru NumberUtils.encodeLong(bKey,8,value.getSubject()); // Added by WuGang, 2010-08-26 // oKey.set(value.getObject()); oKey.set(bKey, 0, 16); // Modified by WuGang, 2010-08-26 - oValue.set((value.getPredicate() << 1) | 1); // the low bit of oValue is 1, marking that we are currently handling a range - context.write(oKey, oValue); // write <(o,s), p> out, for rule 3 + oValue.set((value.getPredicate() << 1) | 1); // the low bit of oValue is 1, marking that we are currently handling a range + context.write(oKey, oValue); // write <(o,s), p> out, for rule 3 } }
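// A minimal sketch of the bit-packing used in the mapper above (names
// illustrative): the predicate id is shifted left one bit and the low bit
// distinguishes a domain match (0, rule 2) from a range match (1, rule 3),
// so a single long carries both the predicate and which rule applies.
public class DomRangeFlagSketch {
    public static void main(String[] args) {
        long predicate = 1234L;
        long domainTagged = predicate << 1;        // rule 2 (domain)
        long rangeTagged  = (predicate << 1) | 1;  // rule 3 (range)
        for (long tagged : new long[]{domainTagged, rangeTagged}) {
            boolean isRange = (tagged & 0x1) == 1;
            System.out.println("predicate " + (tagged >> 1) + " -> " + (isRange ? "range" : "domain"));
        }
    }
}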
@@ -88,22 +89,25 @@ protected void setup(Context context) throws IOException { hasSchemaChanged = false; previousExecutionStep = context.getConfiguration().getInt("lastExecution.step", -1); - - try{ + + try{ CassandraDB db = new CassandraDB(); + if (domainSchemaTriples == null) { domainSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_DOMAIN_PROPERTY); hasSchemaChanged = db.loadSetIntoMemory(domainSchemaTriples, filters, previousExecutionStep); + // db not close } if (rangeSchemaTriples == null) { rangeSchemaTriples = new HashSet(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY); - + hasSchemaChanged |= db.loadSetIntoMemory(rangeSchemaTriples, filters, previousExecutionStep); + db.CassandraDBClose(); } }catch(TTransportException tte){ tte.printStackTrace(); @@ -120,13 +124,15 @@ protected void setup(Context context) throws IOException { } // Some debug codes -// System.out.println("In mapper setup, previousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); -// System.out.println("Input split: " + context.getInputSplit()); -// try { -// System.out.println("Input split length: " + context.getInputSplit().getLength()); -// } catch (InterruptedException e) { -// e.printStackTrace(); -// } + System.out.println("In mapper setup, previousExecutionStep= " + previousExecutionStep + " and hasSchemaChanged status: " + hasSchemaChanged); + System.out.println("Input split: " + context.getInputSplit()); + try { + System.out.println("Input split length: " + context.getInputSplit().getLength()); + } catch (InterruptedException e) { + e.printStackTrace(); + } } + + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java index 190743d..04f66fe 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubPropInheritMapper.java @@ -7,6 +7,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,15 +58,16 @@ protected void map(Long key, Row row, Context context) throws IOException, Inter oKey.set(bKey, 0, 17); oValue.set(pre); context.write(oKey, oValue); +// System.out.println(" i " + i); } - + //Check subprop transitivity if (pre == TriplesUtils.RDFS_SUBPROPERTY && subpropSchemaTriples.contains(obj)) { //Write the 05 + subject bKey[0] = 5; NumberUtils.encodeLong(bKey, 1, sub); oKey.set(bKey, 0, 9); - oValue.set(obj); + oValue.set(obj); context.write(oKey, oValue); } } @@ -84,11 +86,18 @@ protected void setup(Context context) throws IOException { hasSchemaChanged = db.loadSetIntoMemory(subpropSchemaTriples, filters, previousExecutionStep); // hasSchemaChanged = FilesTriplesReader.loadSetIntoMemory(subpropSchemaTriples, context, // "FILTER_ONLY_SUBPROP_SCHEMA", previousExecutionStep); +// System.out.println("AAA"); +// db.createIndexOnInferredSteps(); +// System.out.println("create on inferredsteps"); + db.CassandraDBClose(); } catch (TException e) { e.printStackTrace(); } } else { log.debug("Subprop schema triples already loaded in memory"); } + + } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java index 67ffb1f..56bd6cb 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasMapper.java @@ -5,6 +5,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Mapper.Context; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,6 +37,20 @@ public void map(Long key, Row row, Context context) throws IOException, Interrup oKey.set(bKey, 0, 9); context.write(oKey, oValue); -// System.out.println("About to emit from RDFSSubclasMapper-"+value); +//
System.out.println("׼����RDFSSubclasMapper-"+value); } + + protected void setup(Context context) throws IOException, InterruptedException{ + + } +// protected void cleanup(Context context) throws IOException, InterruptedException{ +// try { +// CassandraDB db = new CassandraDB(); +// db.UnIndex(); +// db.CassandraDBClose(); +// } catch (Exception e) { +// // TODO: handle exception +// } +// } + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java index 1eb0f42..69332a7 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubclasReducer.java @@ -2,9 +2,11 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -27,10 +29,13 @@ import cn.edu.neu.mitt.mrj.utils.NumberUtils; import cn.edu.neu.mitt.mrj.utils.TriplesUtils; -public class RDFSSubclasReducer extends Reducer, List> { - - protected static Logger log = LoggerFactory.getLogger(RDFSSubclasReducer.class); - +public class RDFSSubclasReducer + extends + Reducer, List> { + + protected static Logger log = LoggerFactory + .getLogger(RDFSSubclasReducer.class); + public static Map> subclassSchemaTriples = null; protected Set subclasURIs = new HashSet(); protected Set existingURIs = new HashSet(); @@ -38,7 +43,11 @@ public class RDFSSubclasReducer extends Reducer specialSuperclasses = new HashSet(); private TripleSource source = new TripleSource(); private Triple oTriple = new Triple(); - + private Map keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); private void recursiveScanSuperclasses(long value, Set set) { Collection subclassValues = subclassSchemaTriples.get(value); if (subclassValues != null) { @@ -54,65 +63,69 @@ private void recursiveScanSuperclasses(long value, Set set) { } @Override - public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { -// System.out.println("����RDFSSubclasReducer��-"); + public void reduce(BytesWritable key, Iterable values, + Context context) throws IOException, InterruptedException { + // System.out.println("����RDFSSubclasReducer��-"); existingURIs.clear(); Iterator itr = values.iterator(); while (itr.hasNext()) { long value = itr.next().get(); - existingURIs.add(value); //���еı��� + existingURIs.add(value); // ���еı��� } - + Iterator oTypes = existingURIs.iterator(); subclasURIs.clear(); while (oTypes.hasNext()) { long existingURI = oTypes.next(); - recursiveScanSuperclasses(existingURI, subclasURIs); //subclasURIs�����е�subclass + recursiveScanSuperclasses(existingURI, subclasURIs); // subclasURIs�����е�subclass } - + subclasURIs.removeAll(existingURIs); - + oTypes = subclasURIs.iterator(); byte[] bKey = key.getBytes(); - long oKey = NumberUtils.decodeLong(bKey,1); + long oKey = NumberUtils.decodeLong(bKey, 1); oTriple.setSubject(oKey); boolean typeTriple = bKey[0] == 0; - if (!typeTriple) { //It's a subclass triple - oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11 + if (!typeTriple) { // It's a subclass triple + oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); // Rule 11 // 
Added by WuGang, 2010-08-26 oTriple.setType(TriplesUtils.RDFS_11); oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDFS_SUBCLASS); - } else { //It's a type triple - oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9 + } else { // It's a type triple + oTriple.setPredicate(TriplesUtils.RDF_TYPE); // Rule 9 // Added by WuGang, 2010-08-26 oTriple.setType(TriplesUtils.RDFS_9); oTriple.setRsubject(oTriple.getSubject()); oTriple.setRpredicate(TriplesUtils.RDF_TYPE); } -// while (oTypes.hasNext()) { -// long oType = oTypes.next(); -// oTriple.setObject(oType); -// context.write(source, oTriple); -// } + // while (oTypes.hasNext()) { + // long oType = oTypes.next(); + // oTriple.setObject(oType); + // context.write(source, oTriple); + // } // Modified by WuGang, 2010-08-26 while (oTypes.hasNext()) { long oType = oTypes.next(); oTriple.setObject(oType); for (long obj : existingURIs) { oTriple.setRobject(obj); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); } - } - + } + if (typeTriple) { /* Check special rules */ - if ((subclasURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) - || existingURIs.contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)) - && !memberProperties.contains(oTriple.getSubject())) { // Rule 12, see RDFSSpecialPropsReducer + if ((subclasURIs .contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY) || existingURIs .contains(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY)) && !memberProperties.contains(oTriple.getSubject())) { // Rule // 12, see RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBPROPERTY); oTriple.setObject(TriplesUtils.RDFS_MEMBER); // Added by WuGang, 2010-08-26 @@ -121,16 +134,21 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CONTAINER_MEMBERSHIP_PROPERTY); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + + // context.write(source, oTriple); + context.getCounter("RDFS derived triples", + "subproperty of member").increment(1); } - + if (subclasURIs.contains(TriplesUtils.RDFS_DATATYPE) || existingURIs.contains(TriplesUtils.RDFS_DATATYPE)) { specialSuperclasses.clear(); - recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule 13, see RDFSSpecialPropsReducer + recursiveScanSuperclasses(oTriple.getSubject(), + specialSuperclasses); + if (!specialSuperclasses.contains(TriplesUtils.RDFS_LITERAL)) { // Rule + // 13, see RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_LITERAL); // Added by WuGang, 2010-08-26 @@ -139,17 +157,21 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_DATATYPE); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); - context.getCounter("RDFS derived triples", "subclass of
Literal").increment(1); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); + context.getCounter("RDFS derived triples", + "subclass of Literal").increment(1); } } - + if (subclasURIs.contains(TriplesUtils.RDFS_CLASS) || existingURIs.contains(TriplesUtils.RDFS_CLASS)) { specialSuperclasses.clear(); - recursiveScanSuperclasses(oTriple.getSubject(), specialSuperclasses); - if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule 8���μ�RDFSSpecialPropsReducer + recursiveScanSuperclasses(oTriple.getSubject(), + specialSuperclasses); + if (!specialSuperclasses.contains(TriplesUtils.RDFS_RESOURCE)) { // Rule + // 8���μ�RDFSSpecialPropsReducer oTriple.setPredicate(TriplesUtils.RDFS_SUBCLASS); oTriple.setObject(TriplesUtils.RDFS_RESOURCE); // Added by WuGang, 2010-08-26 @@ -158,23 +180,28 @@ public void reduce(BytesWritable key, Iterable values, Context con oTriple.setRpredicate(TriplesUtils.RDF_TYPE); oTriple.setRobject(TriplesUtils.RDFS_CLASS); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); - context.getCounter("RDFS derived triples", "subclass of resource").increment(1); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + // context.write(source, oTriple); + context.getCounter("RDFS derived triples", + "subclass of resource").increment(1); } } } - - //Update the counters + + // Update the counters if (typeTriple) - context.getCounter("RDFS derived triples", "subclass inheritance rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", + "subclass inheritance rule").increment(subclasURIs.size()); else - context.getCounter("RDFS derived triples", "subclass transitivity rule").increment(subclasURIs.size()); + context.getCounter("RDFS derived triples", + "subclass transitivity rule").increment(subclasURIs.size()); } - + @Override public void setup(Context context) throws IOException { - CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. + CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works + // around. 
if (subclassSchemaTriples == null) { CassandraDB db; @@ -183,6 +210,7 @@ public void setup(Context context) throws IOException { Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBCLASS); subclassSchemaTriples = db.loadMapIntoMemory(filters); + db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); } catch (InvalidRequestException e) { @@ -197,16 +225,18 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } } - + if (memberProperties == null) { CassandraDB db; try { db = new CassandraDB(); Set filters = new HashSet(); filters.add(TriplesUtils.SCHEMA_TRIPLE_MEMBER_SUBPROPERTY); - + memberProperties = new HashSet(); db.loadSetIntoMemory(memberProperties, filters, -1); + + db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); } catch (InvalidRequestException e) { @@ -219,10 +249,19 @@ public void setup(Context context) throws IOException { e.printStackTrace(); } catch (TException e) { e.printStackTrace(); - } + } } source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + + } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + + super.cleanup(context); } } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java index 42c5d39..20c6e08 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropDomRangeReducer.java @@ -3,9 +3,11 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.AbstractMap; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -15,6 +17,7 @@ import org.apache.cassandra.thrift.SchemaDisagreementException; import org.apache.cassandra.thrift.TimedOutException; import org.apache.cassandra.thrift.UnavailableException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.Reducer; @@ -42,75 +45,118 @@ public class RDFSSubpropDomRangeReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { - byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26 -// long uri = key.get(); //for a domain property this is s, for a range property this is o - long uri = NumberUtils.decodeLong(bKey, 0); //for domain this is s, for range this is o - long uri_opposite = NumberUtils.decodeLong(bKey, 8); //for domain this is o, for range this is s - - derivedProps.clear(); //the derived x values - - //Get the predicates with a range or domain associated to this URIs - propURIs.clear(); - Iterator itr = values.iterator(); - while (itr.hasNext()) - propURIs.add(itr.next().get()); //collect the p values - - Iterator itrProp = propURIs.iterator(); - while (itrProp.hasNext()) { - Collection objects = null; - long propURI = itrProp.next(); - if ((propURI & 0x1) == 1) { - objects = rangeSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "range
matches").increment(1); - } else { - objects = domainSchemaTriples.get(propURI >> 1); - context.getCounter("derivation", "domain matches").increment(1); - } - - if (objects != null) { - Iterator itr3 = objects.iterator(); - while (itr3.hasNext()) -// derivedProps.add(itr3.next()); - derivedProps.add(new AbstractMap.SimpleEntry(itr3.next(), propURI)); // Modified by WuGang, 2010-08-26 - } + byte[] bKey = key.getBytes(); // Added by Wugang, 2010-08-26 + // long uri = key.get(); //��domain���ԣ���s����range���ԣ���o + long uri = NumberUtils.decodeLong(bKey, 0); // ��domain������s����range������o + long uri_opposite = NumberUtils.decodeLong(bKey, 8); // ��domain������o����range������s + + Configuration conf = context.getConfiguration(); + derivedProps.clear(); // ���x + + Logger logger = LoggerFactory.getLogger(CassandraDB.class); + long time = System.currentTimeMillis(); + + // Get the predicates with a range or domain associated to this URIs + propURIs.clear(); + Iterator itr = values.iterator(); + while (itr.hasNext()) + propURIs.add(itr.next().get()); // ���p + +// logger.info("while1 " + (System.currentTimeMillis() - time)); +// System.out.println("while1 " + (System.currentTimeMillis() - time)); + + Iterator itrProp = propURIs.iterator(); + while (itrProp.hasNext()) { + Collection objects = null; + long propURI = itrProp.next(); + if ((propURI & 0x1) == 1) { + objects = rangeSchemaTriples.get(propURI >> 1); + context.getCounter("derivation", "range matches").increment(1); + } else { + objects = domainSchemaTriples.get(propURI >> 1); + context.getCounter("derivation", "domain matches").increment(1); } - - //Derive the new statements -// Iterator itr2 = derivedProps.iterator(); - Iterator> itr2 = derivedProps.iterator(); // Modified by WuGang, 2010-08-26 - oTriple.setSubject(uri); - oTriple.setPredicate(TriplesUtils.RDF_TYPE); - oTriple.setObjectLiteral(false); - while (itr2.hasNext()) { -// oTriple.setObject(itr2.next()); - Entry entry = itr2.next(); - oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26 - // Added by WuGang, 2010-08-26 - long propURI = entry.getValue(); - oTriple.setRpredicate(propURI >> 1); // Modified by WuGang 2010-12-03����RDFSSubPropDomRangeMapper�������ˣ����ڱ��������ƻ��� - if ((propURI & 0x1) == 1) { // Rule 3, for range - oTriple.setType(TriplesUtils.RDFS_3); - oTriple.setRsubject(uri_opposite); - oTriple.setRobject(uri); - }else{ // Rule 2, for domain - oTriple.setType(TriplesUtils.RDFS_2); - oTriple.setRsubject(uri); - oTriple.setRobject(uri_opposite); - } - - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); - //context.write(source, oTriple); + + if (objects != null) { + Iterator itr3 = objects.iterator(); + while (itr3.hasNext()) + // derivedProps.add(itr3.next()); + derivedProps.add(new AbstractMap.SimpleEntry( + itr3.next(), propURI)); // Modified by WuGang, + // 2010-08-26 } - context.getCounter("RDFS derived triples", "subprop range and domain rule").increment(derivedProps.size()); - } + } + +// logger.info("while2 " + (System.currentTimeMillis() - time)); + time = System.currentTimeMillis(); +// System.out.println("while2 " + (System.currentTimeMillis() - time)); + + // Derive the new statements + // Iterator itr2 = derivedProps.iterator(); + Iterator> itr2 = derivedProps.iterator(); // Modified + // by + // WuGang, + // 2010-08-26 + oTriple.setSubject(uri); + oTriple.setPredicate(TriplesUtils.RDF_TYPE); + oTriple.setObjectLiteral(false); + while (itr2.hasNext()) { + // oTriple.setObject(itr2.next()); + Entry entry 
= itr2.next(); + oTriple.setObject(entry.getKey()); // Modified by WuGang, 2010-08-26 + // Added by WuGang, 2010-08-26 + long propURI = entry.getValue(); + oTriple.setRpredicate(propURI >> 1); // Modified by WuGang + // 2010-12-03, RDFSSubPropDomRangeMapper has changed, so the decoding here changed accordingly + if ((propURI & 0x1) == 1) { // Rule 3, for range + oTriple.setType(TriplesUtils.RDFS_3); + oTriple.setRsubject(uri_opposite); + oTriple.setRobject(uri); + } else { // Rule 2, for domain + oTriple.setType(TriplesUtils.RDFS_2); + oTriple.setRsubject(uri); + oTriple.setRobject(uri_opposite); + } + + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); + CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + +// logger.info("write " + (System.currentTimeMillis() - time)); + time = System.currentTimeMillis(); +// System.out.println("finish " + (System.currentTimeMillis() - time)); + // CassandraDB.writealltripleToMapReduceContext(oTriple, source, // context); + // context.write(source, oTriple); + // _output.write(conf.get(CassandraDB.COLUMNFAMILY_ALLTRIPLES), // ByteBufferUtil.bytes(key.toString()), // Collections.singletonList(m)); + // Reporter reporter = null ; + // _output.getCollector(CassandraDB.COLUMNFAMILY_ALLTRIPLES, // reporter).collect(key, arg1);; + } + + // logger.info(" " + (System.currentTimeMillis() - time)); + context.getCounter("RDFS derived triples", "subprop range and domain rule").increment(derivedProps.size()); + // logger.info("finish " + (System.currentTimeMillis() - time)); + // Mutation m = new Mutation(); + } + + @Override public void setup(Context context) throws IOException { CassandraDB.setConfigLocation(); // 2014-12-11, Very strange, this works around. +// outputKey = ByteBufferUtil.bytes(context.getConfiguration().get(CassandraDB.COLUMNFAMILY_ALLTRIPLES)); try{ CassandraDB db = new CassandraDB(); if (domainSchemaTriples == null) { @@ -124,6 +170,7 @@ public void setup(Context context) throws IOException { filters.add(TriplesUtils.SCHEMA_TRIPLE_RANGE_PROPERTY); rangeSchemaTriples = db.loadMapIntoMemory(filters); } + db.CassandraDBClose(); }catch(TTransportException tte){ tte.printStackTrace(); } catch (InvalidRequestException e) { @@ -140,5 +187,20 @@ public void setup(Context context) throws IOException { source.setDerivation(TripleSource.RDFS_DERIVED); source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); + } + + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + + + super.cleanup(context); + } + + + + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java index b8402e6..0950e87 100644 --- a/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/reasoner/rdfs/RDFSSubpropInheritReducer.java @@ -2,13 +2,17 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.cassandra.hadoop.cql3.CqlBulkOutputFormat; +import org.apache.cassandra.thrift.Cassandra.AsyncProcessor.system_add_column_family; import org.apache.cassandra.thrift.InvalidRequestException; import org.apache.cassandra.thrift.SchemaDisagreementException;
import org.apache.cassandra.thrift.TimedOutException; @@ -21,6 +25,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.sun.corba.se.spi.ior.Writeable; + import cn.edu.neu.mitt.mrj.data.Triple; import cn.edu.neu.mitt.mrj.data.TripleSource; import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; @@ -38,6 +44,12 @@ public class RDFSSubpropInheritReducer extends Reducer keys = new LinkedHashMap(); + private Map allkeys = new LinkedHashMap(); + private List allvariables = new ArrayList(); + private List allTValues = new ArrayList(); + private List stepsValues = new ArrayList(); private void recursiveScanSubproperties(long value, Set set) { Collection subprops = subpropSchemaTriples.get(value); @@ -56,8 +68,8 @@ private void recursiveScanSubproperties(long value, Set set) { @Override public void reduce(BytesWritable key, Iterable values, Context context) throws IOException, InterruptedException { + byte[] bKey = key.getBytes(); - switch(bKey[0]) { case 2: case 3: // rdfs rule 7 @@ -67,11 +79,17 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.clear(); //filter the properties that are already present Iterator itr = values.iterator(); + /* + * the values Iterable is emptied once its iterator has been consumed, so record the values in a list + */ + List list1 = new ArrayList(); while (itr.hasNext()) { long value = itr.next().get(); + list1.add(value); if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } + } Iterator itr3 = propURIs.iterator(); @@ -90,10 +108,9 @@ public void reduce(BytesWritable key, Iterable values, Context con // Modified by WuGang, 2010-08-26 while (itr3.hasNext()) { oTriple.setPredicate(itr3.next()); - for (LongWritable pre : values) { - oTriple.setRpredicate(pre.get()); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); -// context.write(source, oTriple); + for (Long pre : list1) { + oTriple.setRpredicate(pre); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); } } @@ -105,8 +122,10 @@ public void reduce(BytesWritable key, Iterable values, Context con propURIs.clear(); //filter the properties that are already present Iterator itr2 = values.iterator(); + List list2 = new ArrayList(); while (itr2.hasNext()) { long value = itr2.next().get(); + list2.add(value); if (!propURIs.contains(value)) { recursiveScanSubproperties(value, propURIs); } @@ -125,13 +144,15 @@ public void reduce(BytesWritable key, Iterable values, Context con // context.write(source, oTriple); // } // Modified by WuGang, 2010-08-26 + while (itr4.hasNext()) { oTriple.setObject(itr4.next()); - for(LongWritable obj:values){ - oTriple.setRobject(obj.get()); - CassandraDB.writeJustificationToMapReduceContext(oTriple, source, context); + for(Long obj:list2){ + oTriple.setRobject(obj); + context.getCounter("RDFS derived triples", "subproperty of member").increment(1); // context.write(source, oTriple); } + } context.getCounter("RDFS derived triples", "subprop transitivity rule").increment(propURIs.size()); @@ -140,6 +161,7 @@ public void reduce(BytesWritable key, Iterable values, Context con default: break; } + }
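// A minimal sketch of the pitfall the list1/list2 change above works
// around (names illustrative): a Hadoop reducer's values Iterable can be
// traversed only once, because the framework reuses a single object while
// streaming, so a second for-each over `values` sees nothing. Buffering
// the first pass into a plain List keeps the data available for the
// nested loop.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class SingleUseIterableSketch {
    public static void main(String[] args) {
        Iterator<Long> once = Arrays.asList(1L, 2L, 3L).iterator(); // stand-in for reducer values
        List<Long> buffered = new ArrayList<Long>();
        while (once.hasNext()) buffered.add(once.next()); // first (and only) pass
        for (Long v : buffered) System.out.println(v);    // safe to re-read
    }
}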
@Override @@ -154,6 +176,7 @@ public void setup(Context context) throws IOException { filters.add(TriplesUtils.SCHEMA_TRIPLE_SUBPROPERTY); subpropSchemaTriples = db.loadMapIntoMemory(filters); // subpropSchemaTriples = FilesTriplesReader.loadMapIntoMemory("FILTER_ONLY_SUBPROP_SCHEMA", context); + db.CassandraDBClose(); } catch (TTransportException e) { e.printStackTrace(); } catch (InvalidRequestException e) { @@ -175,6 +198,22 @@ public void setup(Context context) throws IOException { source.setStep(context.getConfiguration().getInt("reasoner.step", 0)); oTriple2.setPredicate(TriplesUtils.RDF_TYPE); - oTriple2.setObjectLiteral(false); + oTriple2.setObjectLiteral(false); + + + } + + @Override + protected void cleanup( + Reducer, List>.Context context) + throws IOException, InterruptedException { + /* + * without calling close, nothing gets written into the database + */ + + super.cleanup(context); } + + + } diff --git a/mrj-0.1/src/cn/edu/neu/mitt/mrj/utils/Cassandraconf.java b/mrj-0.1/src/cn/edu/neu/mitt/mrj/utils/Cassandraconf.java new file mode 100644 index 0000000..0c335ef --- /dev/null +++ b/mrj-0.1/src/cn/edu/neu/mitt/mrj/utils/Cassandraconf.java @@ -0,0 +1,8 @@ +package cn.edu.neu.mitt.mrj.utils; + +public class Cassandraconf { + public static final String host = "192.168.171.112"; + public static final String CassandraConfFile = "file:///home/imdb/work/apache-cassandra-2.1.2/conf/cassandra.yaml"; + public static final String partitioner = "Murmur3Partitioner"; + +} diff --git a/mrj-0.1/src/prejustification/SelectInferRows.java b/mrj-0.1/src/prejustification/SelectInferRows.java new file mode 100644 index 0000000..2ecbf7a --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRows.java @@ -0,0 +1,143 @@ +package prejustification; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.cassandra.hadoop.ColumnFamilyInputFormat; +import org.apache.cassandra.hadoop.ConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlConfigHelper; +import org.apache.cassandra.hadoop.cql3.CqlInputFormat; +import org.apache.cassandra.hadoop.cql3.CqlOutputFormat; +import org.apache.cassandra.thrift.SlicePredicate; +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; +import cn.edu.neu.mitt.mrj.reasoner.MapReduceReasonerJobConfig; + + +public class SelectInferRows extends Configured implements Tool{ //private static final Logger logger = LoggerFactory.getLogger(); + public static void main(String[] args) throws Exception{ + int exitCode = ToolRunner.run(new Configuration(), new SelectInferRows(), args); + System.exit(exitCode); + } + + public int run(String[] args) throws Exception{ + + //Job job = new Job(getConf()); +// Job job = MapReduceReasonerJobConfig.createNewJob(SelectInferRows.class, "Select Rows", new HashSet(), 16, 16, true, true); + +// ConfigHelper.setInputInitialAddress(getConf(), CassandraDB.DEFAULT_HOST); +// ConfigHelper.setInputColumnFamily(getConf(), CassandraDB.KEYSPACE, CassandraDB.COLUMN_JUSTIFICATION); + // job.setJobName("Del Rows"); +// job.setJarByClass(SelectInferRows.class); + + /* + * Select(map) + */ + + + Configuration conf = new Configuration(); + + Job job = new Job(conf); + job.setJobName(" Test "); + job.setJarByClass(SelectInferRows.class); + job.setNumReduceTasks(8); + + ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE,
CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + CqlConfigHelper.setInputCql(job.getConfiguration(), + "SELECT * FROM " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + + " WHERE TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") > ? AND TOKEN(" + + CassandraDB.COLUMN_SUB + ", " + + CassandraDB.COLUMN_PRE + ", " + + CassandraDB.COLUMN_OBJ + ", " + + CassandraDB.COLUMN_IS_LITERAL + + ") <= ? ALLOW FILTERING"); + CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); + //Modified by LiYang + ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); + job.setInputFormatClass(CqlInputFormat.class); + job.setOutputKeyClass(Map.class); + job.setOutputValueClass(List.class); + job.setOutputFormatClass(CqlOutputFormat.class); + ConfigHelper.setOutputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); + ConfigHelper.setOutputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); + + ConfigHelper.setOutputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); + String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; + CqlConfigHelper.setOutputCql(job.getConfiguration(), query); + + job.setMapperClass(SelectInferRowsMap.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(ByteBuffer.class); + job.setReducerClass(SelectInferRowsReduce.class); + + +// job.setInputFormatClass(ColumnFamilyInputFormat.class); +// ConfigHelper.setInputInitialAddress(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.host); +// ConfigHelper.setInputPartitioner(job.getConfiguration(), cn.edu.neu.mitt.mrj.utils.Cassandraconf.partitioner); +// ConfigHelper.setInputColumnFamily(job.getConfiguration(), CassandraDB.KEYSPACE, CassandraDB.COLUMNFAMILY_JUSTIFICATIONS); +// CqlConfigHelper.setInputCql(job.getConfiguration(), +// "SELECT * FROM " + CassandraDB.KEYSPACE + "."
+ CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " WHERE RULE = 0"); +// CqlConfigHelper.setInputCQLPageRowSize(job.getConfiguration(), CassandraDB.CQL_PAGE_ROW_SIZE); +// ConfigHelper.setInputSplitSize(job.getConfiguration(), 10000000); +// job.setInputFormatClass(CqlInputFormat.class); +// +// +// /* +// * Insert(reduce) +// */ +//// job.setCombinerClass(SelectInferRowsReduce.class); +// job.setOutputKeyClass(Map.class); +// job.setOutputValueClass(List.class); +// //this effectively specifies the output directory; it must be set, otherwise the job reports that the output directory cannot be found +// job.setOutputFormatClass(CqlOutputFormat.class); +// +// ConfigHelper.setOutputInitialAddress(job.getConfiguration(), CassandraDB.DEFAULT_HOST); +// ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner"); +// ConfigHelper.setOutputColumnFamily(getConf(), CassandraDB.KEYSPACE, "ruleiszero"); +// ConfigHelper.setOutputKeyspace(job.getConfiguration(), CassandraDB.KEYSPACE);// ** +//// String query = "INSERT INTO mrjks.ruleiszero (" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + ", " + +//// CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 +", " +//// + CassandraDB.COLUMN_INFERRED_STEPS + ") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, )"; +// String query = "UPDATE " + CassandraDB.KEYSPACE + "." + CassandraDB.COLUMNFAMILY_JUSTIFICATIONS + +// " SET " + CassandraDB.COLUMN_INFERRED_STEPS + "=? "; +// CqlConfigHelper.setOutputCql(job.getConfiguration(), query); +// +// +// ConfigHelper.getInputSplitSize(job.getConfiguration()); +// CqlConfigHelper.getInputPageRowSize(job.getConfiguration()); +//// String column_names = CassandraDB.COLUMN_SUB + CassandraDB.COLUMN_PRE + CassandraDB.COLUMN_OBJ + CassandraDB.COLUMN_IS_LITERAL + +//// CassandraDB.COLUMN_TRIPLE_TYPE + CassandraDB.COLUMN_RULE + CassandraDB.COLUMN_V1 + CassandraDB.COLUMN_V2 + CassandraDB.COLUMN_V3; +//// SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBufferUtil.bytes(column_names))); +// +// //without these statements, the map and reduce phases will not run job.waitForCompletion(true); + + System.out.println("Finished"); + return 0; + + } + +} diff --git a/mrj-0.1/src/prejustification/SelectInferRowsMap.java b/mrj-0.1/src/prejustification/SelectInferRowsMap.java new file mode 100644 index 0000000..201eea0 --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRowsMap.java @@ -0,0 +1,79 @@ +package prejustification; +import cn.edu.neu.mitt.mrj.io.dbs.CassandraDB; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; + +import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.hadoop.mapreduce.Mapper; + + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Metadata; +import com.datastax.driver.core.ResultSet; + +import com.datastax.driver.core.Row; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SimpleStatement; + + + +public class SelectInferRowsMap extends Mapper, ByteBuffer> { + private Cluster cluster; + private Session session; + + + public void map(ByteBuffer key, Row row, Context context) throws IOException, InterruptedException{ + SimpleStatement statement = new SimpleStatement("SELECT * FROM mrjks.justifications"); + statement.setFetchSize(100); + ResultSet results = session.execute(statement); + + System.out.println("---------MAP----------"); + Map keys = new HashMap<>();
ByteBuffer inferredsteps; + for (Row rows : results){ + if (rows.getInt(CassandraDB.COLUMN_RULE) != 0) { + keys.put(CassandraDB.COLUMN_SUB, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_SUB))); + keys.put(CassandraDB.COLUMN_PRE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_PRE))); + keys.put(CassandraDB.COLUMN_OBJ, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_OBJ))); + keys.put(CassandraDB.COLUMN_IS_LITERAL, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_IS_LITERAL))); + keys.put(CassandraDB.COLUMN_TRIPLE_TYPE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_TRIPLE_TYPE))); + keys.put(CassandraDB.COLUMN_RULE, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_RULE))); + keys.put(CassandraDB.COLUMN_V1, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V1))); + keys.put(CassandraDB.COLUMN_V2, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V2))); + keys.put(CassandraDB.COLUMN_V3, ByteBufferUtil.bytes(rows.getLong(CassandraDB.COLUMN_V3))); + inferredsteps = ByteBufferUtil.bytes(rows.getInt(CassandraDB.COLUMN_INFERRED_STEPS)); + context.write(keys, inferredsteps); + } + } + } + + public void setup(Context context) throws IOException, InterruptedException{ + + cluster = Cluster.builder().addContactPoint(cn.edu.neu.mitt.mrj.utils.Cassandraconf.host).build(); + Metadata metadata = cluster.getMetadata(); + System.out.printf("-------Connected to cluster: %s\n", metadata.getClusterName()); + session = cluster.connect(); + + String cquery1 = "CREATE TABLE IF NOT EXISTS " + CassandraDB.KEYSPACE + "." + "ruleiszero" + " ( " + CassandraDB.COLUMN_SUB + " bigint, " + // partition key + CassandraDB.COLUMN_PRE + " bigint, " + // partition key + CassandraDB.COLUMN_OBJ + " bigint, " + // partition key + CassandraDB.COLUMN_IS_LITERAL + " boolean, " + // partition key + CassandraDB.COLUMN_TRIPLE_TYPE + " int, " + + CassandraDB.COLUMN_RULE + " int, " + + CassandraDB.COLUMN_V1 + " bigint, " + + CassandraDB.COLUMN_V2 + " bigint, " + + CassandraDB.COLUMN_V3 + " bigint, " + + CassandraDB.COLUMN_INFERRED_STEPS + " int, " + // this is the only field that is not included in the primary key + " PRIMARY KEY ((" + CassandraDB.COLUMN_SUB + ", " + CassandraDB.COLUMN_PRE + ", " + CassandraDB.COLUMN_OBJ + ", " + CassandraDB.COLUMN_IS_LITERAL + "), " + + CassandraDB.COLUMN_TRIPLE_TYPE + ", " + CassandraDB.COLUMN_RULE + ", " + CassandraDB.COLUMN_V1 + ", " + CassandraDB.COLUMN_V2 + ", " + CassandraDB.COLUMN_V3 + " ) ) "; + session.execute(cquery1); + } + + +} diff --git a/mrj-0.1/src/prejustification/SelectInferRowsReduce.java b/mrj-0.1/src/prejustification/SelectInferRowsReduce.java new file mode 100644 index 0000000..adcfa19 --- /dev/null +++ b/mrj-0.1/src/prejustification/SelectInferRowsReduce.java @@ -0,0 +1,18 @@ +package prejustification; + +import org.apache.hadoop.mapreduce.Reducer; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Map; +public class SelectInferRowsReduce extends Reducer, ByteBuffer, Map, ByteBuffer> { + public void reduce(Map key, Iterable value, Context context) throws IOException, InterruptedException{ + + for (ByteBuffer inferredsteps : value) { + System.out.println(key); + context.write(key, inferredsteps); + } + + } + +}
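// Finally, a minimal sketch of the CqlOutputFormat record contract that the
// SelectInferRows job above relies on (literal column names "sub"/"pre" and
// the step value are illustrative, not taken from the patch): the output
// key is a Map of primary-key column name -> value, and the output value is
// the List of bindings for the "?" placeholders in the configured UPDATE
// statement, here the single inferredsteps column.
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.cassandra.utils.ByteBufferUtil;

public class CqlUpdateRecordSketch {
    public static void main(String[] args) {
        Map<String, ByteBuffer> keys = new LinkedHashMap<String, ByteBuffer>();
        keys.put("sub", ByteBufferUtil.bytes(42L));   // primary-key column
        keys.put("pre", ByteBufferUtil.bytes(7L));    // primary-key column
        List<ByteBuffer> variables =
                Collections.singletonList(ByteBufferUtil.bytes(3)); // inferredsteps = 3
        System.out.println(keys.size() + " key columns, " + variables.size() + " bound variable");
        // context.write(keys, variables) would hand this record to CqlOutputFormat
    }
}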