diff --git a/include/dyn-aa/LogProcessor.h b/include/dyn-aa/LogProcessor.h index e76f6ed..40d9110 100644 --- a/include/dyn-aa/LogProcessor.h +++ b/include/dyn-aa/LogProcessor.h @@ -10,14 +10,15 @@ namespace dyn_aa { struct LogProcessor { LogProcessor(): CurrentRecordID(0) {} - void processLog(bool Reversed = false); + bool processLog(const std::string &LogFileName, bool Reversed = false); unsigned getCurrentRecordID() const { return CurrentRecordID; } + std::string getCurrentFileName() const {return CurrentFileName; } // initialize is called before processing each log file, and finalize is // called after processing each log file. virtual void initialize() {} - virtual void finalize() {} + virtual bool finalize() { return false; } // beforeRecord is called before processing each log record, and afterRecord // is called after processing each log record. Therefore, a typical callback // flow is: @@ -45,11 +46,11 @@ struct LogProcessor { virtual void processBasicBlock(const BasicBlockRecord &) {} private: - void processLog(const std::string &LogFileName, bool Reversed); static bool ReadData(void *P, int Length, bool Reversed, FILE *LogFile); static off_t GetFileSize(FILE *LogFile); unsigned CurrentRecordID; + std::string CurrentFileName; }; } diff --git a/include/dyn-aa/MissingAliasesClassifier.h b/include/dyn-aa/MissingAliasesClassifier.h index bbcf80a..eb4487e 100644 --- a/include/dyn-aa/MissingAliasesClassifier.h +++ b/include/dyn-aa/MissingAliasesClassifier.h @@ -34,6 +34,7 @@ struct MissingAliasesClassifier: public ModulePass, public LogProcessor { bool isRootCause(Value *V1, Value *V2); // Interfaces of LogProcessor. + void initialize(); void processTopLevel(const TopLevelRecord &Record); void processStore(const StoreRecord &Record); void processCall(const CallRecord &Record); @@ -46,7 +47,7 @@ struct MissingAliasesClassifier: public ModulePass, public LogProcessor { // Argument list list ArgMem; // CallSite list - list CallMem; + Value *CallMem; // Keys are PointerAddress, values are list of DenseMap > > LoadMem; // Keys are PointeeAddress, values are SelectInst or PHINode list diff --git a/include/dyn-aa/Reducer.h b/include/dyn-aa/Reducer.h new file mode 100644 index 0000000..07ad9dc --- /dev/null +++ b/include/dyn-aa/Reducer.h @@ -0,0 +1,53 @@ +// vim: sw=2 + +#ifndef __DYN_AA_REDUCER_H +#define __DYN_AA_REDUCER_H + +#include + +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CFG.h" + +#include "rcs/typedefs.h" +#include "rcs/IDAssigner.h" + +#include "dyn-aa/LogProcessor.h" +#include "dyn-aa/Utils.h" + +typedef void (*ReductionFunction)(Module &); + +using namespace llvm; +using namespace std; +using namespace rcs; + +namespace dyn_aa { +struct Reducer: public ModulePass { + static char ID; + + Reducer(): ModulePass(ID) { + ReductionFunctions.push_back(Reducer::reduceUnexecuted); + ReductionFunctions.push_back(Reducer::reduceUnrelatedFunctions); + ReductionFunctions.push_back(Reducer::reduceGlobalVariables); + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnModule(Module &M); + + bool setReductionOptions(const vector &RO); + + private: + static void reduceUnexecuted(Module &M); + static void reduceUnrelatedFunctions(Module &M); + static void reduceGlobalVariables(Module &M); + static void reduceInstructions(Module &M); + // indicate whether the ith reduction function is executed + vector ReductionOptions; + vector ReductionFunctions; +}; +} + +#endif diff --git a/include/dyn-aa/ReductionVerifier.h b/include/dyn-aa/ReductionVerifier.h new file mode 100644 index 0000000..6208a14 --- /dev/null +++ b/include/dyn-aa/ReductionVerifier.h @@ -0,0 +1,38 @@ +// vim: sw=2 + +#ifndef __DYN_AA_REDUCTION_VERIFIER_H +#define __DYN_AA_REDUCTION_VERIFIER_H + +#include + +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include "dyn-aa/BaselineAliasAnalysis.h" +#include "dyn-aa/Utils.h" + +using namespace std; +using namespace llvm; +using namespace dyn_aa; + +namespace dyn_aa { +struct ReductionVerifier: public ModulePass { + static char ID; + + ReductionVerifier(): ModulePass(ID) { } + virtual bool runOnModule(Module &M); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + bool getVerified() { return Verified; } + private: + bool Verified; +}; +} + +#endif diff --git a/include/dyn-aa/TraceSlicer.h b/include/dyn-aa/TraceSlicer.h index 8ee431d..b853e0a 100644 --- a/include/dyn-aa/TraceSlicer.h +++ b/include/dyn-aa/TraceSlicer.h @@ -47,7 +47,10 @@ struct PointerTrace{ struct TraceSlicer: public ModulePass, public LogProcessor { static char ID; - TraceSlicer(): ModulePass(ID) {} + TraceSlicer(): ModulePass(ID), + CurrentFunction(NULL), + Merged(false), + PushCallInst(false) {} virtual bool runOnModule(Module &M); virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual void print(raw_ostream &O, const Module *M) const; @@ -55,11 +58,11 @@ struct TraceSlicer: public ModulePass, public LogProcessor { // Interfaces of LogProcessor. void processMemAlloc(const MemAllocRecord &Record); void processTopLevel(const TopLevelRecord &Record); + void processEnter(const EnterRecord &Record); void processStore(const StoreRecord &Record); void processCall(const CallRecord &Record); void processReturn(const ReturnRecord &Record); void processBasicBlock(const BasicBlockRecord &Record); - static bool isCalledFunction(Function *F, CallSite CS); static Value *getOperandIfConstant(Value *V); Value *getLatestCommonAncestor(); @@ -68,9 +71,18 @@ struct TraceSlicer: public ModulePass, public LogProcessor { pair TraceRecord, int PointerLabel) const; pair dependsOn(LogRecordInfo &R1, LogRecordInfo &R2); + // for inst, arg, gv: alias, slice; for bb: related; for func: executed + void addMetaData(Value *V, string Kind, Module *M); PointerTrace Trace[2]; unsigned CurrentRecordID; + Function *CurrentFunction; + bool Merged; + bool PushCallInst; + + // for reduction tagging + DenseSet RelatedFunctions; + DenseSet ExecutedBasicBlocks; }; } diff --git a/lib/Analyses/MissingAliasesClassifier.cpp b/lib/Analyses/MissingAliasesClassifier.cpp index b7bbd96..0cc7001 100644 --- a/lib/Analyses/MissingAliasesClassifier.cpp +++ b/lib/Analyses/MissingAliasesClassifier.cpp @@ -148,6 +148,13 @@ bool MissingAliasesClassifier::isRootCause(Value *V1, Value *V2) { return true; } +void MissingAliasesClassifier::initialize() { + ArgMem.clear(); + CallMem = NULL; + LoadMem.clear(); + SelectPHIMem.clear(); +} + void MissingAliasesClassifier::processTopLevel(const TopLevelRecord &Record) { IDAssigner &IDA = getAnalysis(); Value *V = IDA.getValue(Record.PointerValueID); @@ -199,7 +206,7 @@ void MissingAliasesClassifier::processTopLevel(const TopLevelRecord &Record) { if (isa(V)) { LoadMem[Record.LoadedFrom].push_back(make_pair(V, Record.PointeeAddress)); } else if (CS) { - CallMem.push_back(V); + CallMem = V; } else if (isa(V)) { ArgMem.push_back(V); } else if (isa(V) || isa(V)) { @@ -228,25 +235,26 @@ void MissingAliasesClassifier::processCall(const CallRecord &Record) { CallSite CS(V); assert(CS); + Function *CalledFunction = CS.getCalledFunction(); + if (CalledFunction && CalledFunction->isDeclaration()) { + // containing function is called by external function + ArgMem.clear(); + } + // extract from ArgMem - for (list::iterator I = ArgMem.begin(), E = ArgMem.end(); I != E;) { + for (list::iterator I = ArgMem.begin(); I != ArgMem.end(); ++I) { Argument *A = dyn_cast(*I); - if (TraceSlicer::isCalledFunction(A->getParent(), CS)) { - PrevInst[A].insert( - TraceSlicer::getOperandIfConstant(CS.getArgument(A->getArgNo()))); - I = ArgMem.erase(I); - } else - ++I; + PrevInst[A].insert( + TraceSlicer::getOperandIfConstant(CS.getArgument(A->getArgNo()))); } + ArgMem.clear(); // extract from CallMem, for external function if (CS.getType()->isPointerTy()) { - for (list::iterator I = CallMem.begin(), E = CallMem.end(); - I != E;) { - if (V == *I) { - I = CallMem.erase(I); - } else - ++I; + if (CallMem) { + CallSite CS(CallMem); + assert(CallMem == V); + CallMem = NULL; } } } @@ -261,14 +269,10 @@ void MissingAliasesClassifier::processReturn(const ReturnRecord &Record) { Value *ReturnValue = RI->getReturnValue(); if (ReturnValue && ReturnValue->getType()->isPointerTy()) { // extract from CallMem - for (list::iterator I = CallMem.begin(), E = CallMem.end(); - I != E;) { - CallSite CS(*I); - if (TraceSlicer::isCalledFunction(RI->getParent()->getParent(), CS)) { - PrevInst[*I].insert(ReturnValue); - I = CallMem.erase(I); - } else - ++I; + if (CallMem) { + CallSite CS(CallMem); + PrevInst[CallMem].insert(ReturnValue); + CallMem = NULL; } } } diff --git a/lib/Analyses/ReductionVerifier.cpp b/lib/Analyses/ReductionVerifier.cpp index 262d7f4..388e101 100644 --- a/lib/Analyses/ReductionVerifier.cpp +++ b/lib/Analyses/ReductionVerifier.cpp @@ -11,24 +11,17 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "dyn-aa/BaselineAliasAnalysis.h" #include "dyn-aa/Utils.h" +#include "dyn-aa/Reducer.h" +#include "dyn-aa/ReductionVerifier.h" using namespace std; using namespace llvm; using namespace dyn_aa; -namespace dyn_aa { -struct ReductionVerifier: public ModulePass { - static char ID; - - ReductionVerifier(): ModulePass(ID) { } - virtual bool runOnModule(Module &M); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; -}; -} - -static RegisterPass X("verify-reducer", - "Verify whether reducer keeps " +static RegisterPass X("verify-reduction", + "Verify whether reduction keeps " "the bug", false, // Is CFG Only? true); // Is Analysis? @@ -45,10 +38,11 @@ bool ReductionVerifier::runOnModule(Module &M) { assert(ValueNum < 2); DbgDeclareInst *DDI = dyn_cast(I); if (DDI) { - V[ValueNum++] = DDI->getAddress(); + V[ValueNum] = DDI->getAddress(); } else { - V[ValueNum++] = I; + V[ValueNum] = I; } + ValueNum++; } } } @@ -56,19 +50,18 @@ bool ReductionVerifier::runOnModule(Module &M) { assert(ValueNum == 2); AliasAnalysis &AA = getAnalysis(); - errs().changeColor(raw_ostream::RED); + Verified = AA.alias(V[0], V[1]) == AliasAnalysis::NoAlias; + errs() << "Reduction Verifier: "; - if (AA.alias(V[0], V[1]) == AliasAnalysis::NoAlias) { - errs() << "Pass\n"; + if (Verified) { + errs() << "Verified\n"; } else { - errs() << "Fail\n"; + errs() << "Not verified\n"; } - errs().resetColor(); return false; } void ReductionVerifier::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); AU.addRequired(); } diff --git a/lib/Transforms/Reducer.cpp b/lib/Transforms/Reducer.cpp index 47e6737..a908f95 100644 --- a/lib/Transforms/Reducer.cpp +++ b/lib/Transforms/Reducer.cpp @@ -17,134 +17,157 @@ #include "dyn-aa/LogProcessor.h" #include "dyn-aa/Utils.h" +#include "dyn-aa/Reducer.h" using namespace llvm; using namespace std; using namespace rcs; - -namespace dyn_aa { -struct Reducer: public ModulePass, public LogProcessor { - static char ID; - - Reducer(); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnModule(Module &M); - void processBasicBlock(const BasicBlockRecord &Record); - - private: - DenseSet ExecutedFunctions; - DenseSet ExecutedBasicBlocks; - - void reduceFunctions(Module &M); - void reduceBasicBlocks(Module &M); - void tagPointers(Module &M); -}; -} - using namespace dyn_aa; char Reducer::ID = 0; -static RegisterPass X("remove-untouched-code", - "Remove untouched functions and basic blocks", +static RegisterPass X("reduce-testcase", "Reduce testcase", false, false); -static cl::list ValueIDs("pointer-value", - cl::desc("Value IDs of the two pointers")); - void Reducer::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); } -Reducer::Reducer(): ModulePass(ID) { } - -void Reducer::reduceFunctions(Module &M) { +void Reducer::reduceUnrelatedFunctions(Module &M) { + unsigned NumFunctions = 0, NumUnrelatedFunctions = 0; for (Module::iterator F = M.begin(); F != M.end(); ++F) { - if (!F->isDeclaration() && !ExecutedFunctions.count(F)) { - F->replaceAllUsesWith(UndefValue::get(F->getType())); - F->deleteBody(); + if (!F->isDeclaration()) { + ++NumFunctions; + BasicBlock *BB = &(F->getEntryBlock()); + bool Related = false; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + if (I->getMetadata("related")) { + Related = true; + break; + } + } + if (!Related) { + ++NumUnrelatedFunctions; + // leads to linking errs + F->deleteBody(); + } } } - errs() << "# of total functions " << M.size() << "\n"; - errs() << "# of deleted functions " << M.size() - ExecutedFunctions.size() - << "\n"; + errs() << "# of total functions " << NumFunctions << "\n"; + errs() << "# of unrelated functions " << NumUnrelatedFunctions << "\n"; } -void Reducer::reduceBasicBlocks(Module &M) { - unsigned NumBasicBlocks = 0; +void Reducer::reduceUnexecuted(Module &M) { + unsigned NumFunctions = 0, NumUnexecutedFunctions = 0; for (Module::iterator F = M.begin(); F != M.end(); ++F) { if (!F->isDeclaration()) { - BasicBlock *UnreachableBB = BasicBlock::Create(F->getContext(), "", F); - new UnreachableInst(UnreachableBB->getContext(), UnreachableBB); - for (Function::iterator BB = F->begin(); BB != F->end(); ++BB) { - ++NumBasicBlocks; - for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; - ++SI) { - if (!ExecutedBasicBlocks.count(*SI)) { - (*SI)->removePredecessor(BB); - BB->getTerminator()->setSuccessor(SI.getSuccessorIndex(), - UnreachableBB); + ++NumFunctions; + BasicBlock *BB = &(F->getEntryBlock()); + bool Executed = false; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + if (I->getMetadata("executed")) { + Executed = true; + break; + } + } + if (!Executed) { + ++NumUnexecutedFunctions; + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->deleteBody(); + } else { + // delete unexecuted basic blocks + BasicBlock *UnreachableBB = BasicBlock::Create(F->getContext(), "", F); + new UnreachableInst(UnreachableBB->getContext(), UnreachableBB); + for (Function::iterator BB = F->begin(); BB != F->end(); ++BB) { + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; + ++SI) { + bool Executed = false; + for (BasicBlock::iterator I = (*SI)->begin(); I != (*SI)->end(); + ++I) { + if (I->getMetadata("executed")) { + Executed = true; + break; + } + } + if (!Executed) { + (*SI)->removePredecessor(BB); + BB->getTerminator()->setSuccessor(SI.getSuccessorIndex(), + UnreachableBB); + } } } } - --NumBasicBlocks; } } - errs() << "# of total basic blocks " << NumBasicBlocks << "\n"; - errs() << "# of deleted basic blocks " - << NumBasicBlocks - ExecutedBasicBlocks.size() << "\n"; + errs() << "# of total functions " << NumFunctions << "\n"; + errs() << "# of unexecuted functions " << NumUnexecutedFunctions << "\n"; } -void Reducer::tagPointers(Module &M) { - IDAssigner &IDA = getAnalysis(); - Function *DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - for (unsigned i = 0; i < 2; ++i) { - Value *V = IDA.getValue(ValueIDs[i]); - Instruction *Inst = dyn_cast(V); - if (!Inst) { - Function *F; - if (Argument *A = dyn_cast(V)) { - F = A->getParent(); - } else { - F = M.getFunction("main"); - assert(F); +void Reducer::reduceGlobalVariables(Module &M) { + // get related global variables + DenseSet RelatedGVs; + Function *F = M.getFunction("main"); + BasicBlock *BB = &(F->getEntryBlock()); + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + if (I->getMetadata("slice")) { + DbgDeclareInst *DDI = dyn_cast(I); + if (DDI) { + RelatedGVs.insert(DDI->getAddress()); } - Value *Args[] = { MDNode::get(V->getContext(), V), - MDNode::get(M.getContext(), NULL)}; - Instruction *InsertBefore = F->getEntryBlock().getFirstInsertionPt(); - Inst = CallInst::Create(DeclareFn, Args, "", InsertBefore); } - Inst->setMetadata("alias", MDNode::get(M.getContext(), NULL)); } -} -bool Reducer::runOnModule(Module &M) { - // get executed functions and basic blocks from pointer logs - processLog(); - - // add metadata for input pointers - tagPointers(M); + // remove unrelated global variables + unsigned NumGVs = 0, NumDeletedGVs = 0; + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + NumGVs++; + if (I->hasInitializer() && !RelatedGVs.count(I)) { + NumDeletedGVs++; + if (!I->use_empty()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->setInitializer(0); + I->setLinkage(GlobalValue::ExternalLinkage); + } + } + errs() << "# of total global variables " << NumGVs << "\n"; + errs() << "# of deleted global vairables " << NumDeletedGVs << "\n"; +} - // try to reduce the number of functions in the module to something small. - reduceFunctions(M); +void Reducer::reduceInstructions(Module &M) { + // TODO: remained instruction should form new basic block, need to see whether PHI remains constant + for (Module::iterator F = M.begin(); F != M.end(); ++F) { + if (!F->isDeclaration()) { + for (Function::iterator BB = F->begin(); BB != F->end(); ++BB) { + for (BasicBlock::iterator I = BB->begin(); I != BB->end();) { + Instruction *Inst = I++; + if (!Inst->getMetadata("slice") && !Inst->getMetadata("alias")) { + if (!isa(Inst) && !isa(Inst)) { + if (!Inst->getType()->isVoidTy()) + Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); + Inst->eraseFromParent(); + } + } + } + } + } + } +} - // Attempt to delete entire basic blocks at a time to speed up - // convergence... this actually works by setting the terminator of the blocks - // to a return instruction then running simplifycfg, which can potentially - // shrinks the code dramatically quickly - reduceBasicBlocks(M); +bool Reducer::runOnModule(Module &M) { + errs() << "Reducer: try " << ReductionOptions.size() << "\n"; + for (unsigned i = 0; i < ReductionOptions.size(); i++) { + if (ReductionOptions[i]) { + errs() << "Reduction " << (i + 1) << "\n"; + (*ReductionFunctions[i])(M); + } + } return true; } -void Reducer::processBasicBlock(const BasicBlockRecord &Record) { - IDAssigner &IDA = getAnalysis(); - BasicBlock *BB = cast(IDA.getValue(Record.ValueID)); - - unsigned OldBBSize = ExecutedBasicBlocks.size(); - ExecutedBasicBlocks.insert(BB); - if (OldBBSize != ExecutedBasicBlocks.size()) { - Function *F = BB->getParent(); - ExecutedFunctions.insert(F); - } +// return false if all stages have been performed +bool Reducer::setReductionOptions(const vector &RO) { + ReductionOptions.clear(); + ReductionOptions.insert(ReductionOptions.begin(), RO.begin(), RO.end()); + return ReductionOptions.size() <= ReductionFunctions.size(); } diff --git a/lib/Analyses/TraceSlicer.cpp b/lib/Transforms/TraceSlicer.cpp similarity index 71% rename from lib/Analyses/TraceSlicer.cpp rename to lib/Transforms/TraceSlicer.cpp index b130d7c..17b89e9 100644 --- a/lib/Analyses/TraceSlicer.cpp +++ b/lib/Transforms/TraceSlicer.cpp @@ -5,6 +5,7 @@ #include #include +#include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -33,13 +34,16 @@ static cl::list StartingValueIDs( "starting-value", cl::desc("Value IDs of the two pointers")); +static cl::opt SliceForReduction("slice-for-reduction", + cl::desc("Slice for reduction")); + static RegisterPass X("slice-trace", "Slice trace of two input pointers", false, // Is CFG Only? true); // Is Analysis? struct RecordFinder: public LogProcessor { - RecordFinder(): RecordID1(-1), RecordID2(-1) {} + RecordFinder() {} void processTopLevel(const TopLevelRecord &Record) { if (StartingRecordIDs.size() == 2) { @@ -47,21 +51,33 @@ struct RecordFinder: public LogProcessor { return; } if (Record.PointerValueID == StartingValueIDs[0]) { + Filled1 = true; RecordID1 = getCurrentRecordID(); Address1 = Record.PointeeAddress; } if (Record.PointerValueID == StartingValueIDs[1]) { + Filled2 = true; RecordID2 = getCurrentRecordID(); Address2 = Record.PointeeAddress; } - if (Address1 == Address2) { + if (Filled1 && Filled2 && Address1 == Address2) { StartingRecordIDs.push_back(RecordID1); StartingRecordIDs.push_back(RecordID2); assert(StartingRecordIDs.size() == 2); } } + void initialize() { + Filled1 = false; + Filled2 = false; + } + + bool finalize() { + return StartingRecordIDs.size() == 2; + } + private: + bool Filled1, Filled2; unsigned RecordID1, RecordID2; void *Address1, *Address2; }; @@ -75,6 +91,7 @@ bool TraceSlicer::runOnModule(Module &M) { "we need two starting-record"); assert((StartingValueIDs.empty() || StartingValueIDs.size() == 2) && "we need two starting-value"); + string LogFileName = ""; if (StartingRecordIDs.empty()) { // The user specifies staring-value instead of starting-record. Need look // for starting-record in the trace. @@ -82,6 +99,7 @@ bool TraceSlicer::runOnModule(Module &M) { RecordFinder RF; RF.processLog(); CurrentRecordID = RF.getCurrentRecordID(); + LogFileName = RF.getCurrentFileName(); } else { errs() << "Counting log records...\n"; LogCounter LC; @@ -94,9 +112,42 @@ bool TraceSlicer::runOnModule(Module &M) { Trace[i].StartingRecordID = StartingRecordIDs[i]; errs() << "Backward slicing...\n"; - processLog(true); + if (LogFileName != "") + processLog(LogFileName, true); + else + processLog(true); + + if (Trace[0].Active || Trace[1].Active) + errs() << "Fail to merge!\n"; + + if (SliceForReduction) { + print(errs(), &M); + if (Merged) { + // add metadata for values in slice + for (unsigned PointerLabel = 0; PointerLabel < 2; ++PointerLabel) { + for (unsigned i = 0; i < Trace[PointerLabel].Slice.size(); ++i) { + Value *V = Trace[PointerLabel].Slice[i].second; + addMetaData(V, "slice", &M); + if (i == 0) + addMetaData(V, "alias", &M); + } + } + + // add metadata for related basic blocks + for (DenseSet::iterator I = RelatedFunctions.begin(); + I != RelatedFunctions.end(); ++I) { + addMetaData(&((*I)->getEntryBlock()), "related", &M); + } + + // add metadata for executed basic blocks + for (DenseSet::iterator I = ExecutedBasicBlocks.begin(); + I != ExecutedBasicBlocks.end(); ++I) { + addMetaData(*I, "executed", &M); + } + } + } - return false; + return true; } void TraceSlicer::getAnalysisUsage(AnalysisUsage &AU) const { @@ -121,7 +172,7 @@ void TraceSlicer::printTrace(raw_ostream &O, } void TraceSlicer::print(raw_ostream &O, const Module *M) const { - O << "RecID\tPtr\tValueID\tFunc: Inst/Arg\n"; + O << "RecID\tPtr\tValueID\tGV/Inst/Arg\n"; int Index[2]; Index[0] = Trace[0].Slice.size() - 1; Index[1] = Trace[1].Slice.size() - 1; @@ -166,20 +217,21 @@ void TraceSlicer::processTopLevel(const TopLevelRecord &Record) { CurrentRecord.PointeeAddress = Record.PointeeAddress; CurrentRecord.PointerAddress = Record.LoadedFrom; + Function *ContainingFunction = NULL; + if (Argument *A = dyn_cast(V)) + ContainingFunction = A->getParent(); + else if (Instruction *I = dyn_cast(V)) + ContainingFunction = I->getParent()->getParent(); + for (int PointerLabel = 0; PointerLabel < 2; ++PointerLabel) { if (Trace[PointerLabel].StartingRecordID == CurrentRecordID) { - // set StartingFunction - if (Argument *A = dyn_cast(V)) - Trace[PointerLabel].StartingFunction = A->getParent(); - else if (Instruction *I = dyn_cast(V)) - Trace[PointerLabel].StartingFunction = I->getParent()->getParent(); - else - Trace[PointerLabel].StartingFunction = NULL; - + // first value found + Trace[PointerLabel].StartingFunction = ContainingFunction; Trace[PointerLabel].Active = true; Trace[PointerLabel].Slice.push_back(make_pair(CurrentRecordID, CurrentRecord.V)); Trace[PointerLabel].PreviousRecord = CurrentRecord; + CurrentFunction = ContainingFunction; NumContainingSlices++; } else if (Trace[PointerLabel].Active) { pair Result = dependsOn(CurrentRecord, @@ -189,14 +241,26 @@ void TraceSlicer::processTopLevel(const TopLevelRecord &Record) { Trace[PointerLabel].Slice.push_back(make_pair(CurrentRecordID, CurrentRecord.V)); Trace[PointerLabel].PreviousRecord = CurrentRecord; + CurrentFunction = ContainingFunction; NumContainingSlices++; } } } // If two sliced traces meet, we stop tracking if (NumContainingSlices == 2) { - Trace[0].Active = false; - Trace[1].Active = false; + Merged = true; + if (!SliceForReduction) { + Trace[0].Active = false; + Trace[1].Active = false; + } + } +} + +void TraceSlicer::processEnter(const EnterRecord &Record) { + CurrentRecordID--; + for (int PointerLabel = 0; PointerLabel < 2; ++PointerLabel) { + // Starting record must be a TopLevel record + assert(Trace[PointerLabel].StartingRecordID != CurrentRecordID); } } @@ -222,6 +286,7 @@ void TraceSlicer::processStore(const StoreRecord &Record) { Trace[PointerLabel].Slice.push_back(make_pair(CurrentRecordID, CurrentRecord.V)); Trace[PointerLabel].PreviousRecord = CurrentRecord; + CurrentFunction = I->getParent()->getParent(); NumContainingSlices++; } } @@ -239,6 +304,18 @@ void TraceSlicer::processCall(const CallRecord &Record) { LogRecordInfo CurrentRecord; CurrentRecord.V = I; + // get all related function calls for reduction + if (SliceForReduction && PushCallInst) { + for (int PointerLabel = 0; PointerLabel < 2; ++PointerLabel) { + if (Trace[PointerLabel].Active) { + Trace[PointerLabel].Slice.push_back(make_pair(CurrentRecordID, + CurrentRecord.V)); + } + } + PushCallInst = false; + CurrentFunction = I->getParent()->getParent(); + } + for (int PointerLabel = 0; PointerLabel < 2; ++PointerLabel) { // Starting record must be a TopLevel record assert(Trace[PointerLabel].StartingRecordID != CurrentRecordID); @@ -247,8 +324,9 @@ void TraceSlicer::processCall(const CallRecord &Record) { Trace[PointerLabel].PreviousRecord); Trace[PointerLabel].Active = Result.second; if (Result.first) { - Trace[PointerLabel].Slice.push_back(make_pair(CurrentRecordID, - CurrentRecord.V)); + if (!SliceForReduction) + Trace[PointerLabel].Slice.push_back(make_pair(CurrentRecordID, + CurrentRecord.V)); Trace[PointerLabel].PreviousRecord = CurrentRecord; NumContainingSlices++; } @@ -281,7 +359,7 @@ void TraceSlicer::processReturn(const ReturnRecord &Record) { NumContainingSlices++; } else { // print return instruction of the starting function - if (I->getParent()->getParent() == + if (!SliceForReduction && I->getParent()->getParent() == Trace[PointerLabel].StartingFunction) { Trace[PointerLabel].Slice.push_back(make_pair(CurrentRecordID, CurrentRecord.V)); @@ -297,6 +375,24 @@ void TraceSlicer::processBasicBlock(const BasicBlockRecord &Record) { // Starting record must be a TopLevel record assert(Trace[PointerLabel].StartingRecordID != CurrentRecordID); } + if (SliceForReduction) { + // record executed basic blocks + IDAssigner &IDA = getAnalysis(); + Value *V = IDA.getValue(Record.ValueID); + BasicBlock *BB = cast(V); + ExecutedBasicBlocks.insert(BB); + + // record related functions + if (Trace[0].Active || Trace[1].Active) { + Function *F = BB->getParent(); + if (&(F->getEntryBlock()) == BB) { + if (F == CurrentFunction) { + PushCallInst = true; + RelatedFunctions.insert(F); + } + } + } + } } // whether R1 depend on R2, return @@ -309,17 +405,22 @@ pair TraceSlicer::dependsOn(LogRecordInfo &R1, LogRecordInfo &R2) { // R2 is CallRecord return make_pair(R1.V == getOperandIfConstant(CS2.getArgument(R2.ArgNo)), true); - } else if (CS1 && R1.V == R2.V) { + } else if (CS1) { // R2 is an external function call + assert(R1.V == R2.V); return make_pair(false, false); - } else if (ReturnInst *RI = dyn_cast(R1.V)) { - return make_pair(isCalledFunction(RI->getParent()->getParent(), CS2), - true); + } else if (dyn_cast(R1.V)) { + return make_pair(true, true); } else { - return make_pair(false, true); + assert(false); } } else if (Argument *A = dyn_cast(R2.V)) { - if (CS1 && isCalledFunction(A->getParent(), CS1)) { + if (CS1) { + Function *CalledFunction = CS1.getCalledFunction(); + if (CalledFunction && CalledFunction->isDeclaration()) { + // containing function is called by external function + return make_pair(false, false); + } R1.ArgNo = A->getArgNo(); return make_pair(true, true); } else { @@ -364,21 +465,6 @@ pair TraceSlicer::dependsOn(LogRecordInfo &R1, LogRecordInfo &R2) { } } -bool TraceSlicer::isCalledFunction(Function *F, CallSite CS) { - if (CS.getCalledFunction() != NULL) - return F == CS.getCalledFunction(); - // if CS call a value, judge by comparing return type and argument type - // this is a temporary method to solve multithread problem - if (F->getReturnType() != CS.getType()) - return false; - if (F->getFunctionType()->getNumParams() != CS.arg_size()) - return false; - for (unsigned i = 0; i < CS.arg_size(); ++i) - if (F->getFunctionType()->getParamType(i) != (CS.getArgument(i))->getType()) - return false; - return true; -} - // get operand if V is a constant expression Value *TraceSlicer::getOperandIfConstant(Value *V) { Operator *Op = dyn_cast(V); @@ -397,3 +483,32 @@ Value *TraceSlicer::getLatestCommonAncestor() { } return NULL; } + +void TraceSlicer::addMetaData(Value *V, string Kind, Module *M) { + Function *DeclareFn = Intrinsic::getDeclaration(M, Intrinsic::dbg_declare); + Instruction *Inst = dyn_cast(V); + if (!Inst) { + vector Args; + Instruction *InsertBefore; + if (BasicBlock *BB = dyn_cast(V)) { + // add metadata for basic block + Args.push_back(MDNode::get(M->getContext(), NULL)); + InsertBefore = BB->getFirstInsertionPt(); + } else { + Function *F; + if (Argument *A = dyn_cast(V)) { + // add metadata for argument + F = A->getParent(); + } else { + // add metadata for global variable + F = M->getFunction("main"); + assert(F); + } + Args.push_back(MDNode::get(V->getContext(), V)); + InsertBefore = F->getEntryBlock().getFirstInsertionPt(); + } + Args.push_back(MDNode::get(M->getContext(), NULL)); + Inst = CallInst::Create(DeclareFn, Args, "", InsertBefore); + } + Inst->setMetadata(Kind, MDNode::get(M->getContext(), NULL)); +} diff --git a/lib/Utils/LogProcessor.cpp b/lib/Utils/LogProcessor.cpp index c567c86..9e53ba3 100644 --- a/lib/Utils/LogProcessor.cpp +++ b/lib/Utils/LogProcessor.cpp @@ -36,16 +36,18 @@ STATISTIC(NumRecords, "Number of all records"); void LogProcessor::processLog(bool Reversed) { assert(LogFileNames.size() && "Didn't specify the log file."); for (unsigned i = 0; i < LogFileNames.size(); i++) { - processLog(LogFileNames[i], Reversed); + if (processLog(LogFileNames[i], Reversed)) + break; } } -void LogProcessor::processLog(const std::string &LogFileName, bool Reversed) { +bool LogProcessor::processLog(const std::string &LogFileName, bool Reversed) { FILE *LogFile = fopen(LogFileName.c_str(), "rb"); assert(LogFile && "The log file doesn't exist."); errs().changeColor(raw_ostream::BLUE); errs() << "Processing log " << LogFileName << " ...\n"; errs().resetColor(); + CurrentFileName = LogFileName; if (Reversed) { // Set the file position to the end. @@ -110,9 +112,9 @@ void LogProcessor::processLog(const std::string &LogFileName, bool Reversed) { errs().resetColor(); } - finalize(); - fclose(LogFile); + + return finalize(); } bool LogProcessor::ReadData(void *P, int Length, bool Reversed, FILE *LogFile) { diff --git a/tools/dynaa_reduce.py b/tools/dynaa_reduce.py index 9d9e6ee..ebd64e0 100644 --- a/tools/dynaa_reduce.py +++ b/tools/dynaa_reduce.py @@ -18,26 +18,38 @@ parser.add_argument('vid2', help = 'ValueID of Pointer 2') args = parser.parse_args() - cmd = dynaa_utils.load_all_plugins('opt') - # reducer need be put before aa - cmd = ' '.join((cmd, '-remove-untouched-code')) - cmd = ' '.join((cmd, '-simplifycfg')) + cmd = dynaa_utils.load_all_plugins('dynaa_opt') + # cmd = ' '.join((cmd, '-debug-pass=Details')) + + # slice trace and add tags for reducer + cmd = ' '.join((cmd, '-slice-for-reduction')) + cmd = ' '.join((cmd, '-starting-value', args.vid1)) + cmd = ' '.join((cmd, '-starting-value', args.vid2)) + for log in args.logs: + cmd = ' '.join((cmd, '-log-file', log)) # Load the checked AA cmd = dynaa_utils.load_aa(cmd, args.aa) - cmd = ' '.join((cmd, '-verify-reducer')) - cmd = ' '.join((cmd, '-strip')) - for log in args.logs: - cmd = ' '.join((cmd, '-log-file', log)) - cmd = ' '.join((cmd, '-pointer-value', args.vid1)) - cmd = ' '.join((cmd, '-pointer-value', args.vid2)) - cmd = ' '.join((cmd, '-o', args.prog + '.reduce.bc')) + cmd = ' '.join((cmd, '>', args.prog + '.reduce.big.bc')) cmd = ' '.join((cmd, '<', args.prog + '.bc')) rcs_utils.invoke(cmd) + # simplifycfg and strip + cmd = dynaa_utils.load_all_plugins('opt') + cmd = ' '.join((cmd, '-simplifycfg')) + cmd = ' '.join((cmd, '-strip')) + cmd = ' '.join((cmd, '-verify')) + cmd = ' '.join((cmd, '>', args.prog + '.reduce.bc')) + cmd = ' '.join((cmd, '<', args.prog + '.reduce.big.bc')) + rcs_utils.invoke(cmd) + + # reducer may lead to linking errors cmd = ' '.join(('clang++', args.prog + '.reduce.bc', '-o', args.prog + '.reduce')) linking_flags = rcs_utils.get_linking_flags(args.prog) cmd = ' '.join((cmd, ' '.join(linking_flags))) + # rcs_utils.invoke(cmd) + + cmd = ' '.join(('llvm-dis', args.prog + '.reduce.bc')) rcs_utils.invoke(cmd) diff --git a/tools/dynaa_slice_trace.py b/tools/dynaa_slice_trace.py index 395b9fb..c26725a 100644 --- a/tools/dynaa_slice_trace.py +++ b/tools/dynaa_slice_trace.py @@ -11,7 +11,7 @@ parser = argparse.ArgumentParser(description = 'Find out the trace of ' + \ 'two pointers who alias in real execution') parser.add_argument('bc', help = 'the bitcode of the program') - parser.add_argument('log', help = 'the point-to log (.pts)') + parser.add_argument('logs', nargs='+', help = 'the point-to log (.ptss)') parser.add_argument('id1', help = 'RecordID/ValueID of Pointer 1') parser.add_argument('id2', help = 'RecordID/ValueID of Pointer 2') parser.add_argument('--value', @@ -23,7 +23,8 @@ cmd = dynaa_utils.load_all_plugins('opt') cmd = string.join((cmd, '-slice-trace')) - cmd = string.join((cmd, '-log-file', args.log)) + for log in args.logs: + cmd = string.join((cmd, '-log-file', log)) if args.value: cmd = string.join((cmd, '-starting-value', args.id1)) cmd = string.join((cmd, '-starting-value', args.id2)) diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 9cbadaa..bc22df7 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -5,6 +5,7 @@ #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/IRReader.h" +#include "llvm/Support/PassNameParser.h" // necessary to support "-load" #include "llvm/Support/PluginLoader.h" #include "llvm/Support/PrettyStackTrace.h" @@ -12,22 +13,82 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "rcs/IDAssigner.h" +#include "dyn-aa/BaselineAliasAnalysis.h" #include "dyn-aa/Passes.h" +#include "dyn-aa/TraceSlicer.h" +#include "dyn-aa/Reducer.h" +#include "dyn-aa/ReductionVerifier.h" + using namespace std; using namespace llvm; using namespace rcs; using namespace dynaa; +static cl::list +PassList(cl::desc("Optimizations available:")); + +// return 1 if verified, 0 if not verified, -1 if no reduction available +int reduce(Module *M, const vector &ReductionOptions, Pass *BW = NULL) { + // initialize passes + Reducer *R = new Reducer(); + if (!R->setReductionOptions(ReductionOptions)) { + // all reduction methods are tried + return -1; + } + ReductionVerifier *V = new ReductionVerifier(); + Pass *AA = NULL; + assert(PassList.size() == 1); + for (unsigned i = 0; i < PassList.size(); ++i) { + const PassInfo *PassInf = PassList[i]; + Pass *P = 0; + if (PassInf->getNormalCtor()) + P = PassInf->getNormalCtor()(); + if (P) { + AA = P; + } + } + assert(AA); + + PassManager Passes; + + const std::string &ModuleDataLayout = M->getDataLayout(); + if (!ModuleDataLayout.empty()) + Passes.add(new TargetData(ModuleDataLayout)); + Passes.add(R); + Passes.add(AA); + Passes.add(V); + if (BW) { + Passes.add(BW); + } + Passes.run(*M); + delete M; + return V->getVerified(); +} + int main(int argc, char *argv[]) { sys::PrintStackTraceOnErrorSignal(); llvm::PrettyStackTraceProgram X(argc, argv); cl::ParseCommandLineOptions(argc, argv, "fake opt"); + // Initialize passes + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeCore(Registry); + initializeScalarOpts(Registry); + initializeVectorization(Registry); + initializeIPO(Registry); + initializeAnalysis(Registry); + initializeIPA(Registry); + initializeTransformUtils(Registry); + initializeInstCombine(Registry); + initializeInstrumentation(Registry); + initializeTarget(Registry); + SMDiagnostic Err; Module *M = ParseIRFile("-", Err, getGlobalContext()); if (!M) { @@ -42,19 +103,36 @@ int main(int argc, char *argv[]) { return 1; } - PassManager Passes; - // MemoryInstrumenter does not initialize required passes. Therefore, we need - // manually add them. Otherwise, PassManager won't be able to find the - // required passes. + errs() << "Tagging ...\n"; + PassManager TaggingPasses; const std::string &ModuleDataLayout = M->getDataLayout(); if (!ModuleDataLayout.empty()) - Passes.add(new TargetData(ModuleDataLayout)); - Passes.add(new IDAssigner()); - Passes.add(createMemoryInstrumenterPass()); - Passes.add(createBitcodeWriterPass(Out.os())); - Passes.run(*M); + TaggingPasses.add(new TargetData(ModuleDataLayout)); + TaggingPasses.add(new TraceSlicer()); + TaggingPasses.run(*M); - delete M; + errs() << "Reducing on tested module ...\n"; + vector ReductionOptions; + while (true) { + Module *TestedModule = llvm::CloneModule(M); + ReductionOptions.push_back(true); + int result = reduce(TestedModule, ReductionOptions); + if (result == -1) { + // no reduction available + ReductionOptions.pop_back(); + break; + } else if (result == 0) { + // not verified + errs() << "Disable reduction " << ReductionOptions.size() << "\n"; + ReductionOptions.pop_back(); + ReductionOptions.push_back(false); + } + errs() << "\n"; + } + + errs() << "Reducing on real module ...\n"; + Pass * BW = createBitcodeWriterPass(Out.os()); + reduce(M, ReductionOptions, BW); return 0; }