-
Notifications
You must be signed in to change notification settings - Fork 482
WIP: Baseline which can run big programs #1789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 23 commits
6846eb2
353871d
3847a44
b7be147
e7f18f8
500c22f
94ab144
809a397
ec7f7ca
8b0605c
e6942d5
9c72e10
e0d7674
bf793f8
09e86d5
76c2374
7fce768
d3b4fae
4c87a38
7444f96
2aa167c
afb1b8f
17c751c
bbb2c39
d70d6fa
4d364ff
c0b4582
1bdb795
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -144,6 +144,15 @@ class AbstractInterpretation | |
| /// Program entry | ||
| void analyse(); | ||
|
|
||
| /// Analyze all entry points (functions without callers) | ||
| void analyseFromAllEntries(); | ||
|
|
||
| /// Get all entry point functions (functions without callers) | ||
| std::vector<const FunObjVar*> collectEntryFunctions(); | ||
|
||
|
|
||
| /// Clear abstract trace for fresh analysis from new entry | ||
| void clearAbstractTrace(); | ||
|
|
||
| static AbstractInterpretation& getAEInstance() | ||
| { | ||
| static AbstractInterpretation instance; | ||
|
|
@@ -358,6 +367,7 @@ class AbstractInterpretation | |
| Map<std::string, std::function<void(const CallICFGNode*)>> func_map; | ||
|
|
||
| Map<const ICFGNode*, AbstractState> abstractTrace; // abstract states immediately after nodes | ||
| Set<const ICFGNode*> allAnalyzedNodes; // All nodes ever analyzed (across all entry points) | ||
| std::string moduleName; | ||
|
|
||
| std::vector<std::unique_ptr<AEDetector>> detectors; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -479,7 +479,12 @@ bool BufOverflowDetector::canSafelyAccessMemory(AbstractState& as, const SVF::SV | |
| SVFIR* svfir = PAG::getPAG(); | ||
| NodeID value_id = value->getId(); | ||
|
|
||
| assert(as[value_id].isAddr()); | ||
| // In multi-entry analysis, some variables may not be initialized as addresses | ||
|
||
| if (!as[value_id].isAddr()) | ||
| { | ||
| // Conservatively assume safe when we don't have address information | ||
| return true; | ||
| } | ||
| for (const auto& addr : as[value_id].getAddrs()) | ||
| { | ||
| NodeID objId = as.getIDFromAddr(addr); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -498,6 +498,9 @@ void AbsExtAPI::handleStrcpy(const CallICFGNode *call) | |
| const SVFVar* arg1Val = call->getArgument(1); | ||
| IntervalValue strLen = getStrlen(as, arg1Val); | ||
| // no need to -1, since it has \0 as the last byte | ||
| // Skip if strLen is bottom or unbounded | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could we share as much code as possible across the string-handling functions you have? |
||
| if (strLen.isBottom() || strLen.lb().is_minus_infinity()) | ||
| return; | ||
| handleMemcpy(as, arg0Val, arg1Val, strLen, strLen.lb().getIntNumeral()); | ||
| } | ||
|
|
||
|
|
@@ -592,6 +595,9 @@ void AbsExtAPI::handleStrcat(const SVF::CallICFGNode *call) | |
| IntervalValue strLen0 = getStrlen(as, arg0Val); | ||
| IntervalValue strLen1 = getStrlen(as, arg1Val); | ||
| IntervalValue totalLen = strLen0 + strLen1; | ||
| // Skip if strLen0 is bottom or unbounded | ||
| if (strLen0.isBottom() || strLen0.lb().is_minus_infinity()) | ||
| return; | ||
| handleMemcpy(as, arg0Val, arg1Val, strLen1, strLen0.lb().getIntNumeral()); | ||
| // do memcpy | ||
| } | ||
|
|
@@ -603,6 +609,9 @@ void AbsExtAPI::handleStrcat(const SVF::CallICFGNode *call) | |
| IntervalValue arg2Num = as[arg2Val->getId()].getInterval(); | ||
| IntervalValue strLen0 = getStrlen(as, arg0Val); | ||
| IntervalValue totalLen = strLen0 + arg2Num; | ||
| // Skip if strLen0 is bottom or unbounded | ||
| if (strLen0.isBottom() || strLen0.lb().is_minus_infinity()) | ||
| return; | ||
| handleMemcpy(as, arg0Val, arg1Val, arg2Num, strLen0.lb().getIntNumeral()); | ||
| // do memcpy | ||
| } | ||
|
|
@@ -640,6 +649,11 @@ void AbsExtAPI::handleMemcpy(AbstractState& as, const SVF::SVFVar *dst, const SV | |
| { | ||
| assert(false && "we cannot support this type"); | ||
| } | ||
| // Handle bottom or unbounded interval - skip memcpy in these cases | ||
| if (len.isBottom() || len.lb().is_minus_infinity()) | ||
| { | ||
| return; | ||
| } | ||
| u32_t size = std::min((u32_t)Options::MaxFieldLimit(), (u32_t) len.lb().getIntNumeral()); | ||
| u32_t range_val = size / elemSize; | ||
| if (as.inVarToAddrsTable(srcId) && as.inVarToAddrsTable(dstId)) | ||
|
|
@@ -672,6 +686,11 @@ void AbsExtAPI::handleMemcpy(AbstractState& as, const SVF::SVFVar *dst, const SV | |
|
|
||
| void AbsExtAPI::handleMemset(AbstractState& as, const SVF::SVFVar *dst, IntervalValue elem, IntervalValue len) | ||
| { | ||
| // Handle bottom or unbounded interval - skip memset in these cases | ||
| if (len.isBottom() || len.lb().is_minus_infinity()) | ||
| { | ||
| return; | ||
| } | ||
| u32_t dstId = dst->getId(); | ||
| u32_t size = std::min((u32_t)Options::MaxFieldLimit(), (u32_t) len.lb().getIntNumeral()); | ||
| u32_t elemSize = 1; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -162,20 +162,106 @@ void AbstractInterpretation::initWTO() | |
| } | ||
| } | ||
|
|
||
| /// Program entry | ||
| /// Collect all entry point functions (functions without callers) | ||
| std::vector<const FunObjVar*> AbstractInterpretation::collectEntryFunctions() | ||
|
||
| { | ||
| std::vector<const FunObjVar*> entryFunctions; | ||
| const CallGraph* callGraph = svfir->getCallGraph(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to use Andersen's call graph if we have one. |
||
|
|
||
| for (auto it = callGraph->begin(); it != callGraph->end(); ++it) | ||
| { | ||
| const CallGraphNode* cgNode = it->second; | ||
| const FunObjVar* fun = cgNode->getFunction(); | ||
|
|
||
| // Skip declarations | ||
| if (fun->isDeclaration()) | ||
| continue; | ||
|
|
||
| // Check if function has no callers (entry point) | ||
| if (cgNode->getInEdges().empty()) | ||
| { | ||
| entryFunctions.push_back(fun); | ||
| } | ||
| } | ||
|
|
||
| // If main exists, put it first for priority | ||
| auto mainIt = std::find_if(entryFunctions.begin(), entryFunctions.end(), | ||
| [](const FunObjVar* f) { return f->getName() == "main"; }); | ||
| if (mainIt != entryFunctions.end() && mainIt != entryFunctions.begin()) | ||
| { | ||
| std::iter_swap(entryFunctions.begin(), mainIt); | ||
| } | ||
|
|
||
| return entryFunctions; | ||
| } | ||
|
|
||
| /// Clear abstract trace for fresh analysis from new entry | ||
| void AbstractInterpretation::clearAbstractTrace() | ||
| { | ||
| abstractTrace.clear(); | ||
| } | ||
|
|
||
| /// Program entry - analyze from main if exists, otherwise analyze from all entry points | ||
| void AbstractInterpretation::analyse() | ||
| { | ||
| initWTO(); | ||
| // handle Global ICFGNode of SVFModule | ||
| handleGlobalNode(); | ||
| getAbsStateFromTrace( | ||
| icfg->getGlobalICFGNode())[PAG::getPAG()->getBlkPtr()] = IntervalValue::top(); | ||
|
|
||
| // If -ae-multientry is set, always use multi-entry analysis | ||
| if (Options::AEMultiEntry()) | ||
|
||
| { | ||
| SVFUtil::outs() << "Multi-entry analysis enabled, analyzing from all entry points...\n"; | ||
| analyseFromAllEntries(); | ||
| return; | ||
| } | ||
|
|
||
| // Default behavior: start from main if exists | ||
| if (const CallGraphNode* cgn = svfir->getCallGraph()->getCallGraphNode("main")) | ||
| { | ||
| // Use worklist-based function handling instead of recursive WTO component handling | ||
| const ICFGNode* mainEntry = icfg->getFunEntryICFGNode(cgn->getFunction()); | ||
| handleFunction(mainEntry); | ||
| } | ||
| else | ||
| { | ||
| // No main function found, analyze from all entry points (library code) | ||
| SVFUtil::outs() << "No main function found, analyzing from all entry points...\n"; | ||
| analyseFromAllEntries(); | ||
| } | ||
| } | ||
|
|
||
| /// Analyze all entry points (functions without callers) - for whole-program analysis without main | ||
| void AbstractInterpretation::analyseFromAllEntries() | ||
| { | ||
| initWTO(); | ||
|
|
||
| // Collect all entry point functions | ||
| std::vector<const FunObjVar*> entryFunctions = collectEntryFunctions(); | ||
|
|
||
| if (entryFunctions.empty()) | ||
| { | ||
| SVFUtil::errs() << "Warning: No entry functions found for analysis\n"; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should always have at least one entry function (a function with no caller). Maybe an assert is better. |
||
| return; | ||
| } | ||
|
|
||
| // Analyze from each entry point independently (Scenario 2: different entries -> fresh start) | ||
| for (const FunObjVar* entryFun : entryFunctions) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think handling the global node should be done before the entry functions? Also, it would be good to add the entry ICFGNode of each entry function to the worklist for later abstract interpretation? |
||
| { | ||
| // Clear abstract trace for fresh analysis from this entry | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Handling the global node can be done outside this for loop? It is unclear why we need to clear the abstract trace here. The abstract states for an ICFGNode A should be merged if A has two callers (if both callers are entry functions)? |
||
| clearAbstractTrace(); | ||
|
|
||
| // Handle global node for each entry (global state is shared across entries) | ||
| handleGlobalNode(); | ||
| getAbsStateFromTrace( | ||
| icfg->getGlobalICFGNode())[PAG::getPAG()->getBlkPtr()] = IntervalValue::top(); | ||
|
||
|
|
||
| // Analyze from this entry function | ||
| const ICFGNode* funEntry = icfg->getFunEntryICFGNode(entryFun); | ||
| handleFunction(funEntry); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Need to double-check. |
||
| } | ||
| } | ||
|
|
||
| /// handle global node | ||
|
|
@@ -661,6 +747,9 @@ bool AbstractInterpretation::handleICFGNode(const ICFGNode* node) | |
| detector->detect(getAbsStateFromTrace(node), node); | ||
| stat->countStateSize(); | ||
|
|
||
| // Track this node as analyzed (for coverage statistics across all entry points) | ||
| allAnalyzedNodes.insert(node); | ||
|
|
||
| // Check if state changed (for fixpoint detection) | ||
| // For entry nodes on first visit, always return true to process successors | ||
| if (isFunEntry && !hadPrevState) | ||
|
|
@@ -1229,15 +1318,39 @@ void AEStat::finializeStat() | |
| generalNumMap["ES_Loc_Addr_AVG_Num"] /= count; | ||
| } | ||
| generalNumMap["SVF_STMT_NUM"] = count; | ||
| generalNumMap["ICFG_Node_Num"] = _ae->svfir->getICFG()->nodeNum; | ||
|
|
||
| u32_t totalICFGNodes = _ae->svfir->getICFG()->nodeNum; | ||
| generalNumMap["ICFG_Node_Num"] = totalICFGNodes; | ||
|
|
||
| // Calculate coverage: use allAnalyzedNodes which tracks all nodes across all entry points | ||
| u32_t analyzedNodes = _ae->allAnalyzedNodes.size(); | ||
| generalNumMap["Analyzed_ICFG_Node_Num"] = analyzedNodes; | ||
|
|
||
| // Coverage percentage (stored as integer percentage * 100 for precision) | ||
| if (totalICFGNodes > 0) | ||
| { | ||
| double coveragePercent = (double)analyzedNodes / (double)totalICFGNodes * 100.0; | ||
| generalNumMap["ICFG_Coverage_Percent"] = (u32_t)(coveragePercent * 100); // Store as percentage * 100 | ||
| } | ||
| else | ||
| { | ||
| generalNumMap["ICFG_Coverage_Percent"] = 0; | ||
| } | ||
|
|
||
| u32_t callSiteNum = 0; | ||
| u32_t extCallSiteNum = 0; | ||
| Set<const FunObjVar *> funs; | ||
| Set<const FunObjVar *> analyzedFuns; | ||
| for (const auto &it: *_ae->svfir->getICFG()) | ||
| { | ||
| if (it.second->getFun()) | ||
| { | ||
| funs.insert(it.second->getFun()); | ||
| // Check if this node was analyzed (across all entry points) | ||
| if (_ae->allAnalyzedNodes.find(it.second) != _ae->allAnalyzedNodes.end()) | ||
| { | ||
| analyzedFuns.insert(it.second->getFun()); | ||
| } | ||
| } | ||
| if (const CallICFGNode *callNode = dyn_cast<CallICFGNode>(it.second)) | ||
| { | ||
|
|
@@ -1252,6 +1365,19 @@ void AEStat::finializeStat() | |
| } | ||
| } | ||
| generalNumMap["Func_Num"] = funs.size(); | ||
| generalNumMap["Analyzed_Func_Num"] = analyzedFuns.size(); | ||
|
|
||
| // Function coverage percentage | ||
| if (funs.size() > 0) | ||
| { | ||
| double funcCoveragePercent = (double)analyzedFuns.size() / (double)funs.size() * 100.0; | ||
| generalNumMap["Func_Coverage_Percent"] = (u32_t)(funcCoveragePercent * 100); // Store as percentage * 100 | ||
| } | ||
| else | ||
| { | ||
| generalNumMap["Func_Coverage_Percent"] = 0; | ||
| } | ||
|
|
||
| generalNumMap["EXT_CallSite_Num"] = extCallSiteNum; | ||
| generalNumMap["NonEXT_CallSite_Num"] = callSiteNum; | ||
| timeStatMap["Total_Time(sec)"] = (double)(endTime - startTime) / TIMEINTERVAL; | ||
|
|
@@ -1280,8 +1406,16 @@ void AEStat::performStat() | |
| unsigned field_width = 30; | ||
| for (NUMStatMap::iterator it = generalNumMap.begin(), eit = generalNumMap.end(); it != eit; ++it) | ||
| { | ||
| // format out put with width 20 space | ||
| std::cout << std::setw(field_width) << it->first << it->second << "\n"; | ||
| // Special handling for percentage fields (stored as percentage * 100) | ||
| if (it->first == "ICFG_Coverage_Percent" || it->first == "Func_Coverage_Percent") | ||
| { | ||
| double percent = (double)it->second / 100.0; | ||
| std::cout << std::setw(field_width) << it->first << std::fixed << std::setprecision(2) << percent << "%\n"; | ||
| } | ||
| else | ||
| { | ||
| std::cout << std::setw(field_width) << it->first << it->second << "\n"; | ||
| } | ||
| } | ||
| SVFUtil::outs() << "-------------------------------------------------------\n"; | ||
| for (TIMEStatMap::iterator it = timeStatMap.begin(), eit = timeStatMap.end(); it != eit; ++it) | ||
|
|
@@ -1605,6 +1739,13 @@ void AbstractInterpretation::updateStateOnCmp(const CmpStmt *cmp) | |
| case CmpStmt::FCMP_TRUE: | ||
| resVal = IntervalValue(1, 1); | ||
| break; | ||
| case CmpStmt::FCMP_ORD: | ||
| case CmpStmt::FCMP_UNO: | ||
| // FCMP_ORD: true if both operands are not NaN | ||
| // FCMP_UNO: true if either operand is NaN | ||
| // Conservatively return [0, 1] since we don't track NaN | ||
| resVal = IntervalValue(0, 1); | ||
| break; | ||
| default: | ||
| assert(false && "undefined compare: "); | ||
| } | ||
|
|
@@ -1719,6 +1860,13 @@ void AbstractInterpretation::updateStateOnCmp(const CmpStmt *cmp) | |
| case CmpStmt::FCMP_TRUE: | ||
| resVal = IntervalValue(1, 1); | ||
| break; | ||
| case CmpStmt::FCMP_ORD: | ||
| case CmpStmt::FCMP_UNO: | ||
| // FCMP_ORD: true if both operands are not NaN | ||
| // FCMP_UNO: true if either operand is NaN | ||
| // Conservatively return [0, 1] since we don't track NaN | ||
| resVal = IntervalValue(0, 1); | ||
| break; | ||
| default: | ||
| assert(false && "undefined compare: "); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
analyzeFromAllProgEntries