-
Notifications
You must be signed in to change notification settings - Fork 482
WIP: Baseline which can run big programs #1789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
6846eb2
353871d
3847a44
b7be147
e7f18f8
500c22f
94ab144
809a397
ec7f7ca
8b0605c
e6942d5
9c72e10
e0d7674
bf793f8
09e86d5
76c2374
7fce768
d3b4fae
4c87a38
7444f96
2aa167c
afb1b8f
17c751c
bbb2c39
d70d6fa
4d364ff
c0b4582
1bdb795
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,7 @@ | |
| #include "Graphs/CallGraph.h" | ||
| #include "WPA/Andersen.h" | ||
| #include <cmath> | ||
| #include <deque> | ||
|
|
||
| using namespace SVF; | ||
| using namespace SVFUtil; | ||
|
|
@@ -162,33 +163,105 @@ void AbstractInterpretation::initWTO() | |
| } | ||
| } | ||
|
|
||
| /// Program entry | ||
| /// Collect entry point functions for analysis. | ||
| /// Entry points are functions without callers (no incoming edges in CallGraph). | ||
| /// Uses a deque to allow efficient insertion at front for prioritizing main() | ||
| std::deque<const FunObjVar*> AbstractInterpretation::collectProgEntryFuns() | ||
| { | ||
| std::deque<const FunObjVar*> entryFunctions; | ||
| const CallGraph* callGraph = svfir->getCallGraph(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to use Andersen's call graph here if one is available. |
||
|
|
||
| for (auto it = callGraph->begin(); it != callGraph->end(); ++it) | ||
| { | ||
| const CallGraphNode* cgNode = it->second; | ||
| const FunObjVar* fun = cgNode->getFunction(); | ||
|
|
||
| // Skip declarations | ||
| if (fun->isDeclaration()) | ||
| continue; | ||
|
|
||
| // Entry points are functions without callers (no incoming edges) | ||
| if (cgNode->getInEdges().empty()) | ||
| { | ||
| // If main exists, put it first for priority using deque's push_front | ||
| if (fun->getName() == "main") | ||
| { | ||
| entryFunctions.push_front(fun); | ||
| } | ||
| else | ||
| { | ||
| entryFunctions.push_back(fun); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return entryFunctions; | ||
| } | ||
|
|
||
| /// Clear abstract trace for fresh analysis from new entry | ||
| void AbstractInterpretation::clearAbstractTrace() | ||
| { | ||
| abstractTrace.clear(); | ||
| } | ||
|
|
||
| /// Program entry - analyze from all entry points (multi-entry analysis is the default) | ||
| void AbstractInterpretation::analyse() | ||
| { | ||
| initWTO(); | ||
| // handle Global ICFGNode of SVFModule | ||
| handleGlobalNode(); | ||
| getAbsStateFromTrace( | ||
| icfg->getGlobalICFGNode())[PAG::getPAG()->getBlkPtr()] = IntervalValue::top(); | ||
| if (const CallGraphNode* cgn = svfir->getCallGraph()->getCallGraphNode("main")) | ||
|
|
||
| // Always use multi-entry analysis from all entry points | ||
| analyzeFromAllProgEntries(); | ||
| } | ||
|
|
||
| /// Analyze all entry points (functions without callers) - for whole-program analysis without main | ||
| void AbstractInterpretation::analyzeFromAllProgEntries() | ||
| { | ||
| // Collect all entry point functions | ||
| std::deque<const FunObjVar*> entryFunctions = collectProgEntryFuns(); | ||
|
|
||
| if (entryFunctions.empty()) | ||
| { | ||
| // Use worklist-based function handling instead of recursive WTO component handling | ||
| const ICFGNode* mainEntry = icfg->getFunEntryICFGNode(cgn->getFunction()); | ||
| handleFunction(mainEntry); | ||
| SVFUtil::errs() << "Warning: No entry functions found for analysis\n"; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should always have at least one entry function (a function with no caller). Maybe an assert is better here. |
||
| return; | ||
| } | ||
|
|
||
| // Analyze from each entry point independently (Scenario 2: different entries -> fresh start) | ||
| for (const FunObjVar* entryFun : entryFunctions) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think handling the global node should be done before the entry functions? Also, it would be good to add the entry ICFGNode of each entry function to the worklist for later abstract interpretation? |
||
| { | ||
| // Clear abstract trace for fresh analysis from this entry | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Handling the global node can be done outside this for loop? It is also unclear why we need to clear the abstract trace here. The abstract states for an ICFGNode A should be merged if A has two callers (if both callers are entry functions)? |
||
| clearAbstractTrace(); | ||
|
|
||
| // Handle global node for each entry (global state is shared across entries) | ||
| handleGlobalNode(); | ||
|
|
||
| // Analyze from this entry function | ||
| const ICFGNode* funEntry = icfg->getFunEntryICFGNode(entryFun); | ||
| handleFunction(funEntry); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. need to double-check |
||
| } | ||
| } | ||
|
|
||
| /// handle global node | ||
| /// Initializes the abstract state for the global ICFG node and processes all global statements. | ||
| /// This includes setting the null pointer to a default-constructed AddressValue and the black hole pointer (blkPtr) to the top interval, | ||
| /// the latter representing unknown/uninitialized memory that can point to any location. | ||
| void AbstractInterpretation::handleGlobalNode() | ||
| { | ||
| const ICFGNode* node = icfg->getGlobalICFGNode(); | ||
| abstractTrace[node] = AbstractState(); | ||
| abstractTrace[node][IRGraph::NullPtr] = AddressValue(); | ||
|
|
||
| // Global Node, we just need to handle addr, load, store, copy and gep | ||
| for (const SVFStmt *stmt: node->getSVFStmts()) | ||
| { | ||
| handleSVFStatement(stmt); | ||
| } | ||
|
|
||
| // Set black hole pointer to top value - this represents unknown/uninitialized | ||
| // memory locations that may point anywhere. This is essential for soundness | ||
| // when analyzing code where pointers may not be fully initialized. | ||
| abstractTrace[node][PAG::getPAG()->getBlkPtr()] = IntervalValue::top(); | ||
| } | ||
|
|
||
| /// get execution state by merging states of predecessor blocks | ||
|
|
@@ -661,6 +734,9 @@ bool AbstractInterpretation::handleICFGNode(const ICFGNode* node) | |
| detector->detect(getAbsStateFromTrace(node), node); | ||
| stat->countStateSize(); | ||
|
|
||
| // Track this node as analyzed (for coverage statistics across all entry points) | ||
| allAnalyzedNodes.insert(node); | ||
|
|
||
| // Check if state changed (for fixpoint detection) | ||
| // For entry nodes on first visit, always return true to process successors | ||
| if (isFunEntry && !hadPrevState) | ||
|
|
@@ -1229,15 +1305,39 @@ void AEStat::finializeStat() | |
| generalNumMap["ES_Loc_Addr_AVG_Num"] /= count; | ||
| } | ||
| generalNumMap["SVF_STMT_NUM"] = count; | ||
| generalNumMap["ICFG_Node_Num"] = _ae->svfir->getICFG()->nodeNum; | ||
|
|
||
| u32_t totalICFGNodes = _ae->svfir->getICFG()->nodeNum; | ||
| generalNumMap["ICFG_Node_Num"] = totalICFGNodes; | ||
|
|
||
| // Calculate coverage: use allAnalyzedNodes which tracks all nodes across all entry points | ||
| u32_t analyzedNodes = _ae->allAnalyzedNodes.size(); | ||
| generalNumMap["Analyzed_ICFG_Node_Num"] = analyzedNodes; | ||
|
|
||
| // Coverage percentage (stored as integer percentage * 100 for precision) | ||
| if (totalICFGNodes > 0) | ||
| { | ||
| double coveragePercent = (double)analyzedNodes / (double)totalICFGNodes * 100.0; | ||
| generalNumMap["ICFG_Coverage_Percent"] = (u32_t)(coveragePercent * 100); // Store as percentage * 100 | ||
| } | ||
| else | ||
| { | ||
| generalNumMap["ICFG_Coverage_Percent"] = 0; | ||
| } | ||
|
|
||
| u32_t callSiteNum = 0; | ||
| u32_t extCallSiteNum = 0; | ||
| Set<const FunObjVar *> funs; | ||
| Set<const FunObjVar *> analyzedFuns; | ||
| for (const auto &it: *_ae->svfir->getICFG()) | ||
| { | ||
| if (it.second->getFun()) | ||
| { | ||
| funs.insert(it.second->getFun()); | ||
| // Check if this node was analyzed (across all entry points) | ||
| if (_ae->allAnalyzedNodes.find(it.second) != _ae->allAnalyzedNodes.end()) | ||
| { | ||
| analyzedFuns.insert(it.second->getFun()); | ||
| } | ||
| } | ||
| if (const CallICFGNode *callNode = dyn_cast<CallICFGNode>(it.second)) | ||
| { | ||
|
|
@@ -1252,6 +1352,19 @@ void AEStat::finializeStat() | |
| } | ||
| } | ||
| generalNumMap["Func_Num"] = funs.size(); | ||
| generalNumMap["Analyzed_Func_Num"] = analyzedFuns.size(); | ||
|
|
||
| // Function coverage percentage | ||
| if (funs.size() > 0) | ||
| { | ||
| double funcCoveragePercent = (double)analyzedFuns.size() / (double)funs.size() * 100.0; | ||
| generalNumMap["Func_Coverage_Percent"] = (u32_t)(funcCoveragePercent * 100); // Store as percentage * 100 | ||
| } | ||
| else | ||
| { | ||
| generalNumMap["Func_Coverage_Percent"] = 0; | ||
| } | ||
|
|
||
| generalNumMap["EXT_CallSite_Num"] = extCallSiteNum; | ||
| generalNumMap["NonEXT_CallSite_Num"] = callSiteNum; | ||
| timeStatMap["Total_Time(sec)"] = (double)(endTime - startTime) / TIMEINTERVAL; | ||
|
|
@@ -1280,8 +1393,16 @@ void AEStat::performStat() | |
| unsigned field_width = 30; | ||
| for (NUMStatMap::iterator it = generalNumMap.begin(), eit = generalNumMap.end(); it != eit; ++it) | ||
| { | ||
| // format out put with width 20 space | ||
| std::cout << std::setw(field_width) << it->first << it->second << "\n"; | ||
| // Special handling for percentage fields (stored as percentage * 100) | ||
| if (it->first == "ICFG_Coverage_Percent" || it->first == "Func_Coverage_Percent") | ||
| { | ||
| double percent = (double)it->second / 100.0; | ||
| std::cout << std::setw(field_width) << it->first << std::fixed << std::setprecision(2) << percent << "%\n"; | ||
| } | ||
| else | ||
| { | ||
| std::cout << std::setw(field_width) << it->first << it->second << "\n"; | ||
| } | ||
| } | ||
| SVFUtil::outs() << "-------------------------------------------------------\n"; | ||
| for (TIMEStatMap::iterator it = timeStatMap.begin(), eit = timeStatMap.end(); it != eit; ++it) | ||
|
|
@@ -1605,6 +1726,13 @@ void AbstractInterpretation::updateStateOnCmp(const CmpStmt *cmp) | |
| case CmpStmt::FCMP_TRUE: | ||
| resVal = IntervalValue(1, 1); | ||
| break; | ||
| case CmpStmt::FCMP_ORD: | ||
| case CmpStmt::FCMP_UNO: | ||
| // FCMP_ORD: true if both operands are not NaN | ||
| // FCMP_UNO: true if either operand is NaN | ||
| // Conservatively return [0, 1] since we don't track NaN | ||
| resVal = IntervalValue(0, 1); | ||
| break; | ||
| default: | ||
| assert(false && "undefined compare: "); | ||
| } | ||
|
|
@@ -1719,6 +1847,13 @@ void AbstractInterpretation::updateStateOnCmp(const CmpStmt *cmp) | |
| case CmpStmt::FCMP_TRUE: | ||
| resVal = IntervalValue(1, 1); | ||
| break; | ||
| case CmpStmt::FCMP_ORD: | ||
| case CmpStmt::FCMP_UNO: | ||
| // FCMP_ORD: true if both operands are not NaN | ||
| // FCMP_UNO: true if either operand is NaN | ||
| // Conservatively return [0, 1] since we don't track NaN | ||
| resVal = IntervalValue(0, 1); | ||
| break; | ||
| default: | ||
| assert(false && "undefined compare: "); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we share as much code as possible for the string handling functions you have?