Skip to content

[php] Pre-Parse and Summarize Symbols #5467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
package io.joern.php2cpg

import io.joern.php2cpg.parser.PhpParser
import io.joern.php2cpg.astcreation.AstCreator
import io.joern.php2cpg.datastructures.PhpProgramSummary
import io.joern.php2cpg.parser.{PhpParseResult, PhpParser}
import io.joern.php2cpg.passes.*
import io.joern.php2cpg.utils.DependencyDownloader
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.utils.ConcurrentTaskUtil
import io.shiftleft.semanticcpg.utils.ExternalCommand
import io.joern.x2cpg.{SourceFiles, X2CpgFrontend}
import io.shiftleft.codepropertygraph.generated.{Cpg, Languages}
import io.shiftleft.semanticcpg.utils.FileUtil.PathExt
import org.slf4j.LoggerFactory
import versionsort.VersionHelper

import java.nio.file.Paths
import scala.collection.mutable
import scala.util.matching.Regex
import scala.util.{Failure, Success, Try}

class Php2Cpg extends X2CpgFrontend[Config] {

private val logger = LoggerFactory.getLogger(this.getClass)
private val logger = LoggerFactory.getLogger(this.getClass)
private val PhpSourceFileExtensions: Set[String] = Set(".php")

private def isPhpVersionSupported: Boolean = {
val result = ExternalCommand.run(Seq("php", "--version"), Some(".")).toTry
Expand Down Expand Up @@ -58,7 +64,7 @@ class Php2Cpg extends X2CpgFrontend[Config] {
// Parse dependencies and add high-level nodes to the CPG
new DependencySymbolsPass(cpg, dependencyDir).createAndApply()
}
new AstCreationPass(config, cpg, parser.get)(config.schemaValidation).createAndApply()
new AstCreationPass(cpg, parseFiles(config, parser)).createAndApply()
new AstParentInfoPass(cpg).createAndApply()
new AnyTypePass(cpg).createAndApply()
TypeNodePass.withTypesFromCpg(cpg).createAndApply()
Expand Down Expand Up @@ -89,4 +95,67 @@ class Php2Cpg extends X2CpgFrontend[Config] {
)
.filter(_.endsWith("composer.json"))
}

/** Parses every PHP source file found under `config.inputPath` and returns one [[AstCreator]] per file that could be
  * parsed.
  *
  * Files are handed to the PHP parser in batches rather than one by one: the parser apparently has a slow startup
  * phase which makes single-file processing prohibitively slow. The batches must not be too large either, because:
  *
  *   1. the argument length accepted by the php executable has system-dependent limits, and
  *   1. we want to make use of multiple CPU cores for the rest of the CPG creation.
  *
  * Once parsed, each AST creator is summarized, the summaries are merged into a single [[PhpProgramSummary]], and
  * every returned creator is given a reference to that merged summary.
  *
  * @param config
  *   the frontend configuration; supplies the input path, ignore rules, file-content flag and schema validation mode.
  * @param maybeParser
  *   the parser to use, if one is available.
  * @return
  *   the AST creators carrying the program summary, or an empty list when no parser is given.
  */
private[php2cpg] def parseFiles(config: Config, maybeParser: Option[PhpParser]): List[AstCreator] = {

  // Name under which the file is recorded: relative to the input path, or the bare file name when the input path
  // itself is the file.
  def relativeNameOf(fileName: String): String =
    if (fileName != config.inputPath) Paths.get(config.inputPath).relativize(Paths.get(fileName)).toString
    else Paths.get(fileName).fileName

  def parseResultToAstCreator(parseResult: PhpParseResult): Option[AstCreator] = parseResult match {
    case PhpParseResult(fileName, None, _) =>
      logger.warn(s"Could not parse file $fileName. Results will be missing!")
      None
    case PhpParseResult(fileName, Some(result), _) =>
      val creator =
        new AstCreator(relativeNameOf(fileName), fileName, result, config.disableFileContent)(config.schemaValidation)
      Option(creator)
  }

  maybeParser.fold(List.empty[AstCreator]) { parser =>
    val phpFiles = SourceFiles
      .determine(
        config.inputPath,
        PhpSourceFileExtensions,
        ignoredFilesRegex = Option(config.ignoredFilesRegex),
        ignoredFilesPath = Option(config.ignoredFiles)
      )
      .toArray

    // One task per batch of 20 files; each task parses its batch and wraps the successful results in AST creators
    val parseTasks = phpFiles
      .grouped(20)
      .map { batch => () => parser.parseFiles(batch).flatMap(parseResultToAstCreator).toSeq }

    val parsedCreators = ConcurrentTaskUtil
      .runUsingThreadPool(parseTasks)
      .flatMap {
        case Success(creators) => creators
        case Failure(exception) =>
          logger.warn(s"Unable to parse PHP file batch, skipping - ", exception)
          Nil
      }

    // High-level pre-parse of each AST (no method bodies, etc.), merged into a single summary of internal symbols
    val summaryTasks = parsedCreators.map(creator => () => creator.summarize).iterator
    val mergedSummary = ConcurrentTaskUtil
      .runUsingThreadPool(summaryTasks)
      .flatMap {
        case Success(summary) => Option(summary)
        case Failure(exception) =>
          logger.warn(s"Unable to pre-parse PHP file, skipping - ", exception)
          None
      }
      .foldLeft(PhpProgramSummary())((accumulated, next) => accumulated ++= next)

    // Hand every creator a reference to the summary of all internal symbols (types/methods)
    parsedCreators.map(creator => creator.withSummary(mergedSummary))
  }
}

}
Original file line number Diff line number Diff line change
@@ -1,38 +1,34 @@
package io.joern.php2cpg.astcreation

import io.joern.php2cpg.astcreation.AstCreator.{NameConstants, TypeConstants, operatorSymbols}
import io.joern.php2cpg.datastructures.ArrayIndexTracker
import io.joern.php2cpg.datastructures.{PhpProgramSummary, Scope}
import io.joern.php2cpg.parser.Domain.*
import io.joern.php2cpg.parser.Domain.PhpModifiers.containsAccessModifier
import io.joern.php2cpg.utils.Scope
import io.joern.x2cpg.Ast.storeInDiffGraph
import io.joern.x2cpg.Defines.{StaticInitMethodName, UnresolvedNamespace, UnresolvedSignature}
import io.joern.x2cpg.utils.AstPropertiesUtil.RootProperties
import io.joern.x2cpg.utils.IntervalKeyPool
import io.joern.x2cpg.{Ast, AstCreatorBase, AstNodeBuilder, Defines, ValidationMode}
import io.joern.x2cpg.datastructures.AstParseLevel
import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode}
import io.shiftleft.codepropertygraph.generated.*
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal
import io.shiftleft.utils.IOUtils
import org.slf4j.{Logger, LoggerFactory}

import java.nio.charset.StandardCharsets
import java.nio.file.Path
import scala.collection.mutable

class AstCreator(
protected val relativeFileName: String,
fileName: String,
phpAst: PhpFile,
disableFileContent: Boolean
val fileName: String,
protected val phpAst: PhpFile,
protected val disableFileContent: Boolean,
protected[php2cpg] val programSummary: PhpProgramSummary = PhpProgramSummary(),
protected val parseLevel: AstParseLevel = AstParseLevel.FULL_AST
)(implicit withSchemaValidation: ValidationMode)
extends AstCreatorBase[PhpNode, AstCreator](relativeFileName)
with AstCreatorHelper(disableFileContent)
with AstCreatorHelper
with AstForExpressionsCreator
with AstForControlStructuresCreator
with AstForDeclarationsCreator
with AstForFunctionsCreator
with AstForTypesCreator {
with AstForTypesCreator
with AstSummaryVisitor {

protected val logger: Logger = LoggerFactory.getLogger(AstCreator.getClass)
protected val scope = new Scope()(() => nextClosureName())
Expand Down Expand Up @@ -73,7 +69,7 @@ class AstCreator(
)
}

private def astForPhpFile(file: PhpFile): Ast = {
protected def astForPhpFile(file: PhpFile): Ast = {
val fileNode = NewFile().name(relativeFileName)
fileContent.foreach(fileNode.content(_))

Expand Down Expand Up @@ -130,7 +126,7 @@ class AstCreator(
case useStmt: PhpUseStmt => astForUseStmt(useStmt) :: Nil
case groupUseStmt: PhpGroupUseStmt => astForGroupUseStmt(groupUseStmt) :: Nil
case foreachStmt: PhpForeachStmt => astForForeachStmt(foreachStmt) :: Nil
case traitUseStmt: PhpTraitUseStmt => astforTraitUseStmt(traitUseStmt) :: Nil
case traitUseStmt: PhpTraitUseStmt => astForTraitUseStmt(traitUseStmt) :: Nil
case enumCase: PhpEnumCaseStmt => astForEnumCase(enumCase) :: Nil
case staticStmt: PhpStaticStmt => astsForStaticStmt(staticStmt)
case unhandled =>
Expand All @@ -139,127 +135,6 @@ class AstCreator(
}
}

/** Lowers an `echo` statement to a call node named "echo" (built via `operatorCallNode`) whose arguments are the
  * ASTs of the echoed expressions.
  */
private def astForEchoStmt(echoStmt: PhpEchoStmt): Ast = {
  val argumentAsts = echoStmt.exprs.map(astForExpr)
  val renderedArgs = argumentAsts.map(_.rootCodeOrEmpty).mkString(",")
  val echoCall     = operatorCallNode(echoStmt, s"echo $renderedArgs", "echo", None)
  callAst(echoCall, argumentAsts)
}

/** Builds the namespace block AST for a `namespace` statement.
  *
  * The block is named after the namespace, falling back to `NameConstants.Unknown` when the statement carries no
  * name, and its full name is that name prefixed with the relative file name. The statement's body is lowered while
  * the namespace block is the current scope, and no default constructor is created for it.
  */
private def astForNamespaceStmt(stmt: PhpNamespaceStmt): Ast = {
  val namespaceName = stmt.name match {
    case Some(nameExpr) => nameExpr.name
    case None           => NameConstants.Unknown
  }

  val block = NewNamespaceBlock()
    .name(namespaceName)
    .fullName(s"$relativeFileName:$namespaceName")

  // The body must be lowered with the namespace block on the scope stack
  scope.pushNewScope(block)
  val children = astsForClassLikeBody(stmt, stmt.stmts, createDefaultConstructor = false)
  scope.popScope()

  Ast(block).withChildren(children)
}

/** Represents a halt-compiler statement as an argument-less call node named `NameConstants.HaltCompiler` with type
  * `TypeConstants.Void`.
  */
private def astForHaltCompilerStmt(stmt: PhpHaltCompilerStmt): Ast = {
  val operatorName = NameConstants.HaltCompiler
  val haltCall     = operatorCallNode(stmt, s"$operatorName()", operatorName, Some(TypeConstants.Void))
  Ast(haltCall)
}

/** Lowers an `unset` statement to a call node for `PhpOperators.unset` (also used as the method full name) typed as
  * `TypeConstants.Void`, with the ASTs of the unset variables as arguments.
  */
private def astForUnsetStmt(stmt: PhpUnsetStmt): Ast = {
  val unsetOperator = PhpOperators.unset
  val targetAsts    = stmt.vars.map(astForExpr)
  val argumentList  = targetAsts.map(_.rootCodeOrEmpty).mkString(", ")

  val unsetCall = operatorCallNode(stmt, s"$unsetOperator($argumentList)", unsetOperator, Some(TypeConstants.Void))
    .methodFullName(unsetOperator)

  callAst(unsetCall, targetAsts)
}

/** Lowers a `global` statement to a call node for `PhpOperators.global` typed as `TypeConstants.Void`, with the ASTs
  * of the listed variables as arguments.
  */
private def astForGlobalStmt(stmt: PhpGlobalStmt): Ast = {
  // This is not an accurate representation of what `global` does, but with things like `global $$x` being possible,
  // it is very difficult to figure out the correct scopes for global variables.
  val globalVarAsts = stmt.vars.map(astForExpr)
  val renderedVars  = globalVarAsts.map(_.rootCodeOrEmpty).mkString(", ")
  val globalCode    = s"${PhpOperators.global} $renderedVars"

  val globalCall = operatorCallNode(stmt, globalCode, PhpOperators.global, Some(TypeConstants.Void))
  callAst(globalCall, globalVarAsts)
}

/** Creates one import AST per entry of a `use` statement and combines them via `wrapMultipleInBlock`. */
private def astForUseStmt(stmt: PhpUseStmt): Ast = {
  // TODO Use useType + scope to get better name info
  val importAsts = stmt.uses.map(use => astForUseUse(use))
  wrapMultipleInBlock(importAsts, line(stmt))
}

/** Creates one import AST per entry of a group `use` statement, prepending the group prefix followed by a backslash
  * to every imported name, and combines them via `wrapMultipleInBlock`.
  */
private def astForGroupUseStmt(stmt: PhpGroupUseStmt): Ast = {
  // TODO Use useType + scope to get better name info
  val prefixWithSeparator = s"${stmt.prefix.name}\\"
  val importAsts          = stmt.uses.map(use => astForUseUse(use, prefixWithSeparator))
  wrapMultipleInBlock(importAsts, line(stmt))
}

/** Placeholder lowering for trait `use` statements: logs at debug level that they are unsupported and emits an
  * unknown node carrying the statement's code.
  */
private def astforTraitUseStmt(stmt: PhpTraitUseStmt): Ast = {
  // TODO Actually implement this
  val location = s"$relativeFileName:${line(stmt)}"
  logger.debug(s"Trait use statement encountered. This is not yet supported. Location: $location")

  val placeholder = unknownNode(stmt, code(stmt))
  Ast(placeholder)
}

/** Builds the explicit import node for a single entry of a `use` statement.
  *
  * @param stmt
  *   the use entry; supplies the original name, the optional alias and the use type.
  * @param namePrefix
  *   text prepended to the original name (empty by default).
  * @return
  *   an AST consisting of the import node, whose code reads `use [function |const ]<name>[ as <alias>]`.
  */
private def astForUseUse(stmt: PhpUseUse, namePrefix: String = ""): Ast = {
  val importedName = s"$namePrefix${stmt.originalName.name}"
  val aliasName    = stmt.alias.map(_.name)

  val typeKeyword = stmt.useType match {
    case PhpUseType.Function => s"function "
    case PhpUseType.Constant => s"const "
    case _                   => ""
  }
  val aliasSuffix = aliasName.map(alias => s" as $alias").getOrElse("")

  Ast(
    NewImport()
      .importedEntity(importedName)
      .importedAs(aliasName)
      .isExplicit(true)
      .code(s"use $typeKeyword$importedName$aliasSuffix")
  )
}

/** Lowers a `static` statement into local declarations plus optional initializing assignments.
  *
  * For every declared variable with a plain name, a local node is created and added to the current scope. Its type is
  * the root type of the default value's AST when available, and `Defines.Any` otherwise. When a default value is
  * present, an assignment call from that value to an identifier referencing the local is emitted after the local.
  * Declarations whose variable has any other shape are logged at warn level and skipped.
  */
private def astsForStaticStmt(stmt: PhpStaticStmt): List[Ast] = {
  stmt.vars.flatMap { declaration =>
    declaration.variable match {
      case PhpVariable(PhpNameExpr(varName, _), _) =>
        val initializerAst  = declaration.defaultValue.map(astForExpr)
        val declarationCode = s"static $$$varName"
        val declaredType    = initializerAst.flatMap(_.rootType).getOrElse(Defines.Any)

        // Register the local before building the identifier that references it
        val staticLocal = localNode(stmt, varName, declarationCode, declaredType)
        scope.addToScope(staticLocal.name, staticLocal)

        val initializationAsts = initializerAst.toList.map { initializer =>
          val identifier = identifierNode(stmt, varName, s"$$$varName", declaredType)
          val target     = Ast(identifier).withRefEdge(identifier, staticLocal)

          val assignmentCode = s"$declarationCode = ${initializer.rootCodeOrEmpty}"
          val assignment     = operatorCallNode(stmt, assignmentCode, Operators.assignment, None)

          callAst(assignment, target :: initializer :: Nil)
        }

        Ast(staticLocal) :: initializationAsts

      case unexpected =>
        logger.warn(s"Unexpected static variable type $unexpected in $relativeFileName")
        Nil
    }
  }
}

}

object AstCreator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

import java.nio.charset.StandardCharsets

trait AstCreatorHelper(disableFileContent: Boolean)(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>
trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: AstCreator =>

protected val globalNamespace: NewNamespaceBlock = globalNamespaceBlock()

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.joern.php2cpg.astcreation

import io.joern.php2cpg.utils.PhpScopeElement
import io.joern.php2cpg.astcreation.AstCreator.TypeConstants
import io.joern.php2cpg.datastructures.PhpScopeElement
import io.joern.php2cpg.parser.Domain.*
import io.joern.x2cpg.Defines.UnresolvedSignature
import io.joern.x2cpg.utils.AstPropertiesUtil.RootProperties
Expand Down
Loading
Loading