From 713890ec6b913f7f0178c2be94d328730b0d57e1 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Wed, 30 Apr 2025 21:45:55 +0200 Subject: [PATCH 1/3] PHP Preparse Files for a Summary Glued various components together to create a pre-parse of all PHP internal ASTs for a summary of symbol info. --- .../main/scala/io/joern/php2cpg/Php2Cpg.scala | 75 ++++++- .../php2cpg/astcreation/AstCreator.scala | 151 ++------------ .../astcreation/AstCreatorHelper.scala | 2 +- .../AstForControlStructuresCreator.scala | 2 +- .../AstForDeclarationsCreator.scala | 104 +++++++++- .../AstForExpressionsCreator.scala | 23 +++ .../astcreation/AstForFunctionsCreator.scala | 189 +++++++++--------- .../astcreation/AstSummaryVisitor.scala | 79 ++++++++ .../datastructures/PhpProgramSummary.scala | 36 ++++ .../PhpScopeElement.scala} | 4 +- .../{utils => datastructures}/Scope.scala | 6 +- .../io/joern/php2cpg/parser/PhpParser.scala | 6 +- .../php2cpg/passes/AstCreationPass.scala | 54 +---- .../rubysrc2cpg/astcreation/AstCreator.scala | 16 +- .../astcreation/AstForFunctionsCreator.scala | 1 + .../astcreation/AstForTypesCreator.scala | 1 + .../astcreation/AstSummaryVisitor.scala | 8 +- .../x2cpg/datastructures/ProgramSummary.scala | 22 +- 18 files changed, 466 insertions(+), 313 deletions(-) create mode 100644 joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstSummaryVisitor.scala create mode 100644 joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/PhpProgramSummary.scala rename joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/{utils/ScopeElement.scala => datastructures/PhpScopeElement.scala} (90%) rename joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/{utils => datastructures}/Scope.scala (95%) diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala index 8337f43e6c77..64eb19c1cf34 100644 --- 
a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala @@ -1,23 +1,29 @@ package io.joern.php2cpg -import io.joern.php2cpg.parser.PhpParser +import io.joern.php2cpg.astcreation.AstCreator +import io.joern.php2cpg.datastructures.PhpProgramSummary +import io.joern.php2cpg.parser.{PhpParseResult, PhpParser} import io.joern.php2cpg.passes.* import io.joern.php2cpg.utils.DependencyDownloader import io.joern.x2cpg.X2Cpg.withNewEmptyCpg import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass} +import io.joern.x2cpg.utils.ConcurrentTaskUtil import io.shiftleft.semanticcpg.utils.ExternalCommand import io.joern.x2cpg.{SourceFiles, X2CpgFrontend} import io.shiftleft.codepropertygraph.generated.{Cpg, Languages} +import io.shiftleft.semanticcpg.utils.FileUtil.PathExt import org.slf4j.LoggerFactory import versionsort.VersionHelper +import java.nio.file.Paths import scala.collection.mutable import scala.util.matching.Regex import scala.util.{Failure, Success, Try} class Php2Cpg extends X2CpgFrontend[Config] { - private val logger = LoggerFactory.getLogger(this.getClass) + private val logger = LoggerFactory.getLogger(this.getClass) + private val PhpSourceFileExtensions: Set[String] = Set(".php") private def isPhpVersionSupported: Boolean = { val result = ExternalCommand.run(Seq("php", "--version"), Some(".")).toTry @@ -58,7 +64,7 @@ class Php2Cpg extends X2CpgFrontend[Config] { // Parse dependencies and add high-level nodes to the CPG new DependencySymbolsPass(cpg, dependencyDir).createAndApply() } - new AstCreationPass(config, cpg, parser.get)(config.schemaValidation).createAndApply() + new AstCreationPass(cpg, parseFiles(config, parser)).createAndApply() new AstParentInfoPass(cpg).createAndApply() new AnyTypePass(cpg).createAndApply() TypeNodePass.withTypesFromCpg(cpg).createAndApply() @@ -89,4 +95,67 @@ class Php2Cpg extends X2CpgFrontend[Config] { ) 
.filter(_.endsWith("composer.json")) } + + /** We need to feed the php parser big groups of files in order to speed up the parsing. Apparently there is some sort of + * slow startup phase which makes single file processing prohibitively slow. On the other hand we need to be careful + * to not choose too big chunks because: + * 1. The argument length to the php executable has system dependent limits 2. We want to make use of multiple CPU + * cores for the rest of the CPG creation. + */ + private def parseFiles(config: Config, maybeParser: Option[PhpParser]): List[AstCreator] = { + + def parseResultToAstCreator(parseResult: PhpParseResult): Option[AstCreator] = { + parseResult match { + case PhpParseResult(fileName, Some(result), _) => + val relativeFilename = if (fileName == config.inputPath) { + Paths.get(fileName).fileName + } else { + Paths.get(config.inputPath).relativize(Paths.get(fileName)).toString + } + Option(new AstCreator(relativeFilename, fileName, result, config.disableFileContent)(config.schemaValidation)) + case PhpParseResult(fileName, None, _) => + logger.warn(s"Could not parse file $fileName. 
Results will be missing!") + None + } + } + + maybeParser match { + case None => List.empty + case Some(parser) => + val sourceFiles = SourceFiles + .determine( + config.inputPath, + PhpSourceFileExtensions, + ignoredFilesRegex = Option(config.ignoredFilesRegex), + ignoredFilesPath = Option(config.ignoredFiles) + ) + .toArray + + // Parse files concurrently in batches, creating AST creators from them + val batchedParserTasks = + sourceFiles + .grouped(20) + .map(fileNames => () => parser.parseFiles(fileNames).flatMap(parseResultToAstCreator).toSeq) + + val astCreators = ConcurrentTaskUtil + .runUsingThreadPool(batchedParserTasks.iterator) + .flatMap { + case Failure(exception) => logger.warn(s"Unable to parse PHP file batch, skipping - ", exception); Nil + case Success(astCreators) => astCreators + } + + // Pre-parse ASTs on a high level, not including method bodies, etc. + val internalProgramSummary = ConcurrentTaskUtil + .runUsingThreadPool(astCreators.map(x => () => x.summarize).iterator) + .flatMap { + case Failure(exception) => logger.warn(s"Unable to pre-parse PHP file, skipping - ", exception); None + case Success(summary) => Option(summary) + } + .foldLeft(PhpProgramSummary())(_ ++= _) + + // The result are AST creators with a reference to the program summary of all internal symbols (types/methods) + astCreators.map(_.withSummary(internalProgramSummary)) + } + } + } diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala index fa11e9b3f98b..251ccc452a3c 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala @@ -1,38 +1,34 @@ package io.joern.php2cpg.astcreation -import io.joern.php2cpg.astcreation.AstCreator.{NameConstants, TypeConstants, operatorSymbols} -import 
io.joern.php2cpg.datastructures.ArrayIndexTracker +import io.joern.php2cpg.datastructures.{PhpProgramSummary, Scope} import io.joern.php2cpg.parser.Domain.* -import io.joern.php2cpg.parser.Domain.PhpModifiers.containsAccessModifier -import io.joern.php2cpg.utils.Scope import io.joern.x2cpg.Ast.storeInDiffGraph -import io.joern.x2cpg.Defines.{StaticInitMethodName, UnresolvedNamespace, UnresolvedSignature} -import io.joern.x2cpg.utils.AstPropertiesUtil.RootProperties -import io.joern.x2cpg.utils.IntervalKeyPool -import io.joern.x2cpg.{Ast, AstCreatorBase, AstNodeBuilder, Defines, ValidationMode} +import io.joern.x2cpg.datastructures.AstParseLevel +import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode} import io.shiftleft.codepropertygraph.generated.* import io.shiftleft.codepropertygraph.generated.nodes.* import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal import io.shiftleft.utils.IOUtils import org.slf4j.{Logger, LoggerFactory} -import java.nio.charset.StandardCharsets import java.nio.file.Path -import scala.collection.mutable class AstCreator( protected val relativeFileName: String, - fileName: String, - phpAst: PhpFile, - disableFileContent: Boolean + val fileName: String, + protected val phpAst: PhpFile, + protected val disableFileContent: Boolean, + protected val programSummary: PhpProgramSummary = PhpProgramSummary(), + protected val parseLevel: AstParseLevel = AstParseLevel.FULL_AST )(implicit withSchemaValidation: ValidationMode) extends AstCreatorBase[PhpNode, AstCreator](relativeFileName) - with AstCreatorHelper(disableFileContent) + with AstCreatorHelper with AstForExpressionsCreator with AstForControlStructuresCreator with AstForDeclarationsCreator with AstForFunctionsCreator - with AstForTypesCreator { + with AstForTypesCreator + with AstSummaryVisitor { protected val logger: Logger = LoggerFactory.getLogger(AstCreator.getClass) protected val scope = new Scope()(() => nextClosureName()) @@ -73,7 +69,7 @@ class AstCreator( ) 
} - private def astForPhpFile(file: PhpFile): Ast = { + protected def astForPhpFile(file: PhpFile): Ast = { val fileNode = NewFile().name(relativeFileName) fileContent.foreach(fileNode.content(_)) @@ -130,7 +126,7 @@ class AstCreator( case useStmt: PhpUseStmt => astForUseStmt(useStmt) :: Nil case groupUseStmt: PhpGroupUseStmt => astForGroupUseStmt(groupUseStmt) :: Nil case foreachStmt: PhpForeachStmt => astForForeachStmt(foreachStmt) :: Nil - case traitUseStmt: PhpTraitUseStmt => astforTraitUseStmt(traitUseStmt) :: Nil + case traitUseStmt: PhpTraitUseStmt => astForTraitUseStmt(traitUseStmt) :: Nil case enumCase: PhpEnumCaseStmt => astForEnumCase(enumCase) :: Nil case staticStmt: PhpStaticStmt => astsForStaticStmt(staticStmt) case unhandled => @@ -139,127 +135,6 @@ class AstCreator( } } - private def astForEchoStmt(echoStmt: PhpEchoStmt): Ast = { - val args = echoStmt.exprs.map(astForExpr) - val code = s"echo ${args.map(_.rootCodeOrEmpty).mkString(",")}" - val callNode = operatorCallNode(echoStmt, code, "echo", None) - callAst(callNode, args) - } - - private def astForNamespaceStmt(stmt: PhpNamespaceStmt): Ast = { - val name = stmt.name.map(_.name).getOrElse(NameConstants.Unknown) - val fullName = s"$relativeFileName:$name" - - val namespaceBlock = NewNamespaceBlock() - .name(name) - .fullName(fullName) - - scope.pushNewScope(namespaceBlock) - val bodyStmts = astsForClassLikeBody(stmt, stmt.stmts, createDefaultConstructor = false) - scope.popScope() - - Ast(namespaceBlock).withChildren(bodyStmts) - } - - private def astForHaltCompilerStmt(stmt: PhpHaltCompilerStmt): Ast = { - val call = - operatorCallNode(stmt, s"${NameConstants.HaltCompiler}()", NameConstants.HaltCompiler, Some(TypeConstants.Void)) - - Ast(call) - } - - private def astForUnsetStmt(stmt: PhpUnsetStmt): Ast = { - val name = PhpOperators.unset - val args = stmt.vars.map(astForExpr) - val code = s"$name(${args.map(_.rootCodeOrEmpty).mkString(", ")})" - val callNode = operatorCallNode(stmt, code, name, 
Some(TypeConstants.Void)) - .methodFullName(PhpOperators.unset) - callAst(callNode, args) - } - - private def astForGlobalStmt(stmt: PhpGlobalStmt): Ast = { - // This isn't an accurater representation of what `global` does, but with things like `global $$x` being possible, - // it's very difficult to figure out correct scopes for global variables. - - val varsAsts = stmt.vars.map(astForExpr) - val code = s"${PhpOperators.global} ${varsAsts.map(_.rootCodeOrEmpty).mkString(", ")}" - - val globalCallNode = operatorCallNode(stmt, code, PhpOperators.global, Some(TypeConstants.Void)) - - callAst(globalCallNode, varsAsts) - } - - private def astForUseStmt(stmt: PhpUseStmt): Ast = { - // TODO Use useType + scope to get better name info - val imports = stmt.uses.map(astForUseUse(_)) - wrapMultipleInBlock(imports, line(stmt)) - } - - private def astForGroupUseStmt(stmt: PhpGroupUseStmt): Ast = { - // TODO Use useType + scope to get better name info - val groupPrefix = s"${stmt.prefix.name}\\" - val imports = stmt.uses.map(astForUseUse(_, groupPrefix)) - wrapMultipleInBlock(imports, line(stmt)) - } - - private def astforTraitUseStmt(stmt: PhpTraitUseStmt): Ast = { - // TODO Actually implement this - logger.debug( - s"Trait use statement encountered. This is not yet supported. 
Location: $relativeFileName:${line(stmt)}" - ) - Ast(unknownNode(stmt, code(stmt))) - } - - private def astForUseUse(stmt: PhpUseUse, namePrefix: String = ""): Ast = { - val originalName = s"$namePrefix${stmt.originalName.name}" - val aliasCode = stmt.alias.map(alias => s" as ${alias.name}").getOrElse("") - val typeCode = stmt.useType match { - case PhpUseType.Function => s"function " - case PhpUseType.Constant => s"const " - case _ => "" - } - val code = s"use $typeCode$originalName$aliasCode" - - val importNode = NewImport() - .importedEntity(originalName) - .importedAs(stmt.alias.map(_.name)) - .isExplicit(true) - .code(code) - - Ast(importNode) - } - - private def astsForStaticStmt(stmt: PhpStaticStmt): List[Ast] = { - stmt.vars.flatMap { staticVarDecl => - staticVarDecl.variable match { - case PhpVariable(PhpNameExpr(name, _), _) => - val maybeDefaultValueAst = staticVarDecl.defaultValue.map(astForExpr) - - val code = s"static $$$name" - val typeFullName = maybeDefaultValueAst.flatMap(_.rootType).getOrElse(Defines.Any) - - val local = localNode(stmt, name, code, typeFullName) - scope.addToScope(local.name, local) - - val assignmentAst = maybeDefaultValueAst.map { defaultValue => - val variableNode = identifierNode(stmt, name, s"$$$name", typeFullName) - val variableAst = Ast(variableNode).withRefEdge(variableNode, local) - - val assignCode = s"$code = ${defaultValue.rootCodeOrEmpty}" - val assignNode = operatorCallNode(stmt, assignCode, Operators.assignment, None) - - callAst(assignNode, variableAst :: defaultValue :: Nil) - } - - Ast(local) :: assignmentAst.toList - - case other => - logger.warn(s"Unexpected static variable type $other in $relativeFileName") - Nil - } - } - } - } object AstCreator { diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreatorHelper.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreatorHelper.scala index 81e7715d5dcd..e769beaedc52 100644 --- 
a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreatorHelper.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreatorHelper.scala @@ -11,7 +11,7 @@ import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal import java.nio.charset.StandardCharsets -trait AstCreatorHelper(disableFileContent: Boolean)(implicit withSchemaValidation: ValidationMode) { this: AstCreator => +trait AstCreatorHelper(implicit withSchemaValidation: ValidationMode) { this: AstCreator => protected val globalNamespace: NewNamespaceBlock = globalNamespaceBlock() diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForControlStructuresCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForControlStructuresCreator.scala index b219001e10c5..a844a827bb43 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForControlStructuresCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForControlStructuresCreator.scala @@ -1,7 +1,7 @@ package io.joern.php2cpg.astcreation -import io.joern.php2cpg.utils.PhpScopeElement import io.joern.php2cpg.astcreation.AstCreator.TypeConstants +import io.joern.php2cpg.datastructures.PhpScopeElement import io.joern.php2cpg.parser.Domain.* import io.joern.x2cpg.Defines.UnresolvedSignature import io.joern.x2cpg.utils.AstPropertiesUtil.RootProperties diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForDeclarationsCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForDeclarationsCreator.scala index 547d06d6aaf7..e879dcf13b4b 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForDeclarationsCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForDeclarationsCreator.scala @@ -1,9 +1,11 @@ 
package io.joern.php2cpg.astcreation -import io.joern.php2cpg.parser.Domain.{PhpDeclareItem, PhpDeclareStmt, PhpOperators} -import io.joern.x2cpg.{Ast, Defines, ValidationMode} +import io.joern.php2cpg.astcreation.AstCreator.{NameConstants, TypeConstants} +import io.joern.php2cpg.parser.Domain.* import io.joern.x2cpg.utils.AstPropertiesUtil.RootProperties +import io.joern.x2cpg.{Ast, Defines, ValidationMode} import io.shiftleft.codepropertygraph.generated.Operators +import io.shiftleft.codepropertygraph.generated.nodes.{NewImport, NewNamespaceBlock} trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator => @@ -33,4 +35,102 @@ trait AstForDeclarationsCreator(implicit withSchemaValidation: ValidationMode) { callAst(declareAssignment, Ast(key) :: value :: Nil) } + protected def astForGlobalStmt(stmt: PhpGlobalStmt): Ast = { + // This isn't an accurate representation of what `global` does, but with things like `global $$x` being possible, + // it's very difficult to figure out correct scopes for global variables. 
+ + val varsAsts = stmt.vars.map(astForExpr) + val code = s"${PhpOperators.global} ${varsAsts.map(_.rootCodeOrEmpty).mkString(", ")}" + + val globalCallNode = operatorCallNode(stmt, code, PhpOperators.global, Some(TypeConstants.Void)) + + callAst(globalCallNode, varsAsts) + } + + protected def astForNamespaceStmt(stmt: PhpNamespaceStmt): Ast = { + val name = stmt.name.map(_.name).getOrElse(NameConstants.Unknown) + val fullName = s"$relativeFileName:$name" + + val namespaceBlock = NewNamespaceBlock() + .name(name) + .fullName(fullName) + + scope.pushNewScope(namespaceBlock) + val bodyStmts = astsForClassLikeBody(stmt, stmt.stmts, createDefaultConstructor = false) + scope.popScope() + + Ast(namespaceBlock).withChildren(bodyStmts) + } + + protected def astForUseStmt(stmt: PhpUseStmt): Ast = { + // TODO Use useType + scope to get better name info + val imports = stmt.uses.map(astForUseUse(_)) + wrapMultipleInBlock(imports, line(stmt)) + } + + protected def astForGroupUseStmt(stmt: PhpGroupUseStmt): Ast = { + // TODO Use useType + scope to get better name info + val groupPrefix = s"${stmt.prefix.name}\\" + val imports = stmt.uses.map(astForUseUse(_, groupPrefix)) + wrapMultipleInBlock(imports, line(stmt)) + } + + protected def astForTraitUseStmt(stmt: PhpTraitUseStmt): Ast = { + // TODO Actually implement this + logger.debug( + s"Trait use statement encountered. This is not yet supported. 
Location: $relativeFileName:${line(stmt)}" + ) + Ast(unknownNode(stmt, code(stmt))) + } + + protected def astForUseUse(stmt: PhpUseUse, namePrefix: String = ""): Ast = { + val originalName = s"$namePrefix${stmt.originalName.name}" + val aliasCode = stmt.alias.map(alias => s" as ${alias.name}").getOrElse("") + val typeCode = stmt.useType match { + case PhpUseType.Function => s"function " + case PhpUseType.Constant => s"const " + case _ => "" + } + val code = s"use $typeCode$originalName$aliasCode" + + val importNode = NewImport() + .importedEntity(originalName) + .importedAs(stmt.alias.map(_.name)) + .isExplicit(true) + .code(code) + + Ast(importNode) + } + + protected def astsForStaticStmt(stmt: PhpStaticStmt): List[Ast] = { + stmt.vars.flatMap { staticVarDecl => + staticVarDecl.variable match { + case PhpVariable(PhpNameExpr(name, _), _) => + val maybeDefaultValueAst = staticVarDecl.defaultValue.map(astForExpr) + + val code = s"static $$$name" + val typeFullName = maybeDefaultValueAst.flatMap(_.rootType).getOrElse(Defines.Any) + + val local = localNode(stmt, name, code, typeFullName) + scope.addToScope(local.name, local) + + val assignmentAst = maybeDefaultValueAst.map { defaultValue => + val variableNode = identifierNode(stmt, name, s"$$$name", typeFullName) + val variableAst = Ast(variableNode).withRefEdge(variableNode, local) + + val assignCode = s"$code = ${defaultValue.rootCodeOrEmpty}" + val assignNode = operatorCallNode(stmt, assignCode, Operators.assignment, None) + + callAst(assignNode, variableAst :: defaultValue :: Nil) + } + + Ast(local) :: assignmentAst.toList + + case other => + logger.warn(s"Unexpected static variable type $other in $relativeFileName") + Nil + } + } + } + } diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForExpressionsCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForExpressionsCreator.scala index 66e657ce8b12..140103b51ed5 100644 --- 
a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForExpressionsCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForExpressionsCreator.scala @@ -867,4 +867,27 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) { .withChild(returnIdentifierAst) } + protected def astForEchoStmt(echoStmt: PhpEchoStmt): Ast = { + val args = echoStmt.exprs.map(astForExpr) + val code = s"echo ${args.map(_.rootCodeOrEmpty).mkString(",")}" + val callNode = operatorCallNode(echoStmt, code, "echo", None) + callAst(callNode, args) + } + + protected def astForHaltCompilerStmt(stmt: PhpHaltCompilerStmt): Ast = { + val call = + operatorCallNode(stmt, s"${NameConstants.HaltCompiler}()", NameConstants.HaltCompiler, Some(TypeConstants.Void)) + + Ast(call) + } + + protected def astForUnsetStmt(stmt: PhpUnsetStmt): Ast = { + val name = PhpOperators.unset + val args = stmt.vars.map(astForExpr) + val code = s"$name(${args.map(_.rootCodeOrEmpty).mkString(", ")})" + val callNode = operatorCallNode(stmt, code, name, Some(TypeConstants.Void)) + .methodFullName(PhpOperators.unset) + callAst(callNode, args) + } + } diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala index 484f1903866f..38d10047cffb 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala @@ -4,6 +4,7 @@ import io.joern.php2cpg.astcreation.AstCreator.{NameConstants, TypeConstants} import io.joern.php2cpg.parser.Domain.* import io.joern.php2cpg.parser.Domain.PhpModifiers.containsAccessModifier import io.joern.x2cpg.Defines.UnresolvedSignature +import io.joern.x2cpg.datastructures.AstParseLevel.{FULL_AST, SIGNATURES} 
import io.joern.x2cpg.utils.AstPropertiesUtil.RootProperties import io.joern.x2cpg.{Ast, Defines, ValidationMode} import io.shiftleft.codepropertygraph.generated.nodes.* @@ -11,73 +12,75 @@ import io.shiftleft.codepropertygraph.generated.{EdgeTypes, EvaluationStrategies trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator => - protected def astForClosureExpr(closureExpr: PhpClosureExpr): Ast = { - val methodName = scope.getScopedClosureName - val methodRef = methodRefNode(closureExpr, methodName, methodName, Defines.Any) - - val localsForUses = closureExpr.uses.flatMap { closureUse => - closureUse.variable match { - case PhpVariable(PhpNameExpr(name, _), _) => - val typeFullName = scope - .lookupVariable(name) - .flatMap(_.properties.get(PropertyNames.TYPE_FULL_NAME).map(_.toString)) - .getOrElse(Defines.Any) - val byRefPrefix = if (closureUse.byRef) "&" else "" - - Some(localNode(closureExpr, name, s"$byRefPrefix$$$name", typeFullName)) - - case other => - logger.warn(s"Found incorrect closure use variable '$other' in $relativeFileName") - None + protected def astForClosureExpr(closureExpr: PhpClosureExpr): Ast = parseLevel match { + case SIGNATURES => Ast() + case FULL_AST => + val methodName = scope.getScopedClosureName + val methodRef = methodRefNode(closureExpr, methodName, methodName, Defines.Any) + + val localsForUses = closureExpr.uses.flatMap { closureUse => + closureUse.variable match { + case PhpVariable(PhpNameExpr(name, _), _) => + val typeFullName = scope + .lookupVariable(name) + .flatMap(_.properties.get(PropertyNames.TYPE_FULL_NAME).map(_.toString)) + .getOrElse(Defines.Any) + val byRefPrefix = if (closureUse.byRef) "&" else "" + + Some(localNode(closureExpr, name, s"$byRefPrefix$$$name", typeFullName)) + + case other => + logger.warn(s"Found incorrect closure use variable '$other' in $relativeFileName") + None + } } - } - // Add closure bindings to diffgraph - localsForUses.foreach { local => - val 
closureBindingId = s"$relativeFileName:$methodName:${local.name}" - local.closureBindingId(closureBindingId) - scope.addToScope(local.name, local) + // Add closure bindings to diffgraph + localsForUses.foreach { local => + val closureBindingId = s"$relativeFileName:$methodName:${local.name}" + local.closureBindingId(closureBindingId) + scope.addToScope(local.name, local) - val closureBindingNode = NewClosureBinding() - .closureBindingId(closureBindingId) - .closureOriginalName(local.name) - .evaluationStrategy(EvaluationStrategies.BY_SHARING) + val closureBindingNode = NewClosureBinding() + .closureBindingId(closureBindingId) + .closureOriginalName(local.name) + .evaluationStrategy(EvaluationStrategies.BY_SHARING) - // The ref edge to the captured local is added in the ClosureRefPass - diffGraph.addNode(closureBindingNode) - diffGraph.addEdge(methodRef, closureBindingNode, EdgeTypes.CAPTURE) - } + // The ref edge to the captured local is added in the ClosureRefPass + diffGraph.addNode(closureBindingNode) + diffGraph.addEdge(methodRef, closureBindingNode, EdgeTypes.CAPTURE) + } - // Create method for closure - val name = PhpNameExpr(methodName, closureExpr.attributes) - // TODO Check for static modifier - val modifiers = ModifierTypes.LAMBDA :: (if (closureExpr.isStatic) ModifierTypes.STATIC :: Nil else Nil) - val methodDecl = PhpMethodDecl( - name, - closureExpr.params, - modifiers, - closureExpr.returnType, - closureExpr.stmts, - closureExpr.returnByRef, - namespacedName = None, - isClassMethod = closureExpr.isStatic, - closureExpr.attributes, - List.empty[PhpAttributeGroup] - ) - val methodAst = astForMethodDecl(methodDecl, localsForUses.map(Ast(_)), Option(methodName)) - - val usesCode = localsForUses match { - case Nil => "" - case locals => s" use(${locals.map(_.code).mkString(", ")})" - } - methodAst.root.collect { case method: NewMethod => method }.foreach { methodNode => - methodNode.code(methodNode.code ++ usesCode) - } + // Create method for closure + val 
name = PhpNameExpr(methodName, closureExpr.attributes) + // TODO Check for static modifier + val modifiers = ModifierTypes.LAMBDA :: (if (closureExpr.isStatic) ModifierTypes.STATIC :: Nil else Nil) + val methodDecl = PhpMethodDecl( + name, + closureExpr.params, + modifiers, + closureExpr.returnType, + closureExpr.stmts, + closureExpr.returnByRef, + namespacedName = None, + isClassMethod = closureExpr.isStatic, + closureExpr.attributes, + List.empty[PhpAttributeGroup] + ) + val methodAst = astForMethodDecl(methodDecl, localsForUses.map(Ast(_)), Option(methodName)) + + val usesCode = localsForUses match { + case Nil => "" + case locals => s" use(${locals.map(_.code).mkString(", ")})" + } + methodAst.root.collect { case method: NewMethod => method }.foreach { methodNode => + methodNode.code(methodNode.code ++ usesCode) + } - // Add method to scope to be attached to typeDecl later - scope.addAnonymousMethod(methodAst) + // Add method to scope to be attached to typeDecl later + scope.addAnonymousMethod(methodAst) - Ast(methodRef) + Ast(methodRef) } protected def astForMethodDecl( @@ -85,49 +88,51 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th bodyPrefixAsts: List[Ast] = Nil, fullNameOverride: Option[String] = None, isConstructor: Boolean = false - ): Ast = { - val isStatic = decl.modifiers.contains(ModifierTypes.STATIC) - val thisParam = if (decl.isClassMethod && !isStatic) { - Option(thisParamAstForMethod(decl)) - } else { - None - } + ): Ast = parseLevel match { + case SIGNATURES => Ast() + case FULL_AST => + val isStatic = decl.modifiers.contains(ModifierTypes.STATIC) + val thisParam = if (decl.isClassMethod && !isStatic) { + Option(thisParamAstForMethod(decl)) + } else { + None + } - val methodName = decl.name.name - val fullName = fullNameOverride.getOrElse(composeMethodFullName(methodName, isStatic)) + val methodName = decl.name.name + val fullName = fullNameOverride.getOrElse(composeMethodFullName(methodName, isStatic)) - val 
signature = s"$UnresolvedSignature(${decl.params.size})" + val signature = s"$UnresolvedSignature(${decl.params.size})" - val parameters = thisParam.toList ++ decl.params.zipWithIndex.map { case (param, idx) => - astForParam(param, idx + 1) - } + val parameters = thisParam.toList ++ decl.params.zipWithIndex.map { case (param, idx) => + astForParam(param, idx + 1) + } - val constructorModifier = Option.when(isConstructor)(ModifierTypes.CONSTRUCTOR) - val defaultAccessModifier = Option.unless(containsAccessModifier(decl.modifiers))(ModifierTypes.PUBLIC) + val constructorModifier = Option.when(isConstructor)(ModifierTypes.CONSTRUCTOR) + val defaultAccessModifier = Option.unless(containsAccessModifier(decl.modifiers))(ModifierTypes.PUBLIC) - val allModifiers = constructorModifier ++: defaultAccessModifier ++: decl.modifiers - val modifiers = allModifiers.map(modifierNode(decl, _)) - val excludedModifiers = Set(ModifierTypes.MODULE, ModifierTypes.LAMBDA) - val modifierString = decl.modifiers.filterNot(excludedModifiers.contains) match { - case Nil => "" - case mods => s"${mods.mkString(" ")} " - } - val methodCode = s"${modifierString}function $methodName(${parameters.map(_.rootCodeOrEmpty).mkString(",")})" + val allModifiers = constructorModifier ++: defaultAccessModifier ++: decl.modifiers + val modifiers = allModifiers.map(modifierNode(decl, _)) + val excludedModifiers = Set(ModifierTypes.MODULE, ModifierTypes.LAMBDA) + val modifierString = decl.modifiers.filterNot(excludedModifiers.contains) match { + case Nil => "" + case mods => s"${mods.mkString(" ")} " + } + val methodCode = s"${modifierString}function $methodName(${parameters.map(_.rootCodeOrEmpty).mkString(",")})" - val method = methodNode(decl, methodName, methodCode, fullName, Some(signature), relativeFileName) + val method = methodNode(decl, methodName, methodCode, fullName, Some(signature), relativeFileName) - scope.pushNewScope(method) + scope.pushNewScope(method) - val returnType = 
decl.returnType.map(_.name).getOrElse(Defines.Any) + val returnType = decl.returnType.map(_.name).getOrElse(Defines.Any) - val methodBodyStmts = bodyPrefixAsts ++ decl.stmts.flatMap(astsForStmt) - val methodReturn = methodReturnNode(decl, returnType) + val methodBodyStmts = bodyPrefixAsts ++ decl.stmts.flatMap(astsForStmt) + val methodReturn = methodReturnNode(decl, returnType) - val attributeAsts = decl.attributeGroups.flatMap(astForAttributeGroup) - val methodBody = blockAst(blockNode(decl), methodBodyStmts) + val attributeAsts = decl.attributeGroups.flatMap(astForAttributeGroup) + val methodBody = blockAst(blockNode(decl), methodBodyStmts) - scope.popScope() - methodAstWithAnnotations(method, parameters, methodBody, methodReturn, modifiers, attributeAsts) + scope.popScope() + methodAstWithAnnotations(method, parameters, methodBody, methodReturn, modifiers, attributeAsts) } private def thisParamAstForMethod(originNode: PhpNode): Ast = { diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstSummaryVisitor.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstSummaryVisitor.scala new file mode 100644 index 000000000000..63986cf6e7d4 --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstSummaryVisitor.scala @@ -0,0 +1,79 @@ +package io.joern.php2cpg.astcreation + +import flatgraph.DiffGraphApplier +import io.joern.php2cpg.datastructures.* +import io.joern.x2cpg.datastructures.AstParseLevel +import io.joern.x2cpg.datastructures.AstParseLevel.{FULL_AST, SIGNATURES} +import io.joern.x2cpg.passes.base.AstLinkerPass +import io.joern.x2cpg.{Ast, ValidationMode} +import io.shiftleft.codepropertygraph.generated.nodes.Namespace +import io.shiftleft.codepropertygraph.generated.{Cpg, nodes} +import io.shiftleft.semanticcpg.language.* +import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal + +import scala.collection.mutable +import scala.util.Using + 
+trait AstSummaryVisitor(implicit withSchemaValidation: ValidationMode) { this: AstCreator => + + def summarize: PhpProgramSummary = this.parseLevel match { + case FULL_AST => + AstCreator(relativeFileName, fileName, phpAst, disableFileContent, programSummary, SIGNATURES).summarize + case SIGNATURES => + Using.resource(Cpg.empty) { cpg => + // Build and store compilation unit AST + val ast = astForPhpFile(phpAst) + Ast.storeInDiffGraph(ast, diffGraph) + DiffGraphApplier.applyDiff(cpg.graph, diffGraph) + + // Link basic AST elements + AstLinkerPass(cpg).createAndApply() + // Summarize findings + summarize(cpg) + } + } + + def withSummary(newSummary: PhpProgramSummary): AstCreator = { + AstCreator(relativeFileName, fileName, phpAst, disableFileContent, newSummary, parseLevel) + } + + private def summarize(cpg: Cpg): PhpProgramSummary = { + + def toMethod(m: nodes.Method): PhpMethod = { + val definingTypeDeclFullName = m.definingTypeDecl.fullName.headOption + + PhpMethod( + m.name, + m.parameter.map(x => x.name -> x.typeFullName).l, + m.methodReturn.typeFullName, + definingTypeDeclFullName + ) + } + + def toField(f: nodes.Member): PhpField = { + PhpField(f.name, f.typeFullName) + } + + def toType(m: nodes.TypeDecl): PhpType = PhpType(m.fullName, m.method.map(toMethod).l, m.member.map(toField).l) + + val namespaceToTypeMap = cpg.namespaceBlock.map { n => + val phpTypes = n.astChildren.flatMap { + // We have TypeDecl()->Method()->/.../ + case x: nodes.TypeDecl if x.name == NamespaceTraversal.globalNamespaceName => + val classDecls = x.method.isModule.block.astChildren.collect { case classDecl: nodes.TypeDecl => + toType(classDecl) + }.toList + val topLevelMethods = x.method.isModule.block.astChildren.collect { case functionDecl: nodes.Method => + toMethod(functionDecl) + }.toList + val topLevelTypeDecl = PhpType(n.fullName, topLevelMethods, Nil) + topLevelTypeDecl :: classDecls + case x: nodes.Method => PhpType(n.fullName, toMethod(x) :: Nil, Nil) :: Nil + case _ => Nil 
+ }.toSetMutable + n.fullName -> phpTypes + }.toSeq + PhpProgramSummary(mutable.Map(namespaceToTypeMap*)) + } + +} diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/PhpProgramSummary.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/PhpProgramSummary.scala new file mode 100644 index 000000000000..05d0a69d60bf --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/PhpProgramSummary.scala @@ -0,0 +1,36 @@ +package io.joern.php2cpg.datastructures + +import io.joern.x2cpg.datastructures.{FieldLike, MethodLike, ProgramSummary, TypeLike} + +import scala.annotation.targetName +import scala.collection.mutable + +type NamespaceToTypeMap = mutable.Map[String, mutable.Set[PhpType]] + +class PhpProgramSummary( + override val namespaceToType: NamespaceToTypeMap = mutable.Map.empty[String, mutable.Set[PhpType]] +) extends ProgramSummary[PhpType, PhpMethod, PhpField] { + + @targetName("appendAll") + def ++=(other: PhpProgramSummary): PhpProgramSummary = + PhpProgramSummary(ProgramSummary.merge(this.namespaceToType, other.namespaceToType)) + +} + +case class PhpField(name: String, typeName: String) extends FieldLike + +case class PhpMethod( + name: String, + parameterTypes: List[(String, String)], + returnType: String, + baseTypeFullName: Option[String] +) extends MethodLike + +case class PhpType(name: String, methods: List[PhpMethod], fields: List[PhpField]) + extends TypeLike[PhpMethod, PhpField] { + + @targetName("add") + override def +(o: TypeLike[PhpMethod, PhpField]): TypeLike[PhpMethod, PhpField] = { + this.copy(methods = mergeMethods(o), fields = mergeFields(o)) + } +} diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/ScopeElement.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/PhpScopeElement.scala similarity index 90% rename from 
joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/ScopeElement.scala rename to joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/PhpScopeElement.scala index 4a4c44924beb..b8bc8ef2088b 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/ScopeElement.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/PhpScopeElement.scala @@ -1,7 +1,7 @@ -package io.joern.php2cpg.utils +package io.joern.php2cpg.datastructures import io.joern.php2cpg.parser.Domain.InstanceMethodDelimiter -import io.shiftleft.codepropertygraph.generated.nodes.{NewBlock, NewMethod, NewNamespaceBlock, NewNode, NewTypeDecl} +import io.shiftleft.codepropertygraph.generated.nodes.* class PhpScopeElement private (val node: NewNode, scopeName: String)(implicit nextClosureName: () => String) { private var tmpVarCounter = 0 diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/Scope.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/Scope.scala similarity index 95% rename from joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/Scope.scala rename to joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/Scope.scala index 7fd030010c29..3f617f8e3517 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/utils/Scope.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/Scope.scala @@ -1,10 +1,10 @@ -package io.joern.php2cpg.utils +package io.joern.php2cpg.datastructures import io.joern.php2cpg.astcreation.AstCreator.NameConstants import io.joern.x2cpg.Ast -import io.joern.x2cpg.datastructures.{ScopeElement, NamespaceLikeScope, Scope as X2CpgScope} +import io.joern.x2cpg.datastructures.{NamespaceLikeScope, ScopeElement, Scope as X2CpgScope} import io.shiftleft.codepropertygraph.generated.NodeTypes -import io.shiftleft.codepropertygraph.generated.nodes.{NewBlock, NewMethod, 
NewNamespaceBlock, NewNode, NewTypeDecl} +import io.shiftleft.codepropertygraph.generated.nodes.* import org.slf4j.LoggerFactory import scala.collection.mutable diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/parser/PhpParser.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/parser/PhpParser.scala index 3f04a1dc49f8..192e3e4046a1 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/parser/PhpParser.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/parser/PhpParser.scala @@ -12,6 +12,8 @@ import scala.collection.mutable import scala.io.Source import scala.util.{Failure, Success, Try} +case class PhpParseResult(fileName: String, parseResult: Option[PhpFile], infoLines: String) + class PhpParser private (phpParserPath: String, phpIniPath: String, disableFileContent: Boolean) { private val logger = LoggerFactory.getLogger(this.getClass) @@ -21,7 +23,7 @@ class PhpParser private (phpParserPath: String, phpIniPath: String, disableFileC Seq("php", "--php-ini", phpIniPath, phpParserPath) ++ phpParserCommands ++ filenames } - def parseFiles(inputPaths: collection.Seq[String]): collection.Seq[(String, Option[PhpFile], String)] = { + def parseFiles(inputPaths: collection.Seq[String]): collection.Seq[PhpParseResult] = { // We need to keep a map between the input path and its canonical representation in // order to map back the canonical file name we get from the php parser. 
// Otherwise later on file name/path processing might get confused because the returned @@ -43,7 +45,7 @@ class PhpParser private (phpParserPath: String, phpIniPath: String, disableFileC (filename, jsonToPhpFile(jsonObjectOption, filename), infoLines) } val withRemappedFileName = asPhpFile.map { case (filename, phpFileOption, infoLines) => - (canonicalToInputPath.apply(filename), phpFileOption, infoLines) + PhpParseResult(canonicalToInputPath.apply(filename), phpFileOption, infoLines) } withRemappedFileName case ExternalCommand.ExternalCommandResult(exitCode, _, _) => diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala index 9086f68a0707..e8d5956bab3c 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/passes/AstCreationPass.scala @@ -11,54 +11,20 @@ import org.slf4j.LoggerFactory import java.nio.file.Paths -class AstCreationPass(config: Config, cpg: Cpg, parser: PhpParser)(implicit withSchemaValidation: ValidationMode) - extends ForkJoinParallelCpgPass[Array[String]](cpg) { +class AstCreationPass(cpg: Cpg, astCreators: List[AstCreator]) extends ForkJoinParallelCpgPass[AstCreator](cpg) { private val logger = LoggerFactory.getLogger(this.getClass) - val PhpSourceFileExtensions: Set[String] = Set(".php") + override def generateParts(): Array[AstCreator] = astCreators.toArray - override def generateParts(): Array[Array[String]] = { - val sourceFiles = SourceFiles - .determine( - config.inputPath, - PhpSourceFileExtensions, - ignoredFilesRegex = Option(config.ignoredFilesRegex), - ignoredFilesPath = Option(config.ignoredFiles) - ) - .toArray - - // We need to feed the php parser big groups of file in order - // to speed up the parsing. 
Apparently it is some sort of slow - // startup phase which makes single file processing prohibitively - // slow. - // On the other hand we need to be careful to not choose too big - // chunks because: - // 1. The argument length to the php executable has system - // dependent limits - // 2. We want to make use of multiple CPU cores for the rest - // of the CPG creation. - // - val parts = sourceFiles.grouped(20).toArray - parts - } - - override def runOnPart(diffGraph: DiffGraphBuilder, filenames: Array[String]): Unit = { - parser.parseFiles(filenames).foreach { case (filename, parseResult, infoLines) => - parseResult match { - case Some(parseResult) => - val relativeFilename = if (filename == config.inputPath) { - Paths.get(filename).fileName - } else { - Paths.get(config.inputPath).relativize(Paths.get(filename)).toString - } - diffGraph.absorb( - new AstCreator(relativeFilename, filename, parseResult, config.disableFileContent)(config.schemaValidation) - .createAst() - ) - case None => - logger.warn(s"Could not parse file $filename. 
Results will be missing!") - } + override def runOnPart(diffGraph: DiffGraphBuilder, astCreator: AstCreator): Unit = { + try { + val ast = astCreator.createAst() + diffGraph.absorb(ast) + } catch { + case ex: Exception => + logger.error(s"Error while processing AST for file - ${astCreator.fileName} - ", ex) } } + } diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala index 7dc9a4a39996..d172e05ebefc 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala @@ -4,7 +4,8 @@ import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.* import io.joern.rubysrc2cpg.datastructures.{BlockScope, NamespaceScope, RubyProgramSummary, RubyScope} import io.joern.rubysrc2cpg.passes.Defines import io.joern.rubysrc2cpg.utils.FreshNameGenerator -import io.joern.x2cpg.{Ast, AstCreatorBase, AstNodeBuilder, ValidationMode} +import io.joern.x2cpg.datastructures.AstParseLevel +import io.joern.x2cpg.{Ast, AstCreatorBase, ValidationMode} import io.shiftleft.codepropertygraph.generated.nodes.* import io.shiftleft.codepropertygraph.generated.{DiffGraphBuilder, EvaluationStrategies, ModifierTypes} import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal @@ -133,16 +134,3 @@ class AstCreator( } } - -/** Determines till what depth the AST creator will parse until. - */ -enum AstParseLevel { - - /** This level will parse all types and methods signatures, but exclude method bodies. - */ - case SIGNATURES - - /** This level will parse the full AST. 
- */ - case FULL_AST -} diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala index 79099abe5c53..d69d83d41fc0 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForFunctionsCreator.scala @@ -15,6 +15,7 @@ import io.shiftleft.codepropertygraph.generated.{ Operators } import io.joern.x2cpg.AstNodeBuilder.{bindingNode, closureBindingNode} +import io.joern.x2cpg.datastructures.AstParseLevel import scala.collection.mutable diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala index eaf763043731..a6a4ddc12bc3 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForTypesCreator.scala @@ -3,6 +3,7 @@ package io.joern.rubysrc2cpg.astcreation import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.{TypeDeclaration, *} import io.joern.rubysrc2cpg.datastructures.{BlockScope, MethodScope, ModuleScope, NamespaceScope, TypeScope} import io.joern.rubysrc2cpg.passes.Defines +import io.joern.x2cpg.datastructures.AstParseLevel import io.joern.x2cpg.{Ast, ValidationMode} import io.shiftleft.codepropertygraph.generated.nodes.* import io.shiftleft.codepropertygraph.generated.{ diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala index 7486bf43dac6..c45c457a9802 
100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstSummaryVisitor.scala @@ -1,13 +1,11 @@ package io.joern.rubysrc2cpg.astcreation import flatgraph.DiffGraphApplier -import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.{RubyExpression, StatementList} -import io.joern.rubysrc2cpg.datastructures.{RubyField, RubyMethod, RubyProgramSummary, RubyStubbedType, RubyType} +import io.joern.rubysrc2cpg.datastructures.* import io.joern.rubysrc2cpg.passes.Defines -import io.joern.x2cpg.layers.Base -import io.joern.x2cpg.passes.base.{AstLinkerPass, FileCreationPass} +import io.joern.x2cpg.datastructures.AstParseLevel +import io.joern.x2cpg.passes.base.AstLinkerPass import io.joern.x2cpg.{Ast, ValidationMode} -import io.shiftleft.codepropertygraph.cpgloading.CpgLoader import io.shiftleft.codepropertygraph.generated.Cpg import io.shiftleft.codepropertygraph.generated.nodes.{Local, Member, Method, TypeDecl} import io.shiftleft.semanticcpg.language.* diff --git a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala index 13e381ee5415..d4b28a511c11 100644 --- a/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala +++ b/joern-cli/frontends/x2cpg/src/main/scala/io/joern/x2cpg/datastructures/ProgramSummary.scala @@ -62,9 +62,8 @@ object ProgramSummary { ): mutable.Map[String, mutable.Set[T]] = { def dedupTypesInPlace(m: mutable.Map[String, mutable.Set[T]]): Unit = { - val newMap = m - .map { case (namespace, ts) => namespace -> ts.groupBy(_.name) } - .map { case (namespace, typMap) => + m.map { case (namespace, ts) => namespace -> ts.groupBy(_.name) } + .foreach { case (namespace, typMap) => val dedupedTypes = mutable.Set.from( typMap .map { case (name, ts) 
=> name -> ts.reduce((u, v) => (u + v).asInstanceOf[T]) } @@ -72,10 +71,8 @@ object ProgramSummary { .toSet ) m.put(namespace, dedupedTypes) - namespace -> dedupedTypes } - .toMap - assert(m.flatMap { case (name, ts) => ts.groupBy(_.name).map(_._2.size) }.forall(_ == 1)) + assert(m.flatMap { case (_, ts) => ts.groupBy(_.name).map(_._2.size) }.forall(_ == 1)) } // Handle duplicate types sharing the same namespace. This can be introduced from serialized type stubs. @@ -107,6 +104,19 @@ object ProgramSummary { } +/** Determines till what depth the AST creator will parse until. + */ +enum AstParseLevel { + + /** This level will parse all types and methods signatures, but exclude method bodies. + */ + case SIGNATURES + + /** This level will parse the full AST. + */ + case FULL_AST +} + /** Extends the capability of the scope object to track types in scope as provide type resolution. * * @tparam M From def95a7bdadd951ed7b71bd3fb05609c6ce2e212 Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Mon, 5 May 2025 12:52:16 +0200 Subject: [PATCH 2/3] Have basic test asserting PHP summary collects files correctly --- .../main/scala/io/joern/php2cpg/Php2Cpg.scala | 2 +- .../php2cpg/astcreation/AstCreator.scala | 2 +- .../astcreation/AstForFunctionsCreator.scala | 192 +++++++++--------- .../joern/php2cpg/datastructures/Scope.scala | 8 + .../php2cpg/passes/PhpSummaryPassTests.scala | 59 ++++++ .../testfixtures/PhpCode2CpgFixture.scala | 2 +- 6 files changed, 166 insertions(+), 99 deletions(-) create mode 100644 joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala index 64eb19c1cf34..01ec4df66f1c 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/Php2Cpg.scala @@ -102,7 +102,7 
@@ class Php2Cpg extends X2CpgFrontend[Config] { * 1. The argument length to the php executable has system dependent limits 2. We want to make use of multiple CPU * cores for the rest of the CPG creation. */ - private def parseFiles(config: Config, maybeParser: Option[PhpParser]): List[AstCreator] = { + private[php2cpg] def parseFiles(config: Config, maybeParser: Option[PhpParser]): List[AstCreator] = { def parseResultToAstCreator(parseResult: PhpParseResult): Option[AstCreator] = { parseResult match { diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala index 251ccc452a3c..86eb483f157b 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstCreator.scala @@ -18,7 +18,7 @@ class AstCreator( val fileName: String, protected val phpAst: PhpFile, protected val disableFileContent: Boolean, - protected val programSummary: PhpProgramSummary = PhpProgramSummary(), + protected[php2cpg] val programSummary: PhpProgramSummary = PhpProgramSummary(), protected val parseLevel: AstParseLevel = AstParseLevel.FULL_AST )(implicit withSchemaValidation: ValidationMode) extends AstCreatorBase[PhpNode, AstCreator](relativeFileName) diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala index 38d10047cffb..76b70ddb206a 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/astcreation/AstForFunctionsCreator.scala @@ -12,75 +12,73 @@ import io.shiftleft.codepropertygraph.generated.{EdgeTypes, EvaluationStrategies trait 
AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { this: AstCreator => - protected def astForClosureExpr(closureExpr: PhpClosureExpr): Ast = parseLevel match { - case SIGNATURES => Ast() - case FULL_AST => - val methodName = scope.getScopedClosureName - val methodRef = methodRefNode(closureExpr, methodName, methodName, Defines.Any) - - val localsForUses = closureExpr.uses.flatMap { closureUse => - closureUse.variable match { - case PhpVariable(PhpNameExpr(name, _), _) => - val typeFullName = scope - .lookupVariable(name) - .flatMap(_.properties.get(PropertyNames.TYPE_FULL_NAME).map(_.toString)) - .getOrElse(Defines.Any) - val byRefPrefix = if (closureUse.byRef) "&" else "" - - Some(localNode(closureExpr, name, s"$byRefPrefix$$$name", typeFullName)) - - case other => - logger.warn(s"Found incorrect closure use variable '$other' in $relativeFileName") - None - } + protected def astForClosureExpr(closureExpr: PhpClosureExpr): Ast = { + val methodName = scope.getScopedClosureName + val methodRef = methodRefNode(closureExpr, methodName, methodName, Defines.Any) + + val localsForUses = closureExpr.uses.flatMap { closureUse => + closureUse.variable match { + case PhpVariable(PhpNameExpr(name, _), _) => + val typeFullName = scope + .lookupVariable(name) + .flatMap(_.properties.get(PropertyNames.TYPE_FULL_NAME).map(_.toString)) + .getOrElse(Defines.Any) + val byRefPrefix = if (closureUse.byRef) "&" else "" + + Some(localNode(closureExpr, name, s"$byRefPrefix$$$name", typeFullName)) + + case other => + logger.warn(s"Found incorrect closure use variable '$other' in $relativeFileName") + None } + } - // Add closure bindings to diffgraph - localsForUses.foreach { local => - val closureBindingId = s"$relativeFileName:$methodName:${local.name}" - local.closureBindingId(closureBindingId) - scope.addToScope(local.name, local) + // Add closure bindings to diffgraph + localsForUses.foreach { local => + val closureBindingId = 
s"$relativeFileName:$methodName:${local.name}" + local.closureBindingId(closureBindingId) + scope.addToScope(local.name, local) - val closureBindingNode = NewClosureBinding() - .closureBindingId(closureBindingId) - .closureOriginalName(local.name) - .evaluationStrategy(EvaluationStrategies.BY_SHARING) + val closureBindingNode = NewClosureBinding() + .closureBindingId(closureBindingId) + .closureOriginalName(local.name) + .evaluationStrategy(EvaluationStrategies.BY_SHARING) - // The ref edge to the captured local is added in the ClosureRefPass - diffGraph.addNode(closureBindingNode) - diffGraph.addEdge(methodRef, closureBindingNode, EdgeTypes.CAPTURE) - } + // The ref edge to the captured local is added in the ClosureRefPass + diffGraph.addNode(closureBindingNode) + diffGraph.addEdge(methodRef, closureBindingNode, EdgeTypes.CAPTURE) + } - // Create method for closure - val name = PhpNameExpr(methodName, closureExpr.attributes) - // TODO Check for static modifier - val modifiers = ModifierTypes.LAMBDA :: (if (closureExpr.isStatic) ModifierTypes.STATIC :: Nil else Nil) - val methodDecl = PhpMethodDecl( - name, - closureExpr.params, - modifiers, - closureExpr.returnType, - closureExpr.stmts, - closureExpr.returnByRef, - namespacedName = None, - isClassMethod = closureExpr.isStatic, - closureExpr.attributes, - List.empty[PhpAttributeGroup] - ) - val methodAst = astForMethodDecl(methodDecl, localsForUses.map(Ast(_)), Option(methodName)) - - val usesCode = localsForUses match { - case Nil => "" - case locals => s" use(${locals.map(_.code).mkString(", ")})" - } - methodAst.root.collect { case method: NewMethod => method }.foreach { methodNode => - methodNode.code(methodNode.code ++ usesCode) - } + // Create method for closure + val name = PhpNameExpr(methodName, closureExpr.attributes) + // TODO Check for static modifier + val modifiers = ModifierTypes.LAMBDA :: (if (closureExpr.isStatic) ModifierTypes.STATIC :: Nil else Nil) + val methodDecl = PhpMethodDecl( + name, + 
closureExpr.params, + modifiers, + closureExpr.returnType, + closureExpr.stmts, + closureExpr.returnByRef, + namespacedName = None, + isClassMethod = closureExpr.isStatic, + closureExpr.attributes, + List.empty[PhpAttributeGroup] + ) + val methodAst = astForMethodDecl(methodDecl, localsForUses.map(Ast(_)), Option(methodName)) + + val usesCode = localsForUses match { + case Nil => "" + case locals => s" use(${locals.map(_.code).mkString(", ")})" + } + methodAst.root.collect { case method: NewMethod => method }.foreach { methodNode => + methodNode.code(methodNode.code ++ usesCode) + } - // Add method to scope to be attached to typeDecl later - scope.addAnonymousMethod(methodAst) + // Add method to scope to be attached to typeDecl later + scope.addAnonymousMethod(methodAst) - Ast(methodRef) + Ast(methodRef) } protected def astForMethodDecl( @@ -88,51 +86,53 @@ trait AstForFunctionsCreator(implicit withSchemaValidation: ValidationMode) { th bodyPrefixAsts: List[Ast] = Nil, fullNameOverride: Option[String] = None, isConstructor: Boolean = false - ): Ast = parseLevel match { - case SIGNATURES => Ast() - case FULL_AST => - val isStatic = decl.modifiers.contains(ModifierTypes.STATIC) - val thisParam = if (decl.isClassMethod && !isStatic) { - Option(thisParamAstForMethod(decl)) - } else { - None - } + ): Ast = { + val isStatic = decl.modifiers.contains(ModifierTypes.STATIC) + val thisParam = if (decl.isClassMethod && !isStatic) { + Option(thisParamAstForMethod(decl)) + } else { + None + } - val methodName = decl.name.name - val fullName = fullNameOverride.getOrElse(composeMethodFullName(methodName, isStatic)) + val methodName = decl.name.name + val fullName = fullNameOverride.getOrElse(composeMethodFullName(methodName, isStatic)) - val signature = s"$UnresolvedSignature(${decl.params.size})" + val signature = s"$UnresolvedSignature(${decl.params.size})" - val parameters = thisParam.toList ++ decl.params.zipWithIndex.map { case (param, idx) => - astForParam(param, idx + 1) - 
} + val parameters = thisParam.toList ++ decl.params.zipWithIndex.map { case (param, idx) => + astForParam(param, idx + 1) + } - val constructorModifier = Option.when(isConstructor)(ModifierTypes.CONSTRUCTOR) - val defaultAccessModifier = Option.unless(containsAccessModifier(decl.modifiers))(ModifierTypes.PUBLIC) + val constructorModifier = Option.when(isConstructor)(ModifierTypes.CONSTRUCTOR) + val defaultAccessModifier = Option.unless(containsAccessModifier(decl.modifiers))(ModifierTypes.PUBLIC) - val allModifiers = constructorModifier ++: defaultAccessModifier ++: decl.modifiers - val modifiers = allModifiers.map(modifierNode(decl, _)) - val excludedModifiers = Set(ModifierTypes.MODULE, ModifierTypes.LAMBDA) - val modifierString = decl.modifiers.filterNot(excludedModifiers.contains) match { - case Nil => "" - case mods => s"${mods.mkString(" ")} " - } - val methodCode = s"${modifierString}function $methodName(${parameters.map(_.rootCodeOrEmpty).mkString(",")})" + val allModifiers = constructorModifier ++: defaultAccessModifier ++: decl.modifiers + val modifiers = allModifiers.map(modifierNode(decl, _)) + val excludedModifiers = Set(ModifierTypes.MODULE, ModifierTypes.LAMBDA) + val modifierString = decl.modifiers.filterNot(excludedModifiers.contains) match { + case Nil => "" + case mods => s"${mods.mkString(" ")} " + } + val methodCode = s"${modifierString}function $methodName(${parameters.map(_.rootCodeOrEmpty).mkString(",")})" - val method = methodNode(decl, methodName, methodCode, fullName, Some(signature), relativeFileName) + val method = methodNode(decl, methodName, methodCode, fullName, Some(signature), relativeFileName) + val isTopLevel = scope.isTopLevel - scope.pushNewScope(method) + scope.pushNewScope(method) - val returnType = decl.returnType.map(_.name).getOrElse(Defines.Any) + val returnType = decl.returnType.map(_.name).getOrElse(Defines.Any) - val methodBodyStmts = bodyPrefixAsts ++ decl.stmts.flatMap(astsForStmt) - val methodReturn = 
methodReturnNode(decl, returnType) + val methodBodyStmts = bodyPrefixAsts ++ decl.stmts.flatMap(astsForStmt) + val methodReturn = methodReturnNode(decl, returnType) - val attributeAsts = decl.attributeGroups.flatMap(astForAttributeGroup) - val methodBody = blockAst(blockNode(decl), methodBodyStmts) + val attributeAsts = decl.attributeGroups.flatMap(astForAttributeGroup) + val methodBody = parseLevel match { + case SIGNATURES if !isTopLevel => blockAst(blockNode(decl), Nil) + case _ => blockAst(blockNode(decl), methodBodyStmts) + } - scope.popScope() - methodAstWithAnnotations(method, parameters, methodBody, methodReturn, modifiers, attributeAsts) + scope.popScope() + methodAstWithAnnotations(method, parameters, methodBody, methodReturn, modifiers, attributeAsts) } private def thisParamAstForMethod(originNode: PhpNode): Ast = { diff --git a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/Scope.scala b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/Scope.scala index 3f617f8e3517..8daf5ade992b 100644 --- a/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/Scope.scala +++ b/joern-cli/frontends/php2cpg/src/main/scala/io/joern/php2cpg/datastructures/Scope.scala @@ -5,6 +5,7 @@ import io.joern.x2cpg.Ast import io.joern.x2cpg.datastructures.{NamespaceLikeScope, ScopeElement, Scope as X2CpgScope} import io.shiftleft.codepropertygraph.generated.NodeTypes import io.shiftleft.codepropertygraph.generated.nodes.* +import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal import org.slf4j.LoggerFactory import scala.collection.mutable @@ -70,6 +71,13 @@ class Scope(implicit nextClosureName: () => String) extends X2CpgScope[String, N scopeNode } + /** @return + * true if the current scope is top-level, i.e., a direct child of the PHP script. 
+ */ + def isTopLevel: Boolean = stack.collectFirst { case ScopeElement(PhpScopeElement(x: NewTypeDecl), _) => + x.name.endsWith(NamespaceTraversal.globalNamespaceName) + }.isDefined + def getNewClassTmp: String = { stack.headOption match { case Some(node) => diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala new file mode 100644 index 000000000000..d1455cdb3cff --- /dev/null +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala @@ -0,0 +1,59 @@ +package io.joern.php2cpg.passes + +import io.joern.php2cpg.datastructures.{PhpMethod, PhpProgramSummary} +import io.joern.php2cpg.parser.PhpParser +import io.joern.php2cpg.{Config, Php2Cpg} +import io.shiftleft.semanticcpg.utils.FileUtil +import org.scalatest.Assertion +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.nio.file.Files + +class PhpSummaryPassTests extends AnyWordSpec with Matchers { + + import PhpSummaryPassTests.* + + "pre-parsing a file with a top-level function should provide a summary of that function" in { + assertAgainstTempFile( + """ + val summary = programSummary.namespaceToType + summary.size shouldBe 1 + val globalType = summary("Test.php:").head + globalType.name shouldBe "Test.php:" + globalType.methods should contain( + PhpMethod("foo", List(("a", "ANY"), ("b", "ANY")), "int", Some("Test.php:")) + ) + } + ) + } + +} + +object PhpSummaryPassTests { + + import FileUtil.PathExt + + case class ConfigAndParser(config: Config, parser: PhpParser) + + def assertAgainstTempFile(code: String, assertion: PhpProgramSummary => Assertion): Unit = { + FileUtil.usingTemporaryDirectory("php-test") { tmpDirPath => + val tmpFilePath = tmpDirPath / "Test.php" + Files.createFile(tmpFilePath) + FileUtil.writeBytes(tmpFilePath, code.getBytes) + val config = 
Config().withInputPath(tmpFilePath.toString) + PhpParser.getParser(config) match { + case Some(parser) => + new Php2Cpg().parseFiles(config, Option(parser)).map(_.programSummary).headOption match { + case Some(summary) => assertion(summary) + case None => Matchers.fail(s"Unable to obtain summary from given code! See logs for details.") + } + case None => Matchers.fail(s"Unable to create a PHP parser! See logs for details.") + } + } + } + +} diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala index 0cf55b76fc97..49714fb4a43e 100644 --- a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala +++ b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/testfixtures/PhpCode2CpgFixture.scala @@ -1,7 +1,7 @@ package io.joern.php2cpg.testfixtures import io.joern.dataflowengineoss.DefaultSemantics -import io.joern.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics} +import io.joern.dataflowengineoss.semanticsloader.Semantics import io.joern.dataflowengineoss.testfixtures.{SemanticCpgTestFixture, SemanticTestCpg} import io.joern.php2cpg.{Config, Php2Cpg} import io.joern.x2cpg.frontendspecific.php2cpg From 13beee02841488fa9fc244f03a8bc2666c75554b Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Mon, 5 May 2025 13:08:01 +0200 Subject: [PATCH 3/3] Added tests for classes, methods, and fields --- .../php2cpg/passes/PhpSummaryPassTests.scala | 41 +++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala index d1455cdb3cff..9d9412098cc9 100644 --- a/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala +++ 
b/joern-cli/frontends/php2cpg/src/test/scala/io/joern/php2cpg/passes/PhpSummaryPassTests.scala @@ -1,6 +1,6 @@ package io.joern.php2cpg.passes -import io.joern.php2cpg.datastructures.{PhpMethod, PhpProgramSummary} +import io.joern.php2cpg.datastructures.{PhpField, PhpMethod, PhpProgramSummary} import io.joern.php2cpg.parser.PhpParser import io.joern.php2cpg.{Config, Php2Cpg} import io.shiftleft.semanticcpg.utils.FileUtil @@ -17,7 +17,7 @@ class PhpSummaryPassTests extends AnyWordSpec with Matchers { "pre-parsing a file with a top-level function should provide a summary of that function" in { assertAgainstTempFile( """ val summary = programSummary.namespaceToType @@ -25,12 +25,47 @@ class PhpSummaryPassTests extends AnyWordSpec with Matchers { val globalType = summary("Test.php:").head globalType.name shouldBe "Test.php:" globalType.methods should contain( - PhpMethod("foo", List(("a", "ANY"), ("b", "ANY")), "int", Some("Test.php:")) + PhpMethod("foo", List(("a", "ANY"), ("b", "string")), "int", Some("Test.php:")) ) } ) } + "pre-parsing a file with a top-level class with a nested function should provide a summary of that class" in { + assertAgainstTempFile( + """ + val summary = programSummary.namespaceToType + summary.size shouldBe 1 + val fooType = summary("Test.php:").filter(_.name == "Foo").head + fooType.methods should contain(PhpMethod("foo", List(("this", "Foo"), ("x", "int")), "int", Some("Foo"))) + fooType.methods should contain(PhpMethod("__construct", List(("this", "Foo")), "ANY", Some("Foo"))) + } + ) + } + + "pre-parsing a file with a top-level class with a nested constant should provide a summary of that class" in { + assertAgainstTempFile( + """ + val summary = programSummary.namespaceToType + summary.size shouldBe 1 + val fooType = summary("Test.php:").filter(_.name == "Foo").head + fooType.fields should contain(PhpField("B", "ANY")) + } + ) + } + } object PhpSummaryPassTests {