Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use effective pom to analyze pom.xml #4710

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cli/azd/internal/appdetect/appdetect.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/azure/azure-dev/cli/azd/pkg/exec"
"github.com/azure/azure-dev/cli/azd/pkg/tools/dotnet"
"github.com/azure/azure-dev/cli/azd/pkg/tools/maven"
"github.com/bmatcuk/doublestar/v4"
)

Expand Down Expand Up @@ -179,7 +180,8 @@ type projectDetector interface {
var allDetectors = []projectDetector{
// Order here determines precedence when two projects are in the same directory.
// This is unlikely to occur in practice, but reordering could help to break the tie in these cases.
&javaDetector{},
&javaDetector{
mvnCli: maven.NewCli(exec.NewCommandRunner(nil))},
&dotNetAppHostDetector{
// TODO(ellismg): Remove ambient authority.
dotnetCli: dotnet.NewCli(exec.NewCommandRunner(nil)),
Expand Down
105 changes: 6 additions & 99 deletions cli/azd/internal/appdetect/java.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@ package appdetect

import (
"context"
"encoding/xml"
"fmt"
"io/fs"
"maps"
"os"
"path/filepath"
"slices"
"strings"

"github.com/azure/azure-dev/cli/azd/pkg/tools/maven"
)

type javaDetector struct {
rootProjects []mavenProject
mvnCli *maven.Cli
}

func (jd *javaDetector) Language() Language {
Expand All @@ -23,28 +21,18 @@ func (jd *javaDetector) Language() Language {
func (jd *javaDetector) DetectProject(ctx context.Context, path string, entries []fs.DirEntry) (*Project, error) {
for _, entry := range entries {
if strings.ToLower(entry.Name()) == "pom.xml" {
pomFile := filepath.Join(path, entry.Name())
project, err := readMavenProject(pomFile)
pomFilePath := filepath.Join(path, entry.Name())
project, err := toMavenProject(ctx, jd.mvnCli, pomFilePath)
if err != nil {
return nil, fmt.Errorf("error reading pom.xml: %w", err)
}

if len(project.Modules) > 0 {
if len(project.pom.Modules) > 0 {
// This is a multi-module project, we will capture the analysis, but return nil
// to continue recursing
jd.rootProjects = append(jd.rootProjects, *project)
return nil, nil
}

var currentRoot *mavenProject
for _, rootProject := range jd.rootProjects {
// we can say that the project is in the root project if the path is under the project
if inRoot := strings.HasPrefix(pomFile, rootProject.path); inRoot {
currentRoot = &rootProject
}
}

_ = currentRoot // use currentRoot here in the analysis
result, err := detectDependencies(project, &Project{
Language: Java,
Path: path,
Expand All @@ -60,84 +48,3 @@ func (jd *javaDetector) DetectProject(ctx context.Context, path string, entries

return nil, nil
}

// mavenProject represents the top-level structure of a Maven POM file.
type mavenProject struct {
XmlName xml.Name `xml:"project"`
Parent parent `xml:"parent"`
Modules []string `xml:"modules>module"` // Capture the modules
Dependencies []dependency `xml:"dependencies>dependency"`
DependencyManagement dependencyManagement `xml:"dependencyManagement"`
Build build `xml:"build"`
path string
}

// Parent represents the parent POM if this project is a module.
type parent struct {
GroupId string `xml:"groupId"`
ArtifactId string `xml:"artifactId"`
Version string `xml:"version"`
}

// Dependency represents a single Maven dependency.
type dependency struct {
GroupId string `xml:"groupId"`
ArtifactId string `xml:"artifactId"`
Version string `xml:"version"`
Scope string `xml:"scope,omitempty"`
}

// DependencyManagement includes a list of dependencies that are managed.
type dependencyManagement struct {
Dependencies []dependency `xml:"dependencies>dependency"`
}

// Build represents the build configuration which can contain plugins.
type build struct {
Plugins []plugin `xml:"plugins>plugin"`
}

// Plugin represents a build plugin.
type plugin struct {
GroupId string `xml:"groupId"`
ArtifactId string `xml:"artifactId"`
Version string `xml:"version"`
}

func readMavenProject(filePath string) (*mavenProject, error) {
bytes, err := os.ReadFile(filePath)
if err != nil {
return nil, err
}

var project mavenProject
if err := xml.Unmarshal(bytes, &project); err != nil {
return nil, fmt.Errorf("parsing xml: %w", err)
}

project.path = filepath.Dir(filePath)

return &project, nil
}

func detectDependencies(mavenProject *mavenProject, project *Project) (*Project, error) {
databaseDepMap := map[DatabaseDep]struct{}{}
for _, dep := range mavenProject.Dependencies {
if dep.GroupId == "com.mysql" && dep.ArtifactId == "mysql-connector-j" {
databaseDepMap[DbMySql] = struct{}{}
}

if dep.GroupId == "org.postgresql" && dep.ArtifactId == "postgresql" {
databaseDepMap[DbPostgres] = struct{}{}
}
}

if len(databaseDepMap) > 0 {
project.DatabaseDeps = slices.SortedFunc(maps.Keys(databaseDepMap),
func(a, b DatabaseDep) int {
return strings.Compare(string(a), string(b))
})
}

return project, nil
}
44 changes: 44 additions & 0 deletions cli/azd/internal/appdetect/maven_project.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package appdetect

import (
"context"
"maps"
"slices"
"strings"

"github.com/azure/azure-dev/cli/azd/pkg/tools/maven"
)

type mavenProject struct {
pom pom
}
Comment on lines +12 to +14
Copy link
Contributor

@weikanglim weikanglim Jan 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see us adding more properties here... Are we okay keeping what we have to avoid code churn?

Copy link
Member Author

@rujche rujche Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm OK to delete mavenProject and use pom directly.

Hi, @saragluna , This option is required by you before. Now if you agree, I'll delete mavenProject and use pom directly. I perfer pom instead of mavenProject because it's unmarshalled from pom.xml

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you could go ahead please. But what I meant (insisted) is we should call this concept a maven project, instead of just a pom, which conceptually should be under the maven project.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make the PR been merged as soon as possible, I'll revert all refactor-related changes, just keep necessary changes.


func toMavenProject(ctx context.Context, mvnCli *maven.Cli, pomFilePath string) (mavenProject, error) {
pom, err := toPom(ctx, mvnCli, pomFilePath)
if err != nil {
return mavenProject{}, err
}
return mavenProject{pom: pom}, nil
}

func detectDependencies(mavenProject mavenProject, project *Project) (*Project, error) {
databaseDepMap := map[DatabaseDep]struct{}{}
for _, dep := range mavenProject.pom.Dependencies {
if dep.GroupId == "com.mysql" && dep.ArtifactId == "mysql-connector-j" {
databaseDepMap[DbMySql] = struct{}{}
}

if dep.GroupId == "org.postgresql" && dep.ArtifactId == "postgresql" {
databaseDepMap[DbPostgres] = struct{}{}
}
}

if len(databaseDepMap) > 0 {
project.DatabaseDeps = slices.SortedFunc(maps.Keys(databaseDepMap),
func(a, b DatabaseDep) int {
return strings.Compare(string(a), string(b))
})
}

return project, nil
}
94 changes: 94 additions & 0 deletions cli/azd/internal/appdetect/pom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package appdetect
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a general go programming convention here, I would like to avoid adding more files like maven_project.go or
pom.go here. We can fit all under java.go. In the future, we could rename this to java_maven.go when gradle support is added.

Copy link
Member Author

@rujche rujche Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a general go programming convention here

  1. Thanks for telling me that. I didn't know this convention before. BTW, could you please share related link about this convention? I guess there should be some official doc like this one: https://go.dev/doc/effective_go
  2. I use different files because:
    2.1. java and pom are different things in concept. Especially for type pom struct, it's unmarshalled from pom.xml, it's a standalone concept.
    2.2. Separate codes into multiple file can avoid one file too long. Too long file will make it hard to maintain.


import (
"context"
"encoding/xml"
"fmt"
"os"
"path/filepath"

"github.com/azure/azure-dev/cli/azd/pkg/tools/maven"
)

func toPom(ctx context.Context, mvnCli *maven.Cli, pomFilePath string) (pom, error) {
result, err := toEffectivePom(ctx, mvnCli, pomFilePath)
if err == nil {
result.path = filepath.Dir(pomFilePath)
return result, nil
}

result, err = unmarshalPomFile(pomFilePath)
if err == nil {
result.path = filepath.Dir(pomFilePath)
// todo: handle pom, for example: <version>${project.version}<version>
Copy link
Member Author

@rujche rujche Jan 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, @weikanglim

  1. My current plan is do this after current PR merged.
  2. Anyway, analyze the unmarshalled pom will be done. If you think toSimulatedEffectivePom is not accurate, I can choose another func name, for example: pom := analyzeUnmarshalledPom(pom)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previous comment here: #4473 (comment)

I still strongly suggest we do not do this. A user is asking azd to scan a Java project that has pom.xml. The pom.xml clearly requires maven tooling installed to run. User also needs maven tooling installed to run azd package or azd up. The overall scenario requires the user to install maven.

I don't like the idea of trying to replicate maven evaluation because evaluation rules can be tricky to replicate; and I prefer tools that just work exactly, instead of tools that are subtly wrong. This can lead to downstream impacts that are harder to debug.

On the flip side, if we are confident in replicating maven evaluation rules, I would prefer removing the current effective-POM evaluation and have the native-go implementation take over -- it's cool work to have a 100% compatible maven evaluator in go, but I'm not confident that we can always replicate it since it's not a fully documented standard -- and it's one that changes over time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we end up deciding removing the maven evaluation, perhaps pom_test.go can be removed along those lines as well.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, @weikanglim

I still strongly suggest we do not do this.

  1. Thanks for the detailed explanation. Now I deleted the todo. My current plan is use mvn help:effective-pom only, will not analyze the pom file by go code.

  2. When mvn command not found, current behavior is to just unmarshall the pom file. Do you think it's necessary to exit analyze and ask customer to install mvn?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we end up deciding removing the maven evaluation, perhaps pom_test.go can be removed along those lines as well.

Agree. Now effective pom is generated by mvn help:effective-pom, it's not necessary to test it. I deleted pom_test.go.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think it's necessary to exit analyze and ask customer to install mvn?

Yes, I think it's better than doing a partial analysis

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated in this commit: bfab5a3

return result, nil
}
return pom{}, err
}

func toEffectivePom(ctx context.Context, mvnCli *maven.Cli, pomFilePath string) (pom, error) {
effectivePom, err := mvnCli.EffectivePom(ctx, pomFilePath)
if err != nil {
return pom{}, err
}
var resultPom pom
err = xml.Unmarshal([]byte(effectivePom), &resultPom)
return resultPom, err
}

func unmarshalPomFile(pomFilePath string) (pom, error) {
bytes, err := os.ReadFile(pomFilePath)
if err != nil {
return pom{}, err
}

var result pom
if err := xml.Unmarshal(bytes, &result); err != nil {
return pom{}, fmt.Errorf("parsing xml: %w", err)
}

return result, nil
}

// pom represents the top-level structure of a Maven POM file.
type pom struct {
XmlName xml.Name `xml:"project"`
Parent parent `xml:"parent"`
Modules []string `xml:"modules>module"` // Capture the modules
Dependencies []dependency `xml:"dependencies>dependency"`
DependencyManagement dependencyManagement `xml:"dependencyManagement"`
Build build `xml:"build"`
path string
}

// Parent represents the parent POM if this project is a module.
type parent struct {
GroupId string `xml:"groupId"`
ArtifactId string `xml:"artifactId"`
Version string `xml:"version"`
}

// Dependency represents a single Maven dependency.
type dependency struct {
GroupId string `xml:"groupId"`
ArtifactId string `xml:"artifactId"`
Version string `xml:"version"`
Scope string `xml:"scope,omitempty"`
}

// DependencyManagement includes a list of dependencies that are managed.
type dependencyManagement struct {
Dependencies []dependency `xml:"dependencies>dependency"`
}

// Build represents the build configuration which can contain plugins.
type build struct {
Plugins []plugin `xml:"plugins>plugin"`
}

// Plugin represents a build plugin.
type plugin struct {
GroupId string `xml:"groupId"`
ArtifactId string `xml:"artifactId"`
Version string `xml:"version"`
}
Loading
Loading