diff --git a/examples/language-sdk-instrumentation/java/rideshare/.pyroscope.yaml b/examples/language-sdk-instrumentation/java/rideshare/.pyroscope.yaml index 6758b4e1da..d6167d946e 100644 --- a/examples/language-sdk-instrumentation/java/rideshare/.pyroscope.yaml +++ b/examples/language-sdk-instrumentation/java/rideshare/.pyroscope.yaml @@ -8,17 +8,351 @@ source_code: local: path: src/main/java - function_name: - - prefix: java + - prefix: com/fasterxml/jackson/core + - prefix: com/fasterxml/jackson/core/json + language: java + source: + github: + owner: FasterXML + repo: jackson-core + ref: v2.13.3 + path: src/main/java + - function_name: + - prefix: com/fasterxml/jackson/databind + - prefix: com/fasterxml/jackson/databind/deser + - prefix: com/fasterxml/jackson/databind/jsontype + - prefix: com/fasterxml/jackson/databind/ser + language: java + source: + github: + owner: FasterXML + repo: jackson-databind + ref: v2.13.3 + path: src/main/java + - function_name: + - prefix: com/fasterxml/jackson/datatype/jsr310 + - prefix: com/fasterxml/jackson/datatype/jsr310/deser + - prefix: com/fasterxml/jackson/datatype/jsr310/ser + language: java + source: + github: + owner: FasterXML + repo: jackson-modules-java8 + ref: v2.13.3 + path: jsr310/src/main/java + - function_name: + - prefix: javax/security/auth/message + - prefix: javax/servlet + - prefix: org/apache/catalina + - prefix: org/apache/catalina/authenticator + - prefix: org/apache/catalina/manager + - prefix: org/apache/catalina/valves + - prefix: org/apache/catalina/webresources + - prefix: org/apache/coyote + - prefix: org/apache/coyote/http11 + - prefix: org/apache/juli + - prefix: org/apache/naming + - prefix: org/apache/naming/factory + - prefix: org/apache/tomcat + - prefix: org/apache/tomcat/util + - prefix: org/apache/tomcat/util/bcel + - prefix: org/apache/tomcat/util/descriptor + - prefix: org/apache/tomcat/util/http + - prefix: org/apache/tomcat/util/http/fileupload + - prefix: 
org/apache/tomcat/util/http/fileupload/util + - prefix: org/apache/tomcat/util/modeler + - prefix: org/apache/tomcat/util/net + - prefix: org/apache/tomcat/util/net/openssl + language: java + source: + github: + owner: apache + repo: tomcat + ref: 9.0.63 + path: java + - function_name: + - prefix: javax/websocket + - prefix: org/apache/tomcat/websocket + language: java + source: + github: + owner: apache + repo: tomcat + ref: 9.0.63 + path: java + - function_name: + - prefix: org/apache/el + language: java + source: + github: + owner: apache + repo: tomcat + ref: 9.0.63 + path: java/org/apache/el + - function_name: + - prefix: ch/qos/logback/classic + - prefix: ch/qos/logback/classic/joran + - prefix: ch/qos/logback/classic/net + - prefix: ch/qos/logback/classic/pattern + - prefix: ch/qos/logback/classic/selector + language: java + source: + github: + owner: ceki + repo: logback + ref: 1.2.11 + path: logback-classic/src/main/java + - function_name: + - prefix: ch/qos/logback/core + - prefix: ch/qos/logback/core/joran + - prefix: ch/qos/logback/core/joran/event + - prefix: ch/qos/logback/core/joran/util + - prefix: ch/qos/logback/core/net + - prefix: ch/qos/logback/core/pattern + - prefix: ch/qos/logback/core/rolling + language: java + source: + github: + owner: ceki + repo: logback + ref: 1.2.11 + path: logback-core/src/main/java + - function_name: + - prefix: javax/annotation + language: java + source: + github: + owner: eclipse-ee4j + repo: common-annotations-api + ref: 1.3.5 + path: jakarta.annotation-api/src/main/java + - function_name: + - prefix: META-INF/versions + - prefix: io/pyroscope + - prefix: io/pyroscope/javaagent + - prefix: io/pyroscope/labels + - prefix: io/pyroscope/vendor/com/google/protobuf + - prefix: io/pyroscope/vendor/com/squareup/moshi + - prefix: io/pyroscope/vendor/kotlin + - prefix: io/pyroscope/vendor/kotlin/collections + - prefix: io/pyroscope/vendor/kotlin/coroutines + - prefix: io/pyroscope/vendor/kotlin/internal + - prefix: 
io/pyroscope/vendor/kotlin/io + - prefix: io/pyroscope/vendor/kotlin/jvm + - prefix: io/pyroscope/vendor/kotlin/jvm/internal + - prefix: io/pyroscope/vendor/kotlin/random + - prefix: io/pyroscope/vendor/okhttp3 + - prefix: io/pyroscope/vendor/okhttp3/internal + - prefix: io/pyroscope/vendor/okhttp3/internal/platform + - prefix: io/pyroscope/vendor/okio + language: java + source: + github: + owner: grafana + repo: pyroscope-java + ref: v2.1.2 + path: agent/src/main/java + - function_name: + - prefix: java/lang + - prefix: java/util + - prefix: java/io + - prefix: java/net + - prefix: java/time + - prefix: java/reflect + - prefix: java/security + - prefix: java/math + - prefix: java/text + - prefix: java/nio + - prefix: java/concurrent + - prefix: java/beans + - prefix: java/awt + - prefix: java/applet + - prefix: javax/annotation + - prefix: javax/crypto + - prefix: javax/net + - prefix: javax/security + - prefix: javax/sql + - prefix: javax/xml + - prefix: jdk/internal + - prefix: jdk/nashorn + - prefix: sun/misc + - prefix: sun/nio + - prefix: sun/reflect + - prefix: sun/security + - prefix: sun/util language: java source: github: owner: openjdk - repo: jdk - ref: jdk-17+0 + repo: jdk17 + ref: master path: src/java.base/share/classes + - function_name: + - prefix: org/slf4j + language: java + source: + github: + owner: qos-ch + repo: slf4j + ref: v1.7.36 + path: slf4j-api/src/main/java + - function_name: + - prefix: org/springframework/boot + - prefix: org/springframework/boot/context + - prefix: org/springframework/boot/context/properties + - prefix: org/springframework/boot/context/properties/bind + - prefix: org/springframework/boot/diagnostics + - prefix: org/springframework/boot/jdbc + - prefix: org/springframework/boot/logging + - prefix: org/springframework/boot/orm/jpa + - prefix: org/springframework/boot/r2dbc + - prefix: org/springframework/boot/rsocket + - prefix: org/springframework/boot/security + - prefix: org/springframework/boot/sql/init + - 
prefix: org/springframework/boot/validation + - prefix: org/springframework/boot/web/embedded + - prefix: org/springframework/boot/web/reactive + - prefix: org/springframework/boot/web/servlet + language: java + source: + github: + owner: spring-projects + repo: spring-boot + ref: v2.7.0 + path: spring-boot-project/spring-boot/src/main/java + - function_name: + - prefix: org/springframework/boot/autoconfigure + - prefix: org/springframework/boot/autoconfigure/data + - prefix: org/springframework/boot/autoconfigure/graphql + - prefix: org/springframework/boot/autoconfigure/http + - prefix: org/springframework/boot/autoconfigure/jdbc + - prefix: org/springframework/boot/autoconfigure/jms + - prefix: org/springframework/boot/autoconfigure/ldap + - prefix: org/springframework/boot/autoconfigure/mongo + - prefix: org/springframework/boot/autoconfigure/security + - prefix: org/springframework/boot/autoconfigure/security/oauth2/client + - prefix: org/springframework/boot/autoconfigure/security/oauth2/resource + - prefix: org/springframework/boot/autoconfigure/transaction + - prefix: org/springframework/boot/autoconfigure/web + - prefix: org/springframework/boot/autoconfigure/web/reactive + - prefix: org/springframework/boot/autoconfigure/web/servlet + - prefix: org/springframework/boot/autoconfigure/webservices + - prefix: org/springframework/boot/autoconfigure/websocket + language: java + source: + github: + owner: spring-projects + repo: spring-boot + ref: v2.7.0 + path: spring-boot-project/spring-boot-autoconfigure/src/main/java + - function_name: + - prefix: org/aopalliance + - prefix: org/springframework/aop + - prefix: org/springframework/aop/aspectj + - prefix: org/springframework/aop/framework + - prefix: org/springframework/aop/framework/autoproxy + - prefix: org/springframework/aop/support + - prefix: org/springframework/aop/target + language: java + source: + github: + owner: spring-projects + repo: spring-framework + ref: v5.3.20 + path: 
spring-aop/src/main/java + - function_name: + - prefix: org/apache/commons/logging + language: java + source: + github: + owner: spring-projects + repo: spring-framework + ref: v5.3.20 + path: spring-jcl/src/main/java + - function_name: + - prefix: org/springframework/beans + - prefix: org/springframework/beans/factory + language: java + source: + github: + owner: spring-projects + repo: spring-framework + ref: v5.3.20 + path: spring-beans/src/main/java + - function_name: + - prefix: org/springframework/cache + - prefix: org/springframework/context + - prefix: org/springframework/ejb + - prefix: org/springframework/format + - prefix: org/springframework/format/datetime + - prefix: org/springframework/format/number + - prefix: org/springframework/instrument/classloading + - prefix: org/springframework/jmx + - prefix: org/springframework/jmx/export + - prefix: org/springframework/jndi + - prefix: org/springframework/remoting + - prefix: org/springframework/scheduling + - prefix: org/springframework/scripting + - prefix: org/springframework/ui + - prefix: org/springframework/ui/context + - prefix: org/springframework/validation + language: java + source: + github: + owner: spring-projects + repo: spring-framework + ref: v5.3.20 + path: spring-context/src/main/java + - function_name: + - prefix: org/springframework/cglib + - prefix: org/springframework/cglib/core + - prefix: org/springframework/cglib/transform + - prefix: org/springframework/core + - prefix: org/springframework/core/convert + - prefix: org/springframework/core/io + - prefix: org/springframework/core/metrics + - prefix: org/springframework/core/serializer + - prefix: org/springframework/core/task + - prefix: org/springframework/core/type + - prefix: org/springframework/objenesis + - prefix: org/springframework/objenesis/instantiator + - prefix: org/springframework/util + language: java + source: + github: + owner: spring-projects + repo: spring-framework + ref: v5.3.20 + path: spring-core/src/main/java 
+ - function_name: + - prefix: org/springframework/expression + - prefix: org/springframework/expression/spel + language: java + source: + github: + owner: spring-projects + repo: spring-framework + ref: v5.3.20 + path: spring-expression/src/main/java - function_name: - prefix: org/springframework/http + - prefix: org/springframework/http/client + - prefix: org/springframework/http/codec + - prefix: org/springframework/http/converter + - prefix: org/springframework/http/server + - prefix: org/springframework/remoting - prefix: org/springframework/web + - prefix: org/springframework/web/bind + - prefix: org/springframework/web/client + - prefix: org/springframework/web/context + - prefix: org/springframework/web/context/request + - prefix: org/springframework/web/cors + - prefix: org/springframework/web/filter + - prefix: org/springframework/web/jsf + - prefix: org/springframework/web/method + - prefix: org/springframework/web/multipart + - prefix: org/springframework/web/server + - prefix: org/springframework/web/util language: java source: github: @@ -28,6 +362,12 @@ source_code: path: spring-web/src/main/java - function_name: - prefix: org/springframework/web/servlet + - prefix: org/springframework/web/servlet/config + - prefix: org/springframework/web/servlet/function + - prefix: org/springframework/web/servlet/mvc + - prefix: org/springframework/web/servlet/mvc/method + - prefix: org/springframework/web/servlet/tags + - prefix: org/springframework/web/servlet/view language: java source: github: diff --git a/pkg/frontend/vcs/config/config.go b/pkg/frontend/vcs/config/config.go index 91368b9bc9..78147c546d 100644 --- a/pkg/frontend/vcs/config/config.go +++ b/pkg/frontend/vcs/config/config.go @@ -44,7 +44,7 @@ type SourceCodeConfig struct { // MappingConfig represents a single source code path mapping type MappingConfig struct { - Path []Match `yaml:"path"` + Path []Match `yaml:"path,omitempty"` FunctionName []Match `yaml:"function_name"` Language string 
`yaml:"language"` diff --git a/tools/source-code-mapping-generators/java-jar-mapper/.gitignore b/tools/source-code-mapping-generators/java-jar-mapper/.gitignore new file mode 100644 index 0000000000..65ec764e6c --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/.gitignore @@ -0,0 +1 @@ +java-jar-mapper \ No newline at end of file diff --git a/tools/source-code-mapping-generators/java-jar-mapper/README.md b/tools/source-code-mapping-generators/java-jar-mapper/README.md new file mode 100644 index 0000000000..0600752843 --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/README.md @@ -0,0 +1,44 @@ +# Java Jar Mapper + +A Go tool that updates a `.pyroscope.yaml` file with source code +mappings for 3rd party libraries found in a given JAR. + +Note this tool relies on multiple 3rd party APIs. Downtime +in these APIs can result in non deterministic output from this tool. + +This tool relies on `jar-mappings.json` to resolve common +3rd party libraries that are not properly resolved by the heuristics +in the tool. + +## Dependencies + +- JDK [for `jar` executable] + +## Usage + +```bash +go run . [flags] +``` + +### Flags + +- `-jar string`: Path to the Java JAR file to analyze (required) +- `-jdk-version string`: JDK version for JDK function mappings (e.g., '8', '11', '17', '21'). If not specified, JDK mappings will not be generated. +- `-config string`: `.pyroscope.yaml` to modify with new source code mappings. If not specified, a valid `.pyroscope.yaml` will be printed to stdout. +- `-help`: Show help information + +### Example + +```bash +# Generate .pyroscope.yaml to stdout with source code mappings +go run . -jar /path/to/jar/foo.jar + +# Update .pyroscope.yaml with source code mappings and JDK mappings +go run . -jar /path/to/jar/foo.jar -jdk-version 19 -config .pyroscope.yaml +``` + +## Build + +```bash +go build -o java-jar-mapper . 
+``` \ No newline at end of file diff --git a/tools/source-code-mapping-generators/java-jar-mapper/config.go b/tools/source-code-mapping-generators/java-jar-mapper/config.go new file mode 100644 index 0000000000..482beab616 --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/config.go @@ -0,0 +1,114 @@ +package main + +import ( + _ "embed" + "encoding/json" + "fmt" + "io" + "os" + + "gopkg.in/yaml.v3" + + "github.com/grafana/pyroscope/pkg/frontend/vcs/config" +) + +//go:embed jar-mappings.json +var jarMappingsJSON []byte + +// JarMapping represents a hardcoded mapping for a JAR file. +type JarMapping struct { + Jar string `json:"jar"` // JAR name (artifactId) to match + Owner string `json:"owner"` // GitHub owner + Repo string `json:"repo"` // GitHub repository + Path string `json:"path"` // Source path in repository +} + +// JarMappingsConfig represents the JSON configuration file. +type JarMappingsConfig struct { + Mappings []JarMapping `json:"mappings"` +} + +// ConfigService handles loading and querying JAR mappings. +type ConfigService struct { + mappings *JarMappingsConfig +} + +func (s *ConfigService) LoadJarMappings() (*JarMappingsConfig, error) { + if len(jarMappingsJSON) == 0 { + s.mappings = nil + return nil, nil + } + + var config JarMappingsConfig + if err := json.Unmarshal(jarMappingsJSON, &config); err != nil { + return nil, fmt.Errorf("failed to parse JAR mappings JSON: %w", err) + } + + s.mappings = &config + return &config, nil +} + +func (s *ConfigService) FindJarMapping(artifactId string) *JarMapping { + if s.mappings == nil { + return nil + } + + for i := range s.mappings.Mappings { + if s.mappings.Mappings[i].Jar == artifactId { + return &s.mappings.Mappings[i] + } + } + + return nil +} + +func GenerateOrMergeConfig(configPath string, mappings []config.MappingConfig, jdkMappings []config.MappingConfig) error { + if len(jdkMappings) > 0 { + mappings = append(mappings, jdkMappings...) 
+ fmt.Fprintf(os.Stderr, "Added %d JDK mapping(s)\n", len(jdkMappings)) + } + + var cfg config.PyroscopeConfig + + if configPath != "" { + data, err := os.ReadFile(configPath) + if err != nil { + return fmt.Errorf("failed to read config file: %w", err) + } + + existingCfg, err := config.ParsePyroscopeConfig(data) + if err != nil { + return fmt.Errorf("failed to parse existing config file: %w", err) + } + cfg = *existingCfg + + cfg.SourceCode.Mappings = MergeMappings(cfg.SourceCode.Mappings, mappings) + } else { + cfg = config.PyroscopeConfig{ + Version: config.VersionV1, + SourceCode: config.SourceCodeConfig{ + Mappings: mappings, + }, + } + } + + SortMappings(cfg.SourceCode.Mappings) + + var output io.Writer = os.Stdout + if configPath != "" { + file, err := os.Create(configPath) + if err != nil { + return fmt.Errorf("failed to create config file: %w", err) + } + defer file.Close() + output = file + } + + encoder := yaml.NewEncoder(output) + encoder.SetIndent(2) + if err := encoder.Encode(cfg); err != nil { + return fmt.Errorf("failed to encode YAML: %w", err) + } + + return nil +} diff --git a/tools/source-code-mapping-generators/java-jar-mapper/github.go b/tools/source-code-mapping-generators/java-jar-mapper/github.go new file mode 100644 index 0000000000..b18b6cacfb --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/github.go @@ -0,0 +1,86 @@ +package main + +import ( + "fmt" + "os" + "regexp" + "strings" +) + +var ( + // githubRepoPatterns are compiled regex patterns for extracting GitHub owner/repo from URLs. 
+ githubRepoPatterns = []*regexp.Regexp{ + regexp.MustCompile(`github\.com[:/]([^/]+)/([^/]+?)(?:\.git)?/?$`), + regexp.MustCompile(`github\.com[:/]([^/]+)/([^/]+)`), + } +) + +func ExtractGitHubRepoFromURL(urlStr string) (owner, repo string, err error) { + if !strings.Contains(urlStr, "github.com") { + return "", "", fmt.Errorf("URL does not contain github.com: %s", urlStr) + } + + for _, pattern := range githubRepoPatterns { + matches := pattern.FindStringSubmatch(urlStr) + if len(matches) >= 3 { + owner = strings.TrimSpace(matches[1]) + repo = strings.TrimSpace(strings.TrimSuffix(matches[2], ".git")) + if owner != "" && repo != "" && owner != "/" && repo != "/" { + return owner, repo, nil + } + } + } + + return "", "", fmt.Errorf("could not extract GitHub repo from URL: %s", urlStr) +} + +// RepoResolver is an interface for resolving GitHub repositories from Maven coordinates. +type RepoResolver interface { + ResolveRepo(groupId, artifactId, version string) (owner, repo string, err error) +} + +// GitHubResolver handles resolving GitHub repositories from Maven coordinates. +// It uses a simple strategy: deps.dev API first, then hardcoded jar-mappings. +type GitHubResolver struct { + depsDevService *DepsDevService + configService *ConfigService +} + +func NewGitHubResolver(depsDevService *DepsDevService, configService *ConfigService) *GitHubResolver { + return &GitHubResolver{ + depsDevService: depsDevService, + configService: configService, + } +} + +// ResolveRepo resolves a GitHub repository using Maven coordinates. +// Strategy: +// 1. Check hardcoded jar-mappings.json first (for known edge cases) +// 2. 
Query deps.dev API (primary external source) +func (r *GitHubResolver) ResolveRepo(coords *MavenCoordinates) (owner, repo string, err error) { + // Strategy 1: Check hardcoded mapping first + if r.configService != nil { + jarMapping := r.configService.FindJarMapping(coords.ArtifactID) + if jarMapping != nil { + fmt.Fprintf(os.Stderr, " Found hardcoded mapping: %s/%s\n", jarMapping.Owner, jarMapping.Repo) + return jarMapping.Owner, jarMapping.Repo, nil + } + } + + // Strategy 2: Query deps.dev API + if r.depsDevService != nil && coords.GroupID != "" { + fmt.Fprintf(os.Stderr, " Querying deps.dev for %s:%s:%s\n", coords.GroupID, coords.ArtifactID, coords.Version) + owner, repo, err = r.depsDevService.ResolveRepo(coords.GroupID, coords.ArtifactID, coords.Version) + if err == nil { + fmt.Fprintf(os.Stderr, " Successfully resolved via deps.dev: %s/%s\n", owner, repo) + return owner, repo, nil + } + fmt.Fprintf(os.Stderr, " deps.dev lookup failed: %v\n", err) + } + + // No resolution found + if coords.GroupID == "" { + return "", "", fmt.Errorf("cannot resolve GitHub repo: groupId unknown for %s:%s", coords.ArtifactID, coords.Version) + } + return "", "", fmt.Errorf("cannot resolve GitHub repo for %s:%s:%s", coords.GroupID, coords.ArtifactID, coords.Version) +} diff --git a/tools/source-code-mapping-generators/java-jar-mapper/jar-mappings.json b/tools/source-code-mapping-generators/java-jar-mapper/jar-mappings.json new file mode 100644 index 0000000000..c5795364cd --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/jar-mappings.json @@ -0,0 +1,124 @@ +{ + "mappings": [ + { + "jar": "tomcat-embed-core", + "owner": "apache", + "repo": "tomcat", + "path": "java" + }, + { + "jar": "tomcat-embed-el", + "owner": "apache", + "repo": "tomcat", + "path": "java/org/apache/el" + }, + { + "jar": "tomcat-embed-websocket", + "owner": "apache", + "repo": "tomcat", + "path": "java" + }, + { + "jar": "spring-expression", + "owner": "spring-projects", + "repo": 
"spring-framework", + "path": "spring-expression/src/main/java" + }, + { + "jar": "spring-web", + "owner": "spring-projects", + "repo": "spring-framework", + "path": "spring-web/src/main/java" + }, + { + "jar": "spring-webmvc", + "owner": "spring-projects", + "repo": "spring-framework", + "path": "spring-webmvc/src/main/java" + }, + { + "jar": "spring-core", + "owner": "spring-projects", + "repo": "spring-framework", + "path": "spring-core/src/main/java" + }, + { + "jar": "spring-beans", + "owner": "spring-projects", + "repo": "spring-framework", + "path": "spring-beans/src/main/java" + }, + { + "jar": "spring-context", + "owner": "spring-projects", + "repo": "spring-framework", + "path": "spring-context/src/main/java" + }, + { + "jar": "spring-aop", + "owner": "spring-projects", + "repo": "spring-framework", + "path": "spring-aop/src/main/java" + }, + { + "jar": "spring-jcl", + "owner": "spring-projects", + "repo": "spring-framework", + "path": "spring-jcl/src/main/java" + }, + { + "jar": "jackson-annotations", + "owner": "FasterXML", + "repo": "jackson-annotations", + "path": "src/main/java" + }, + { + "jar": "jackson-core", + "owner": "FasterXML", + "repo": "jackson-core", + "path": "src/main/java" + }, + { + "jar": "jackson-databind", + "owner": "FasterXML", + "repo": "jackson-databind", + "path": "src/main/java" + }, + { + "jar": "jackson-datatype-jdk8", + "owner": "FasterXML", + "repo": "jackson-modules-java8", + "path": "jdk8/src/main/java" + }, + { + "jar": "jackson-datatype-jsr310", + "owner": "FasterXML", + "repo": "jackson-modules-java8", + "path": "jsr310/src/main/java" + }, + { + "jar": "jackson-module-parameter-names", + "owner": "FasterXML", + "repo": "jackson-modules-java8", + "path": "parameter-names/src/main/java" + }, + { + "jar": "spring-boot", + "owner": "spring-projects", + "repo": "spring-boot", + "path": "spring-boot-project/spring-boot/src/main/java" + }, + { + "jar": "spring-boot-autoconfigure", + "owner": "spring-projects", + "repo": 
"spring-boot", + "path": "spring-boot-project/spring-boot-autoconfigure/src/main/java" + }, + { + "jar": "agent", + "owner": "grafana", + "repo": "pyroscope-java", + "path": "agent/src/main/java" + } + ] +} \ No newline at end of file diff --git a/tools/source-code-mapping-generators/java-jar-mapper/jar.go b/tools/source-code-mapping-generators/java-jar-mapper/jar.go new file mode 100644 index 0000000000..7f156b9824 --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/jar.go @@ -0,0 +1,667 @@ +package main + +import ( + "archive/zip" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "regexp" + "sort" + "strings" +) + +type CommandRunner interface { + RunCommand(name string, args ...string) ([]byte, error) +} + +type DefaultCommandRunner struct{} + +func (r *DefaultCommandRunner) RunCommand(name string, args ...string) ([]byte, error) { + cmd := exec.Command(name, args...) + return cmd.Output() +} + +type JARAnalyzer struct { + runner CommandRunner +} + +func NewJARAnalyzer() *JARAnalyzer { + return &JARAnalyzer{ + runner: &DefaultCommandRunner{}, + } +} + +func (a *JARAnalyzer) ExtractClassPrefixes(jarPath string) ([]string, error) { + output, err := a.runner.RunCommand("jar", "-tf", jarPath) + if err != nil { + return nil, err + } + + classPattern := regexp.MustCompile(`^([^/]+(/[^/]+)*)/[^/]+\.class$`) + packageSet := make(map[string]bool) + + lines := strings.Split(string(output), "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if matches := classPattern.FindStringSubmatch(line); matches != nil { + packageSet[matches[1]] = true + } + } + + if len(packageSet) == 0 { + return nil, fmt.Errorf("no class files found") + } + + packages := make([]string, 0, len(packageSet)) + for pkg := range packageSet { + packages = append(packages, pkg) + } + sort.Strings(packages) + + prefixes := findCommonPrefixes(packages) + return prefixes, nil +} + +func findCommonPrefixes(packages []string) []string { + if len(packages) == 0 { + 
return nil + } + + // Filter out shaded/vendor packages before counting + var filteredPackages []string + for _, pkg := range packages { + if !isShadedPackage(pkg) { + filteredPackages = append(filteredPackages, pkg) + } + } + + // If all packages were filtered out, fall back to original list + if len(filteredPackages) == 0 { + filteredPackages = packages + } + + prefixCount := make(map[string]int) + for _, pkg := range filteredPackages { + parts := strings.Split(pkg, "/") + for i := 1; i <= len(parts); i++ { + prefix := strings.Join(parts[:i], "/") + prefixCount[prefix]++ + } + } + + prefixKeys := make([]string, 0, len(prefixCount)) + for prefix := range prefixCount { + prefixKeys = append(prefixKeys, prefix) + } + sort.Strings(prefixKeys) + + var commonPrefixes []string + seen := make(map[string]bool) + for _, prefix := range prefixKeys { + count := prefixCount[prefix] + if count >= 2 && !seen[prefix] { + commonPrefixes = append(commonPrefixes, prefix) + seen[prefix] = true + } + } + + sort.Slice(commonPrefixes, func(i, j int) bool { + lenI, lenJ := len(commonPrefixes[i]), len(commonPrefixes[j]) + if lenI != lenJ { + return lenI > lenJ + } + return commonPrefixes[i] < commonPrefixes[j] + }) + + packageSet := make(map[string]bool) + for _, pkg := range packages { + packageSet[pkg] = true + } + + var filtered []string + for _, prefix := range commonPrefixes { + if packageSet[prefix] { + filtered = append(filtered, prefix) + continue + } + + isSubstring := false + for _, other := range commonPrefixes { + if prefix != other && strings.HasPrefix(other, prefix+"/") { + isSubstring = true + break + } + } + if !isSubstring { + filtered = append(filtered, prefix) + } + } + + return filtered +} + +// isShadedPackage detects packages that are shaded/relocated dependencies. +// These are dependencies that have been relocated to a different package during the build +// and don't exist in the original source repository. 
+// Common patterns include: +// - vendor/ +// - shaded/ +// - repackaged/ +// - internal/shaded/ +// - relocated/ +func isShadedPackage(pkg string) bool { + shadedPatterns := []string{ + "/vendor/", + "/shaded/", + "/repackaged/", + "/relocated/", + "/internal/shaded/", + "/shadow/", + "/thirdparty/", + } + + pkgLower := strings.ToLower(pkg) + for _, pattern := range shadedPatterns { + if strings.Contains(pkgLower, pattern) { + return true + } + } + + return false +} + +func (a *JARAnalyzer) ExtractManifest(jarPath string) (map[string]string, error) { + reader, err := zip.OpenReader(jarPath) + if err != nil { + return nil, err + } + defer reader.Close() + + for _, f := range reader.File { + if f.Name == "META-INF/MANIFEST.MF" { + rc, err := f.Open() + if err != nil { + return nil, err + } + defer rc.Close() + + data, err := io.ReadAll(rc) + if err != nil { + return nil, err + } + + return parseManifest(string(data)), nil + } + } + + return nil, fmt.Errorf("MANIFEST.MF not found") +} + +func parseManifest(data string) map[string]string { + result := make(map[string]string) + lines := strings.Split(data, "\n") + + var currentKey string + var currentValue strings.Builder + + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + if strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t") { + if currentKey != "" { + currentValue.WriteString(" ") + currentValue.WriteString(strings.TrimSpace(line)) + } + } else { + if currentKey != "" { + if _, exists := result[currentKey]; !exists { + result[currentKey] = currentValue.String() + } + } + + parts := strings.SplitN(line, ":", 2) + if len(parts) == 2 { + key := parts[0] + if key == "Name" { + currentKey = "" + currentValue.Reset() + continue + } + if _, exists := result[key]; !exists { + currentKey = key + currentValue.Reset() + currentValue.WriteString(strings.TrimSpace(parts[1])) + } else { + currentKey = "" + currentValue.Reset() + } + } + } + } + + if currentKey != "" { 
+ if _, exists := result[currentKey]; !exists { + result[currentKey] = currentValue.String() + } + } + + return result +} + +// MavenCoordinates holds Maven artifact coordinates extracted from pom.properties. +type MavenCoordinates struct { + GroupID string + ArtifactID string + Version string +} + +// ExtractPOMProperties extracts Maven coordinates from the embedded pom.properties file. +// Maven JARs typically contain META-INF/maven/{groupId}/{artifactId}/pom.properties. +// For shaded JARs that contain multiple pom.properties files, this function prefers +// the one whose artifactId matches the JAR filename. +func (a *JARAnalyzer) ExtractPOMProperties(jarPath string) (*MavenCoordinates, error) { + reader, err := zip.OpenReader(jarPath) + if err != nil { + return nil, err + } + defer reader.Close() + + // Extract base name from JAR filename for matching + baseName := filepath.Base(jarPath) + baseName = strings.TrimSuffix(baseName, ".jar") + // Extract artifactId from filename (remove version suffix like "-1.2.3") + expectedArtifactId := extractArtifactIdFromFilename(baseName) + + // Collect all valid pom.properties + pomPropsPattern := regexp.MustCompile(`^META-INF/maven/[^/]+/[^/]+/pom\.properties$`) + var allCoords []*MavenCoordinates + var matchingCoords *MavenCoordinates + + for _, f := range reader.File { + if pomPropsPattern.MatchString(f.Name) { + rc, err := f.Open() + if err != nil { + continue + } + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + continue + } + + coords := parsePOMProperties(string(data)) + if coords.GroupID != "" && coords.ArtifactID != "" && coords.Version != "" { + allCoords = append(allCoords, coords) + // Check if this pom.properties matches the JAR filename + if matchingCoords == nil && artifactIdMatchesFilename(coords.ArtifactID, expectedArtifactId, baseName) { + matchingCoords = coords + } + } + } + } + + // Prefer the pom.properties that matches the JAR filename + if matchingCoords != nil { + return matchingCoords, 
nil + } + + // If we have pom.properties but none match the filename, this is likely + // a shaded JAR where the main artifact's metadata was stripped but + // shaded dependency metadata remained. Don't use incorrect coordinates. + if len(allCoords) > 0 { + return nil, fmt.Errorf("pom.properties found but artifactId %q doesn't match JAR filename %q (likely shaded JAR)", + allCoords[0].ArtifactID, baseName) + } + + return nil, fmt.Errorf("no pom.properties found in JAR") +} + +// extractArtifactIdFromFilename extracts the artifactId part from a JAR filename. +// E.g., "spring-web-5.3.20" -> "spring-web", "agent-2.1.2" -> "agent" +func extractArtifactIdFromFilename(baseName string) string { + parts := strings.Split(baseName, "-") + if len(parts) <= 1 { + return baseName + } + // Find where version starts (first part that looks like a version number) + for i := len(parts) - 1; i > 0; i-- { + if looksLikeVersion(parts[i]) { + return strings.Join(parts[:i], "-") + } + } + return baseName +} + +// looksLikeVersion checks if a string looks like a version number. +func looksLikeVersion(s string) bool { + if len(s) == 0 { + return false + } + // Version typically starts with a digit + return s[0] >= '0' && s[0] <= '9' +} + +// artifactIdMatchesFilename checks if a pom.properties artifactId matches the JAR filename. +func artifactIdMatchesFilename(artifactId, expectedArtifactId, baseName string) bool { + // Direct match + if artifactId == expectedArtifactId { + return true + } + // Check if baseName starts with artifactId (handles version suffix variations) + if strings.HasPrefix(baseName, artifactId+"-") || baseName == artifactId { + return true + } + return false +} + +// parsePOMProperties parses a pom.properties file content. 
+func parsePOMProperties(data string) *MavenCoordinates { + coords := &MavenCoordinates{} + lines := strings.Split(data, "\n") + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + parts := strings.SplitN(line, "=", 2) + if len(parts) != 2 { + continue + } + key := strings.TrimSpace(parts[0]) + value := strings.TrimSpace(parts[1]) + switch key { + case "groupId": + coords.GroupID = value + case "artifactId": + coords.ArtifactID = value + case "version": + coords.Version = value + } + } + return coords +} + +// ExtractMavenCoordinates extracts Maven coordinates using multiple strategies: +// 1. pom.properties (most reliable for Maven-published JARs) +// 2. Bazel path parsing (for JARs in Bazel runfiles with Maven path structure) +// 3. MANIFEST.MF + filename parsing (fallback for other JARs) +func (a *JARAnalyzer) ExtractMavenCoordinates(jarPath string) (*MavenCoordinates, error) { + // Try pom.properties first (most reliable) + coords, err := a.ExtractPOMProperties(jarPath) + if err == nil && coords.GroupID != "" && coords.ArtifactID != "" && coords.Version != "" { + return coords, nil + } + + // Try extracting from Bazel path (e.g., .../maven2/io/pyroscope/agent/2.1.2/...) + coords = extractMavenCoordinatesFromPath(jarPath) + if coords.GroupID != "" && coords.ArtifactID != "" && coords.Version != "" { + return coords, nil + } + + // Fallback to manifest + filename parsing + artifactId, version, err := a.extractArtifactInfoFromManifest(jarPath) + if err != nil { + return nil, err + } + + return &MavenCoordinates{ + ArtifactID: artifactId, + Version: version, + // GroupID unknown from manifest/filename + }, nil +} + +// extractMavenCoordinatesFromPath extracts Maven coordinates from a Bazel-style path. 
+// Bazel stores Maven dependencies in paths like: +// +// .../maven2/io/pyroscope/agent/2.1.2/processed_agent-2.1.2.jar +// .../maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar +// +// The structure is: maven2/{groupId as path}/{artifactId}/{version}/{filename} +func extractMavenCoordinatesFromPath(jarPath string) *MavenCoordinates { + // Normalize path separators + jarPath = filepath.ToSlash(jarPath) + + // Find "maven2/" marker in path + maven2Idx := strings.Index(jarPath, "maven2/") + if maven2Idx == -1 { + return &MavenCoordinates{} + } + + // Get the path after "maven2/" + pathAfterMaven2 := jarPath[maven2Idx+len("maven2/"):] + + // Split into segments: [groupId parts..., artifactId, version, filename] + segments := strings.Split(pathAfterMaven2, "/") + if len(segments) < 4 { + return &MavenCoordinates{} + } + + // Last segment is the filename + // Second-to-last is version + // Third-to-last is artifactId + // Everything before that is groupId + version := segments[len(segments)-2] + artifactId := segments[len(segments)-3] + groupIdParts := segments[:len(segments)-3] + + // Validate: version should look like a version + if !looksLikeVersion(version) { + return &MavenCoordinates{} + } + + // Join groupId parts with dots + groupId := strings.Join(groupIdParts, ".") + + return &MavenCoordinates{ + GroupID: groupId, + ArtifactID: artifactId, + Version: version, + } +} + +func (a *JARAnalyzer) extractArtifactInfoFromManifest(jarPath string) (artifactId, version string, err error) { + manifest, err := a.ExtractManifest(jarPath) + if err != nil { + return "", "", fmt.Errorf("missing MANIFEST.MF: %w", err) + } + + baseName := filepath.Base(jarPath) + baseName = strings.TrimSuffix(baseName, ".jar") + artifactId = baseName + var versionFromFilename string + parts := strings.Split(baseName, "-") + if len(parts) > 1 { + lastPart := parts[len(parts)-1] + if strings.ContainsAny(lastPart, "0123456789") { + versionFromFilename = lastPart + artifactId = 
strings.Join(parts[:len(parts)-1], "-") + } + } + + version, ok := manifest["Implementation-Version"] + if !ok || version == "" { + if versionFromFilename != "" { + version = versionFromFilename + } else { + return "", "", fmt.Errorf("missing Implementation-Version in manifest and could not extract from filename") + } + } + + return artifactId, version, nil +} + +type JARExtractor struct{} + +// ExtractThirdPartyJARs extracts 3rd party JARs from a JAR file. +// For Spring Boot fat JARs, it extracts nested JARs from BOOT-INF/lib/. +// For Bazel JARs, it finds dependencies in the .runfiles directory. +// For regular JARs, it returns the JAR itself for processing. +func (e *JARExtractor) ExtractThirdPartyJARs(jarPath string) ([]string, string, func() error, error) { + mainJAR, err := zip.OpenReader(jarPath) + if err != nil { + return nil, "", nil, fmt.Errorf("failed to open JAR: %w", err) + } + defer mainJAR.Close() + + // Check if this is a Spring Boot fat JAR + if isSpringBootFatJAR(mainJAR.File) { + return e.extractSpringBootJARs(mainJAR.File) + } + + // Check for Bazel runfiles directory (e.g., ProjectRunner.runfiles for ProjectRunner.jar) + bazelJARs := e.findBazelRunfileJARs(jarPath) + if len(bazelJARs) > 0 { + // Return both the main JAR and runfiles JARs + allJARs := append([]string{jarPath}, bazelJARs...) + return allJARs, "", func() error { return nil }, nil + } + + // Regular JAR: return the JAR itself for processing + return []string{jarPath}, "", func() error { return nil }, nil +} + +// findBazelRunfileJARs finds 3rd party JARs in Bazel's runfiles directory. +// Bazel uses {name}.runfiles directory adjacent to {name}.jar for dependencies. +func (e *JARExtractor) findBazelRunfileJARs(jarPath string) []string { + // Try both patterns: + // 1. {name}.runfiles (for {name}.jar) + // 2. 
{name}.jar.runfiles + baseName := strings.TrimSuffix(jarPath, ".jar") + runfilesDirs := []string{ + baseName + ".runfiles", + jarPath + ".runfiles", + } + + var jars []string + for _, runfilesDir := range runfilesDirs { + info, err := os.Stat(runfilesDir) + if err != nil || !info.IsDir() { + continue + } + + // Walk the runfiles directory to find JAR files + err = filepath.Walk(runfilesDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return nil // Skip errors + } + if info.IsDir() { + return nil + } + if !strings.HasSuffix(path, ".jar") { + return nil + } + // Skip JDK/JRE JARs + if strings.Contains(path, "local_jdk") || strings.Contains(path, "jre/lib") { + return nil + } + // Skip the main JAR if it appears in runfiles + if filepath.Base(path) == filepath.Base(jarPath) { + return nil + } + jars = append(jars, path) + return nil + }) + if err != nil { + continue + } + + // If we found JARs, return them + if len(jars) > 0 { + return jars + } + } + + return nil +} + +// isSpringBootFatJAR checks if a JAR file is a Spring Boot fat JAR by looking for +// the BOOT-INF directory structure and nested JARs in BOOT-INF/lib/. +func isSpringBootFatJAR(files []*zip.File) bool { + jarPattern := regexp.MustCompile(`^BOOT-INF/lib/.*\.jar$`) + hasBootInf := false + hasNestedJARs := false + + for _, f := range files { + if strings.HasPrefix(f.Name, "BOOT-INF/") { + hasBootInf = true + } + if jarPattern.MatchString(f.Name) { + hasNestedJARs = true + } + if hasBootInf && hasNestedJARs { + return true + } + } + + return false +} + +// extractSpringBootJARs extracts nested JAR files from BOOT-INF/lib/ in a Spring Boot fat JAR. 
+func (e *JARExtractor) extractSpringBootJARs(files []*zip.File) ([]string, string, func() error, error) { + jarPattern := regexp.MustCompile(`^BOOT-INF/lib/.*\.jar$`) + var jarFiles []string + fileMap := make(map[string]*zip.File) + + // Collect all nested JAR files + for _, f := range files { + if jarPattern.MatchString(f.Name) { + jarFiles = append(jarFiles, f.Name) + fileMap[f.Name] = f + } + } + + if len(jarFiles) == 0 { + // This shouldn't happen if isSpringBootFatJAR returned true, + // but handle it gracefully + return nil, "", nil, fmt.Errorf("spring boot fat JAR contains no nested JARs in BOOT-INF/lib/") + } + + sort.Strings(jarFiles) + + // Create temporary directory for extracted JARs + tmpDir, err := os.MkdirTemp("", "jar-mapper-*") + if err != nil { + return nil, "", nil, fmt.Errorf("failed to create temp directory: %w", err) + } + + cleanup := func() error { + return os.RemoveAll(tmpDir) + } + + // Extract each nested JAR to the temp directory + var extractedJARs []string + for _, jarFile := range jarFiles { + f := fileMap[jarFile] + extractedPath := filepath.Join(tmpDir, filepath.Base(jarFile)) + if err := extractFile(f, extractedPath); err != nil { + fmt.Printf("Warning: failed to extract %s: %v\n", jarFile, err) + continue + } + extractedJARs = append(extractedJARs, extractedPath) + } + + return extractedJARs, tmpDir, cleanup, nil +} + +func extractFile(f *zip.File, destPath string) error { + rc, err := f.Open() + if err != nil { + return err + } + defer rc.Close() + + outFile, err := os.Create(destPath) + if err != nil { + return err + } + defer outFile.Close() + + _, err = io.Copy(outFile, rc) + return err +} diff --git a/tools/source-code-mapping-generators/java-jar-mapper/jdk.go b/tools/source-code-mapping-generators/java-jar-mapper/jdk.go new file mode 100644 index 0000000000..4f34bebe74 --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/jdk.go @@ -0,0 +1,194 @@ +package main + +import ( + "archive/zip" + 
"encoding/binary" + "fmt" + "io" + "strings" + + "github.com/grafana/pyroscope/pkg/frontend/vcs/config" +) + +// extractJDKVersionFromJAR extracts the JDK version from a JAR file by analyzing +// the class file major version numbers. Returns the major version (e.g., "8", "11", "17"). +func extractJDKVersionFromJAR(jarPath string) (string, error) { + reader, err := zip.OpenReader(jarPath) + if err != nil { + return "", fmt.Errorf("failed to open JAR: %w", err) + } + defer reader.Close() + + var maxMajorVersion int + for _, f := range reader.File { + if !strings.HasSuffix(f.Name, ".class") { + continue + } + + rc, err := f.Open() + if err != nil { + continue + } + + // Read class file header to get major version + // Class file format: magic (4 bytes) + minor_version (2 bytes) + major_version (2 bytes) + header := make([]byte, 8) + n, err := rc.Read(header) + rc.Close() + if err != nil && err != io.EOF { + continue + } + if n < 8 { + continue + } + + // Check magic number (0xCAFEBABE) + if binary.BigEndian.Uint32(header[0:4]) != 0xCAFEBABE { + continue + } + + // Extract major version (bytes 6-7) + majorVersion := int(binary.BigEndian.Uint16(header[6:8])) + if majorVersion > maxMajorVersion { + maxMajorVersion = majorVersion + } + } + + if maxMajorVersion == 0 { + return "", fmt.Errorf("no valid class files found in JAR") + } + + // Map class file major version to JDK version + // Java 8 = 52, Java 9 = 53, Java 10 = 54, Java 11 = 55, etc. 
+ jdkVersion := getJDKVersionInfo(maxMajorVersion).Version + if jdkVersion == "" { + return "", fmt.Errorf("unsupported class file version: %d", maxMajorVersion) + } + + return jdkVersion, nil +} + +type JDKVersionInfo struct { + Version string + Repo string + Path string +} + +// Class file major version to JDK version mapping +// Path defaults to "src/java.base/share/classes" +// Repo defaults to "jdk" +var jdkVersionMap = map[int]*JDKVersionInfo{ + 52: {Version: "8", Path: "jdk/src/share/classes"}, + 53: {Version: "9", Path: "jdk/src/java.base/share/classes"}, + 54: {Version: "10"}, + 55: {Version: "11"}, + 56: {Version: "12"}, + 57: {Version: "13"}, + 58: {Version: "14"}, + 59: {Version: "15"}, + 60: {Version: "16"}, + 61: {Version: "17"}, + 62: {Version: "18"}, + 63: {Version: "19"}, + 64: {Version: "20"}, + 65: {Version: "21"}, + 66: {Version: "22"}, + 67: {Version: "23", Repo: "jdk23u"}, +} + +func getJDKVersionInfo(majorVersion int) *JDKVersionInfo { + info := jdkVersionMap[majorVersion] + if info == nil { + return nil + } + + // Apply defaults + if info.Repo == "" { + info.Repo = fmt.Sprintf("jdk%s", info.Version) + } + if info.Path == "" { + info.Path = "src/java.base/share/classes" + } + + return info +} + +// jdkVersionToMajorVersion converts a JDK version string (e.g., "8", "11", "17") to class file major version. +func jdkVersionToMajorVersion(jdkVersion string) int { + for majorVersion, info := range jdkVersionMap { + if info.Version == jdkVersion { + return majorVersion + } + } + return 0 +} + +// generateJDKMappings generates mappings for JDK packages (java/, jdk/, javax/, sun/) +// jdkVersion should be a major version number like "8", "11", "17", "21", etc. 
+func generateJDKMappings(jdkVersion string) []config.MappingConfig { + var mappings []config.MappingConfig + + majorVersion := jdkVersionToMajorVersion(jdkVersion) + if majorVersion == 0 { + return nil + } + version := getJDKVersionInfo(majorVersion) + if version == nil { + return nil + } + + // JDK package prefixes to map + // These cover the most common JDK packages that appear in profiles + jdkPrefixes := []string{ + "java/lang", + "java/util", + "java/io", + "java/net", + "java/time", + "java/reflect", + "java/security", + "java/math", + "java/text", + "java/nio", + "java/concurrent", + "java/beans", + "java/awt", + "java/applet", + "javax/annotation", + "javax/crypto", + "javax/net", + "javax/security", + "javax/sql", + "javax/xml", + "jdk/internal", + "jdk/nashorn", + "sun/misc", + "sun/nio", + "sun/reflect", + "sun/security", + "sun/util", + } + + // Create a single mapping with all JDK prefixes + // This is more efficient than creating separate mappings for each prefix + matchConfigs := make([]config.Match, len(jdkPrefixes)) + for i, prefix := range jdkPrefixes { + matchConfigs[i] = config.Match{Prefix: prefix} + } + + mapping := config.MappingConfig{ + FunctionName: matchConfigs, + Language: "java", + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "openjdk", + Repo: version.Repo, + Ref: "master", + Path: version.Path, + }, + }, + } + + mappings = append(mappings, mapping) + return mappings +} diff --git a/tools/source-code-mapping-generators/java-jar-mapper/main.go b/tools/source-code-mapping-generators/java-jar-mapper/main.go new file mode 100644 index 0000000000..405bc4de83 --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/main.go @@ -0,0 +1,86 @@ +package main + +import ( + "flag" + "fmt" + "os" + + "github.com/grafana/pyroscope/pkg/frontend/vcs/config" +) + +func main() { + var ( + jarPath = flag.String("jar", "", "Path to the Java JAR file to analyze") + configPath = flag.String("config", "", "Path to 
existing .pyroscope.yaml file to modify (default: print complete config to stdout)") + jdkVersion = flag.String("jdk-version", "", "JDK version for JDK function mappings (e.g., '8', '11', '17', '21'). If not specified, will attempt to extract from JAR file.") + help = flag.Bool("help", false, "Show help") + ) + flag.Parse() + + if *help || *jarPath == "" { + fmt.Println("Java JAR Source Code Mapper") + fmt.Println() + fmt.Println("Generates .pyroscope.yaml source_code mappings for 3rd party libraries") + fmt.Println("found in a Java JAR file.") + fmt.Println() + fmt.Println("Usage:") + fmt.Printf(" %s -jar [-config ] [-jdk-version ]\n", os.Args[0]) + fmt.Println() + fmt.Println("Flags:") + flag.PrintDefaults() + return + } + + httpClient := NewHTTPClient() + depsDevService := NewDepsDevService(httpClient) + githubTagService := NewGitHubTagService(httpClient) + configService := &ConfigService{} + + _, err := configService.LoadJarMappings() + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: failed to load JAR mappings: %v\n", err) + } + + githubResolver := NewGitHubResolver(depsDevService, configService) + + jarAnalyzer := NewJARAnalyzer() + jarExtractor := &JARExtractor{} + + processor := NewProcessor( + jarAnalyzer, + githubResolver, + configService, + githubTagService, + ) + + mappingService := NewMappingService(processor, jarExtractor) + + // Process the JAR file + mappings, err := mappingService.ProcessJAR(*jarPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + // Generate JDK mappings + var jdkMappings []config.MappingConfig + jdkVersionToUse := *jdkVersion + if jdkVersionToUse == "" { + extractedVersion, err := extractJDKVersionFromJAR(*jarPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Warning: could not extract JDK version from JAR: %v\n", err) + fmt.Fprintf(os.Stderr, "Skipping JDK mappings. 
Use -jdk-version flag to specify JDK version manually.\n") + } else { + jdkVersionToUse = extractedVersion + fmt.Fprintf(os.Stderr, "Detected JDK version: %s\n", jdkVersionToUse) + } + } + if jdkVersionToUse != "" { + jdkMappings = generateJDKMappings(jdkVersionToUse) + } + + if err := GenerateOrMergeConfig(*configPath, mappings, jdkMappings); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} diff --git a/tools/source-code-mapping-generators/java-jar-mapper/main_test.go b/tools/source-code-mapping-generators/java-jar-mapper/main_test.go new file mode 100644 index 0000000000..4772f7e94a --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/main_test.go @@ -0,0 +1,1249 @@ +package main + +import ( + "archive/zip" + "maps" + "os" + "path/filepath" + "slices" + "strings" + "testing" + + "github.com/grafana/pyroscope/pkg/frontend/vcs/config" +) + +func TestFindCommonPrefixes(t *testing.T) { + tests := []struct { + name string + packages []string + want []string + }{ + { + name: "empty packages", + packages: []string{}, + want: nil, + }, + { + name: "single package", + packages: []string{"org/springframework/web"}, + want: nil, // Need at least 2 occurrences + }, + { + name: "multiple packages with common prefix", + packages: []string{ + "org/springframework/web/HttpServlet", + "org/springframework/web/Filter", + "org/springframework/http/Request", + }, + want: []string{"org/springframework/web"}, + }, + { + name: "nested packages", + packages: []string{ + "org/springframework/web/servlet/DispatcherServlet", + "org/springframework/web/servlet/HandlerMapping", + "org/springframework/web/filter/CharacterEncodingFilter", + }, + want: []string{"org/springframework/web/servlet"}, + }, + { + name: "no common prefix", + packages: []string{ + "com/example/foo", + "org/example/bar", + }, + want: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := findCommonPrefixes(tt.packages) + if 
!slices.Equal(got, tt.want) { + t.Errorf("findCommonPrefixes() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestParseManifest(t *testing.T) { + tests := []struct { + name string + manifest string + want map[string]string + }{ + { + name: "simple manifest", + manifest: `Manifest-Version: 1.0 +Implementation-Title: spring-web +Implementation-Version: 5.3.20 +`, + want: map[string]string{ + "Manifest-Version": "1.0", + "Implementation-Title": "spring-web", + "Implementation-Version": "5.3.20", + }, + }, + { + name: "manifest with continuation lines", + manifest: `Manifest-Version: 1.0 +Implementation-Title: spring-web +Implementation-Version: 5.3.20 +Bundle-Description: Spring Framework Web + Support Classes +`, + want: map[string]string{ + "Manifest-Version": "1.0", + "Implementation-Title": "spring-web", + "Implementation-Version": "5.3.20", + "Bundle-Description": "Spring Framework Web", + }, + }, + { + name: "empty manifest", + manifest: ``, + want: map[string]string{}, + }, + { + name: "manifest with empty lines", + manifest: `Manifest-Version: 1.0 + +Implementation-Title: spring-web + +`, + want: map[string]string{ + "Manifest-Version": "1.0", + "Implementation-Title": "spring-web", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseManifest(tt.manifest) + if !maps.Equal(got, tt.want) { + t.Errorf("parseManifest() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestExtractGitHubRepoFromURL(t *testing.T) { + tests := []struct { + name string + url string + wantOwner string + wantRepo string + wantErr bool + }{ + { + name: "HTTPS URL", + url: "https://github.com/spring-projects/spring-framework", + wantOwner: "spring-projects", + wantRepo: "spring-framework", + wantErr: false, + }, + { + name: "HTTPS URL with .git", + url: "https://github.com/spring-projects/spring-framework.git", + wantOwner: "spring-projects", + wantRepo: "spring-framework", + wantErr: false, + }, + { + name: "SSH URL", + url: 
"git@github.com:spring-projects/spring-framework.git", + wantOwner: "spring-projects", + wantRepo: "spring-framework", + wantErr: false, + }, + { + name: "URL with trailing slash", + url: "https://github.com/spring-projects/spring-framework/", + wantOwner: "spring-projects", + wantRepo: "spring-framework", + wantErr: false, + }, + { + name: "non-GitHub URL", + url: "https://gitlab.com/user/repo", + wantOwner: "", + wantRepo: "", + wantErr: true, + }, + { + name: "invalid URL", + url: "not-a-url", + wantOwner: "", + wantRepo: "", + wantErr: true, + }, + { + name: "SCM connection URL", + url: "scm:git:git@github.com:apache/spark.git", + wantOwner: "apache", + wantRepo: "spark", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotOwner, gotRepo, err := ExtractGitHubRepoFromURL(tt.url) + if (err != nil) != tt.wantErr { + t.Errorf("ExtractGitHubRepoFromURL() error = %v, wantErr %v", err, tt.wantErr) + return + } + if gotOwner != tt.wantOwner { + t.Errorf("ExtractGitHubRepoFromURL() owner = %v, want %v", gotOwner, tt.wantOwner) + } + if gotRepo != tt.wantRepo { + t.Errorf("ExtractGitHubRepoFromURL() repo = %v, want %v", gotRepo, tt.wantRepo) + } + }) + } +} + +func TestJARAnalyzer_ExtractManifest(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + jarPath := filepath.Join(tmpDir, "test.jar") + createTestJAR(t, jarPath, map[string]string{ + "META-INF/MANIFEST.MF": `Manifest-Version: 1.0 +Implementation-Title: spring-web +Implementation-Version: 5.3.20 +`, + }) + + analyzer := NewJARAnalyzer() + got, err := analyzer.ExtractManifest(jarPath) + if err != nil { + t.Errorf("ExtractManifest() error = %v", err) + return + } + + want := map[string]string{ + "Manifest-Version": "1.0", + "Implementation-Title": "spring-web", + "Implementation-Version": "5.3.20", + } + + if !maps.Equal(got, want) { + 
t.Errorf("ExtractManifest() = %v, want %v", got, want) + } +} + +func TestJARAnalyzer_ExtractManifestMissing(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + jarPath := filepath.Join(tmpDir, "test.jar") + createTestJAR(t, jarPath, map[string]string{ + "some/class.class": "fake class data", + }) + + analyzer := NewJARAnalyzer() + _, err = analyzer.ExtractManifest(jarPath) + if err == nil { + t.Error("ExtractManifest() expected error for JAR without manifest") + } + if !strings.Contains(err.Error(), "MANIFEST.MF not found") { + t.Errorf("ExtractManifest() error = %v, want error containing 'MANIFEST.MF not found'", err) + } +} + +func TestJARAnalyzer_ExtractPOMProperties(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + // JAR filename must match artifactId in pom.properties + jarPath := filepath.Join(tmpDir, "spark-core_2.13-4.0.1.jar") + createTestJAR(t, jarPath, map[string]string{ + "META-INF/MANIFEST.MF": `Manifest-Version: 1.0 +`, + "META-INF/maven/org.apache.spark/spark-core_2.13/pom.properties": `#Generated by Maven +groupId=org.apache.spark +artifactId=spark-core_2.13 +version=4.0.1 +`, + }) + + analyzer := NewJARAnalyzer() + coords, err := analyzer.ExtractPOMProperties(jarPath) + if err != nil { + t.Errorf("ExtractPOMProperties() error = %v", err) + return + } + + if coords.GroupID != "org.apache.spark" { + t.Errorf("ExtractPOMProperties() groupId = %q, want %q", coords.GroupID, "org.apache.spark") + } + if coords.ArtifactID != "spark-core_2.13" { + t.Errorf("ExtractPOMProperties() artifactId = %q, want %q", coords.ArtifactID, "spark-core_2.13") + } + if coords.Version != "4.0.1" { + t.Errorf("ExtractPOMProperties() version = %q, want %q", coords.Version, "4.0.1") + } +} + +func TestJARAnalyzer_ExtractPOMProperties_Missing(t 
*testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + jarPath := filepath.Join(tmpDir, "test.jar") + createTestJAR(t, jarPath, map[string]string{ + "META-INF/MANIFEST.MF": `Manifest-Version: 1.0 +`, + }) + + analyzer := NewJARAnalyzer() + _, err = analyzer.ExtractPOMProperties(jarPath) + if err == nil { + t.Error("ExtractPOMProperties() expected error for JAR without pom.properties") + } +} + +func TestJARAnalyzer_ExtractPOMProperties_ShadedJAR(t *testing.T) { + // Test that shaded JARs with mismatched pom.properties are rejected + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Create a JAR named "agent-2.1.2.jar" but with pom.properties from a different artifact + jarPath := filepath.Join(tmpDir, "agent-2.1.2.jar") + createTestJAR(t, jarPath, map[string]string{ + "META-INF/MANIFEST.MF": `Manifest-Version: 1.0 +`, + // This pom.properties is from a shaded dependency, not the main artifact + "META-INF/maven/org.jetbrains/annotations/pom.properties": `groupId=org.jetbrains +artifactId=annotations +version=13.0 +`, + }) + + analyzer := NewJARAnalyzer() + _, err = analyzer.ExtractPOMProperties(jarPath) + if err == nil { + t.Error("ExtractPOMProperties() should reject shaded JAR with mismatched pom.properties") + } + if !strings.Contains(err.Error(), "doesn't match JAR filename") { + t.Errorf("ExtractPOMProperties() error should mention filename mismatch, got: %v", err) + } +} + +func TestJARAnalyzer_ExtractPOMProperties_MatchingFilename(t *testing.T) { + // Test that pom.properties matching the JAR filename is accepted + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + jarPath := filepath.Join(tmpDir, "spring-web-5.3.20.jar") + createTestJAR(t, jarPath, 
map[string]string{ + "META-INF/MANIFEST.MF": `Manifest-Version: 1.0 +`, + "META-INF/maven/org.springframework/spring-web/pom.properties": `groupId=org.springframework +artifactId=spring-web +version=5.3.20 +`, + }) + + analyzer := NewJARAnalyzer() + coords, err := analyzer.ExtractPOMProperties(jarPath) + if err != nil { + t.Errorf("ExtractPOMProperties() error = %v", err) + return + } + if coords.GroupID != "org.springframework" { + t.Errorf("ExtractPOMProperties() groupId = %q, want %q", coords.GroupID, "org.springframework") + } + if coords.ArtifactID != "spring-web" { + t.Errorf("ExtractPOMProperties() artifactId = %q, want %q", coords.ArtifactID, "spring-web") + } +} + +func TestJARAnalyzer_ExtractMavenCoordinates(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + // Test with pom.properties (preferred source) - filename must match artifactId + jarPath := filepath.Join(tmpDir, "myapp-2.0.0.jar") + createTestJAR(t, jarPath, map[string]string{ + "META-INF/MANIFEST.MF": `Manifest-Version: 1.0 +Implementation-Version: 1.0.0 +`, + "META-INF/maven/com.example/myapp/pom.properties": `groupId=com.example +artifactId=myapp +version=2.0.0 +`, + }) + + analyzer := NewJARAnalyzer() + coords, err := analyzer.ExtractMavenCoordinates(jarPath) + if err != nil { + t.Errorf("ExtractMavenCoordinates() error = %v", err) + return + } + + // Should use pom.properties values (version 2.0.0, not 1.0.0 from manifest) + if coords.GroupID != "com.example" { + t.Errorf("ExtractMavenCoordinates() groupId = %q, want %q", coords.GroupID, "com.example") + } + if coords.Version != "2.0.0" { + t.Errorf("ExtractMavenCoordinates() version = %q, want %q", coords.Version, "2.0.0") + } +} + +func TestJARAnalyzer_ExtractMavenCoordinates_FallbackToManifest(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + 
} + defer os.RemoveAll(tmpDir) + + // Test without pom.properties (fallback to manifest) + jarPath := filepath.Join(tmpDir, "myapp-1.0.0.jar") + createTestJAR(t, jarPath, map[string]string{ + "META-INF/MANIFEST.MF": `Manifest-Version: 1.0 +Implementation-Version: 1.0.0 +`, + }) + + analyzer := NewJARAnalyzer() + coords, err := analyzer.ExtractMavenCoordinates(jarPath) + if err != nil { + t.Errorf("ExtractMavenCoordinates() error = %v", err) + return + } + + // Should fall back to manifest/filename + if coords.ArtifactID != "myapp" { + t.Errorf("ExtractMavenCoordinates() artifactId = %q, want %q", coords.ArtifactID, "myapp") + } + if coords.Version != "1.0.0" { + t.Errorf("ExtractMavenCoordinates() version = %q, want %q", coords.Version, "1.0.0") + } + // GroupID should be empty when falling back to manifest + if coords.GroupID != "" { + t.Errorf("ExtractMavenCoordinates() groupId = %q, want empty", coords.GroupID) + } +} + +func TestJARAnalyzer_ExtractClassPrefixes(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + jarPath := filepath.Join(tmpDir, "test.jar") + + file, err := os.Create(jarPath) + if err != nil { + t.Fatalf("Failed to create JAR file: %v", err) + } + defer file.Close() + + writer := zip.NewWriter(file) + + classes := []string{ + "org/springframework/web/HttpServlet.class", + "org/springframework/web/Filter.class", + "org/springframework/http/Request.class", + } + + for _, className := range classes { + f, err := writer.Create(className) + if err != nil { + t.Fatalf("Failed to create class file: %v", err) + } + _, err = f.Write([]byte("fake class data")) + if err != nil { + t.Fatalf("Failed to write: %v", err) + } + } + + writer.Close() + file.Close() + + analyzer := NewJARAnalyzer() + prefixes, err := analyzer.ExtractClassPrefixes(jarPath) + if err != nil { + if strings.Contains(err.Error(), "executable file not found") { + 
t.Skip("jar command not available") + } + t.Errorf("ExtractClassPrefixes() error = %v", err) + return + } + + if len(prefixes) == 0 { + t.Error("ExtractClassPrefixes() returned no prefixes, expected some") + } + + expectedPrefixes := []string{"org/springframework"} + if !slices.Equal(prefixes, expectedPrefixes) { + t.Logf("ExtractClassPrefixes() returned prefixes: %v (expected: %v)", prefixes, expectedPrefixes) + } +} + +func TestParseManifestContinuation(t *testing.T) { + manifest := `Manifest-Version: 1.0 +Implementation-Title: spring-web +Implementation-Version: 5.3.20 +Bundle-Description: Spring Framework Web + Support Classes + and more text +Created-By: Apache Maven +` + + result := parseManifest(manifest) + + expected := "Spring Framework Web" + if result["Bundle-Description"] != expected { + t.Errorf("parseManifest() continuation line failed, got: %q, want: %q", result["Bundle-Description"], expected) + } + + if result["Created-By"] != "Apache Maven" { + t.Errorf("parseManifest() Created-By failed, got: %q", result["Created-By"]) + } +} + +func TestFindCommonPrefixesFiltering(t *testing.T) { + packages := []string{ + "org/springframework/web/servlet/DispatcherServlet", + "org/springframework/web/servlet/HandlerMapping", + "org/springframework/web/filter/CharacterEncodingFilter", + } + + prefixes := findCommonPrefixes(packages) + + hasWebServlet := false + for _, prefix := range prefixes { + if prefix == "org/springframework/web/servlet" { + hasWebServlet = true + } + } + + if !hasWebServlet { + t.Error("findCommonPrefixes() should include org/springframework/web/servlet") + } +} + +func TestDetermineSourcePath(t *testing.T) { + tests := []struct { + name string + artifactId string + want string + }{ + { + name: "single module project", + artifactId: "myapp", + want: "src/main/java", + }, + { + name: "multi-module with hyphen - returns empty for repo root search", + artifactId: "spring-webmvc", + want: "", + }, + { + name: "single module with short name", + 
artifactId: "app", + want: "src/main/java", + }, + { + name: "artifactId with hyphen returns empty", + artifactId: "app-12", + want: "", + }, + { + name: "Scala artifact with version suffix", + artifactId: "spark-core_2.13", + want: "", // multi-module, returns empty + }, + { + name: "Scala artifact without hyphen", + artifactId: "core_2.13", + want: "src/main/java", // no hyphen after stripping suffix + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := determineSourcePath(tt.artifactId) + if got != tt.want { + t.Errorf("determineSourcePath(%q) = %q, want %q", tt.artifactId, got, tt.want) + } + }) + } +} + +func TestStripVersionSuffix(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"spark-core_2.13", "spark-core"}, + {"akka-actor_2.12", "akka-actor"}, + {"cats-core_2.11", "cats-core"}, + {"zio_3", "zio"}, + {"some-lib_1.0", "some-lib"}, + {"another-lib_10.2.3", "another-lib"}, + {"jackson-core", "jackson-core"}, // no suffix + {"my-app", "my-app"}, // no suffix + {"my_app", "my_app"}, // underscore but not version suffix + {"lib_name_2.13", "lib_name"}, // multiple underscores, strips version + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := stripVersionSuffix(tt.input) + if got != tt.want { + t.Errorf("stripVersionSuffix(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestConfigService_FindJarMapping(t *testing.T) { + service := &ConfigService{} + service.mappings = &JarMappingsConfig{ + Mappings: []JarMapping{ + {Jar: "spring-web", Owner: "spring-projects", Repo: "spring-framework", Path: "spring-web/src/main/java"}, + {Jar: "jackson-core", Owner: "FasterXML", Repo: "jackson-core", Path: "src/main/java"}, + }, + } + + tests := []struct { + name string + artifactId string + want *JarMapping + }{ + { + name: "found mapping", + artifactId: "spring-web", + want: &JarMapping{Jar: "spring-web", Owner: "spring-projects", Repo: "spring-framework", Path: 
"spring-web/src/main/java"}, + }, + { + name: "not found", + artifactId: "unknown-jar", + want: nil, + }, + { + name: "found another mapping", + artifactId: "jackson-core", + want: &JarMapping{Jar: "jackson-core", Owner: "FasterXML", Repo: "jackson-core", Path: "src/main/java"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := service.FindJarMapping(tt.artifactId) + if tt.want == nil { + if got != nil { + t.Errorf("FindJarMapping() = %v, want nil", got) + } + return + } + if got == nil { + t.Errorf("FindJarMapping() = nil, want %v", tt.want) + return + } + if got.Jar != tt.want.Jar || got.Owner != tt.want.Owner || got.Repo != tt.want.Repo || got.Path != tt.want.Path { + t.Errorf("FindJarMapping() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMergeMappings(t *testing.T) { + tests := []struct { + name string + existing []config.MappingConfig + new []config.MappingConfig + want int + }{ + { + name: "no duplicates", + existing: []config.MappingConfig{}, + new: []config.MappingConfig{ + { + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + }, + want: 1, + }, + { + name: "with duplicates", + existing: []config.MappingConfig{ + { + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + }, + new: []config.MappingConfig{ + { + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + }, + want: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := MergeMappings(tt.existing, tt.new) + if len(got) != tt.want { 
+ t.Errorf("MergeMappings() returned %d mappings, want %d", len(got), tt.want) + } + }) + } +} + +func TestMappingsEqual(t *testing.T) { + tests := []struct { + name string + m1 config.MappingConfig + m2 config.MappingConfig + want bool + }{ + { + name: "equal mappings", + m1: config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + m2: config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + want: true, + }, + { + name: "different ref", + m1: config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + m2: config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.21", + }, + }, + }, + want: false, + }, + { + name: "no matching prefix", + m1: config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + m2: config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "com/example"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := mappingsEqual(tt.m1, tt.m2) + if got != tt.want { + 
t.Errorf("mappingsEqual() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestSortMappings(t *testing.T) { + mappings := []config.MappingConfig{ + { + FunctionName: []config.Match{{Prefix: "com/example"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "zorg", + Repo: "zrepo", + Ref: "v1.0.0", + }, + }, + }, + { + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "apache", + Repo: "tomcat", + Ref: "v9.0.0", + }, + }, + }, + { + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "apache", + Repo: "tomcat", + Ref: "v8.0.0", + }, + }, + }, + } + + SortMappings(mappings) + + if mappings[0].Source.GitHub.Owner != "apache" { + t.Errorf("SortMappings() first mapping owner = %q, want %q", mappings[0].Source.GitHub.Owner, "apache") + } +} + +func TestJARExtractor_ExtractThirdPartyJARs(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "jar-test-*") + if err != nil { + t.Fatalf("Failed to create temp dir: %v", err) + } + defer os.RemoveAll(tmpDir) + + jarPath := filepath.Join(tmpDir, "app.jar") + file, err := os.Create(jarPath) + if err != nil { + t.Fatalf("Failed to create JAR file: %v", err) + } + defer file.Close() + + writer := zip.NewWriter(file) + + nestedJARs := []string{ + "BOOT-INF/lib/spring-web-5.3.20.jar", + "BOOT-INF/lib/jackson-core-2.13.3.jar", + "BOOT-INF/lib/other.jar", + } + + for _, jarName := range nestedJARs { + f, err := writer.Create(jarName) + if err != nil { + t.Fatalf("Failed to create nested JAR: %v", err) + } + _, err = f.Write([]byte("fake jar content")) + if err != nil { + t.Fatalf("Failed to write: %v", err) + } + } + + writer.Close() + file.Close() + + extractor := &JARExtractor{} + jars, tmpDir2, cleanup, err := extractor.ExtractThirdPartyJARs(jarPath) + if err != nil { + if strings.Contains(err.Error(), "executable file not found") { + 
t.Skip("jar command not available") + } + t.Fatalf("ExtractThirdPartyJARs() error = %v", err) + } + defer cleanup() //nolint:errcheck + + if len(jars) == 0 { + t.Error("ExtractThirdPartyJARs() returned no JARs, expected some") + } + + for _, jar := range jars { + if _, err := os.Stat(jar); os.IsNotExist(err) { + t.Errorf("ExtractThirdPartyJARs() extracted JAR does not exist: %s", jar) + } + } + + if err := cleanup(); err != nil { + t.Errorf("cleanup() error = %v", err) + } + + if _, err := os.Stat(tmpDir2); !os.IsNotExist(err) { + t.Errorf("cleanup() did not remove temp directory: %s", tmpDir2) + } +} + +func TestSortMappings_EmptyMappings(t *testing.T) { + mappings := []config.MappingConfig{} + SortMappings(mappings) + if len(mappings) != 0 { + t.Errorf("SortMappings() changed length of empty slice") + } +} + +func TestSortMappings_NilGitHub(t *testing.T) { + mappings := []config.MappingConfig{ + { + FunctionName: []config.Match{{Prefix: "com/example"}}, + Source: config.Source{GitHub: nil}, + }, + { + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + }, + } + + SortMappings(mappings) + if len(mappings) != 2 { + t.Errorf("SortMappings() changed length of slice") + } +} + +func TestMappingsEqual_NilGitHub(t *testing.T) { + m1 := config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{GitHub: nil}, + } + m2 := config.MappingConfig{ + FunctionName: []config.Match{{Prefix: "org/springframework"}}, + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: "spring-projects", + Repo: "spring-framework", + Ref: "v5.3.20", + }, + }, + } + + if mappingsEqual(m1, m2) { + t.Error("mappingsEqual() should return false when one mapping has nil GitHub") + } +} + +func TestDefaultCommandRunner_RunCommand(t *testing.T) { + runner := 
&DefaultCommandRunner{} + + output, err := runner.RunCommand("echo", "test") + if err != nil { + t.Skipf("RunCommand() error = %v (command may not be available)", err) + } + + if !strings.Contains(string(output), "test") { + t.Errorf("RunCommand() output = %q, want to contain 'test'", string(output)) + } +} + +func TestParsePOMProperties(t *testing.T) { + tests := []struct { + name string + data string + want *MavenCoordinates + }{ + { + name: "standard properties", + data: `#Generated by Maven +groupId=org.apache.spark +artifactId=spark-core_2.13 +version=4.0.1 +`, + want: &MavenCoordinates{ + GroupID: "org.apache.spark", + ArtifactID: "spark-core_2.13", + Version: "4.0.1", + }, + }, + { + name: "no comments", + data: `groupId=com.example +artifactId=myapp +version=1.0.0`, + want: &MavenCoordinates{ + GroupID: "com.example", + ArtifactID: "myapp", + Version: "1.0.0", + }, + }, + { + name: "with extra whitespace", + data: `groupId = org.test +artifactId = test-lib +version = 2.0.0 +`, + want: &MavenCoordinates{ + GroupID: "org.test", + ArtifactID: "test-lib", + Version: "2.0.0", + }, + }, + { + name: "empty file", + data: "", + want: &MavenCoordinates{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parsePOMProperties(tt.data) + if got.GroupID != tt.want.GroupID { + t.Errorf("parsePOMProperties() groupId = %q, want %q", got.GroupID, tt.want.GroupID) + } + if got.ArtifactID != tt.want.ArtifactID { + t.Errorf("parsePOMProperties() artifactId = %q, want %q", got.ArtifactID, tt.want.ArtifactID) + } + if got.Version != tt.want.Version { + t.Errorf("parsePOMProperties() version = %q, want %q", got.Version, tt.want.Version) + } + }) + } +} + +func TestJdkVersionToMajorVersion(t *testing.T) { + tests := []struct { + jdkVersion string + want int + }{ + {"8", 52}, + {"11", 55}, + {"17", 61}, + {"21", 65}, + {"99", 0}, // Unknown version + } + + for _, tt := range tests { + t.Run(tt.jdkVersion, func(t *testing.T) { + got := 
jdkVersionToMajorVersion(tt.jdkVersion) + if got != tt.want { + t.Errorf("jdkVersionToMajorVersion(%q) = %d, want %d", tt.jdkVersion, got, tt.want) + } + }) + } +} + +func TestIsShadedPackage(t *testing.T) { + tests := []struct { + pkg string + want bool + }{ + {"io/pyroscope/vendor/okhttp3", true}, + {"io/pyroscope/vendor/com/google/protobuf", true}, + {"com/google/common/shaded/collect", true}, + {"org/apache/spark/internal/shaded/io", true}, + {"org/example/repackaged/guava", true}, + {"org/example/relocated/jackson", true}, + {"com/example/thirdparty/lib", true}, + {"io/pyroscope/javaagent", false}, + {"io/pyroscope/labels", false}, + {"org/springframework/web", false}, + {"com/google/common/collect", false}, + } + + for _, tt := range tests { + t.Run(tt.pkg, func(t *testing.T) { + got := isShadedPackage(tt.pkg) + if got != tt.want { + t.Errorf("isShadedPackage(%q) = %v, want %v", tt.pkg, got, tt.want) + } + }) + } +} + +func TestExtractMavenCoordinatesFromPath(t *testing.T) { + tests := []struct { + name string + path string + want *MavenCoordinates + wantErr bool + }{ + { + name: "Bazel Maven path - pyroscope agent", + path: "/some/path/.runfiles/rules_jvm_external++maven+maven/v1/https/repo1.maven.org/maven2/io/pyroscope/agent/2.1.2/processed_agent-2.1.2.jar", + want: &MavenCoordinates{ + GroupID: "io.pyroscope", + ArtifactID: "agent", + Version: "2.1.2", + }, + }, + { + name: "Bazel Maven path - guava", + path: "/path/maven2/com/google/guava/guava/31.1-jre/guava-31.1-jre.jar", + want: &MavenCoordinates{ + GroupID: "com.google.guava", + ArtifactID: "guava", + Version: "31.1-jre", + }, + }, + { + name: "Bazel Maven path - single segment groupId", + path: "/path/maven2/junit/junit/4.13.2/junit-4.13.2.jar", + want: &MavenCoordinates{ + GroupID: "junit", + ArtifactID: "junit", + Version: "4.13.2", + }, + }, + { + name: "No maven2 in path", + path: "/regular/path/to/some-lib-1.0.0.jar", + want: &MavenCoordinates{}, + }, + { + name: "Path too short after maven2", 
+ path: "/path/maven2/incomplete/path.jar", + want: &MavenCoordinates{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractMavenCoordinatesFromPath(tt.path) + if got.GroupID != tt.want.GroupID { + t.Errorf("extractMavenCoordinatesFromPath() groupId = %q, want %q", got.GroupID, tt.want.GroupID) + } + if got.ArtifactID != tt.want.ArtifactID { + t.Errorf("extractMavenCoordinatesFromPath() artifactId = %q, want %q", got.ArtifactID, tt.want.ArtifactID) + } + if got.Version != tt.want.Version { + t.Errorf("extractMavenCoordinatesFromPath() version = %q, want %q", got.Version, tt.want.Version) + } + }) + } +} + +func TestParseNextPageURL(t *testing.T) { + tests := []struct { + name string + linkHeader string + want string + }{ + { + name: "empty header", + linkHeader: "", + want: "", + }, + { + name: "single next link", + linkHeader: `; rel="next"`, + want: "https://api.github.com/repos/apache/spark/tags?page=2", + }, + { + name: "next and last links", + linkHeader: `; rel="next", ; rel="last"`, + want: "https://api.github.com/repos/apache/spark/tags?page=2", + }, + { + name: "prev and last links only (no next)", + linkHeader: `; rel="prev", ; rel="last"`, + want: "", + }, + { + name: "first, prev, next, last links", + linkHeader: `; rel="first", ; rel="prev", ; rel="next", ; rel="last"`, + want: "https://api.github.com/repos/apache/spark/tags?page=6", + }, + { + name: "malformed link (missing angle brackets)", + linkHeader: `https://api.github.com/repos/apache/spark/tags?page=2; rel="next"`, + want: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := parseNextPageURL(tt.linkHeader) + if got != tt.want { + t.Errorf("parseNextPageURL() = %q, want %q", got, tt.want) + } + }) + } +} + +func createTestJAR(t *testing.T, jarPath string, files map[string]string) { + file, err := os.Create(jarPath) + if err != nil { + t.Fatalf("Failed to create JAR file: %v", err) + } + defer file.Close() + + 
writer := zip.NewWriter(file) + defer writer.Close() + + for path, content := range files { + f, err := writer.Create(path) + if err != nil { + t.Fatalf("Failed to create file in JAR: %v", err) + } + _, err = f.Write([]byte(content)) + if err != nil { + t.Fatalf("Failed to write file content: %v", err) + } + } +} diff --git a/tools/source-code-mapping-generators/java-jar-mapper/mapping.go b/tools/source-code-mapping-generators/java-jar-mapper/mapping.go new file mode 100644 index 0000000000..932c246bc5 --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/mapping.go @@ -0,0 +1,326 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/grafana/pyroscope/pkg/frontend/vcs/config" +) + +// Processor processes individual JAR files to generate mapping configurations. +type Processor struct { + jarAnalyzer *JARAnalyzer + githubResolver *GitHubResolver + configService *ConfigService + githubTagService *GitHubTagService +} + +// NewProcessor creates a new Processor. 
+func NewProcessor( + jarAnalyzer *JARAnalyzer, + githubResolver *GitHubResolver, + configService *ConfigService, + githubTagService *GitHubTagService, +) *Processor { + return &Processor{ + jarAnalyzer: jarAnalyzer, + githubResolver: githubResolver, + configService: configService, + githubTagService: githubTagService, + } +} + +func (p *Processor) ProcessJAR(jarPath string) (*config.MappingConfig, error) { + prefixes, err := p.jarAnalyzer.ExtractClassPrefixes(jarPath) + if err != nil { + return nil, fmt.Errorf("failed to extract class names: %w", err) + } + if len(prefixes) == 0 { + return nil, fmt.Errorf("no common prefixes found in class names") + } + + coords, err := p.jarAnalyzer.ExtractMavenCoordinates(jarPath) + if err != nil { + return nil, err + } + + fmt.Fprintf(os.Stderr, "Processing JAR: %s\n", filepath.Base(jarPath)) + fmt.Fprintf(os.Stderr, " Coordinates: %s:%s:%s\n", coords.GroupID, coords.ArtifactID, coords.Version) + + owner, repo, err := p.githubResolver.ResolveRepo(coords) + if err != nil { + return nil, err + } + + if owner == "" || repo == "" { + return nil, fmt.Errorf("failed to resolve GitHub repo for %s", coords.ArtifactID) + } + + ref := p.determineRef(owner, repo, coords.Version) + + sourcePath := determineSourcePath(coords.ArtifactID) + + // Check for hardcoded path override + if p.configService != nil { + if mapping := p.configService.FindJarMapping(coords.ArtifactID); mapping != nil && mapping.Path != "" { + sourcePath = mapping.Path + } + } + + return p.buildMappingConfig(prefixes, owner, repo, ref, sourcePath), nil +} + +func (p *Processor) ProcessJARWithCoords(jarPath string, coords *MavenCoordinates) (*config.MappingConfig, error) { + prefixes, err := p.jarAnalyzer.ExtractClassPrefixes(jarPath) + if err != nil { + return nil, fmt.Errorf("failed to extract class names: %w", err) + } + if len(prefixes) == 0 { + return nil, fmt.Errorf("no common prefixes found in class names") + } + + fmt.Fprintf(os.Stderr, "Processing JAR: %s\n", 
filepath.Base(jarPath)) + fmt.Fprintf(os.Stderr, " Coordinates: %s:%s:%s\n", coords.GroupID, coords.ArtifactID, coords.Version) + + owner, repo, err := p.githubResolver.ResolveRepo(coords) + if err != nil { + return nil, err + } + + if owner == "" || repo == "" { + return nil, fmt.Errorf("failed to resolve GitHub repo for %s", coords.ArtifactID) + } + + ref := p.determineRef(owner, repo, coords.Version) + sourcePath := determineSourcePath(coords.ArtifactID) + + if p.configService != nil { + if mapping := p.configService.FindJarMapping(coords.ArtifactID); mapping != nil && mapping.Path != "" { + sourcePath = mapping.Path + } + } + + return p.buildMappingConfig(prefixes, owner, repo, ref, sourcePath), nil +} + +func (p *Processor) buildMappingConfig(prefixes []string, owner, repo, ref, sourcePath string) *config.MappingConfig { + mapping := &config.MappingConfig{ + FunctionName: make([]config.Match, len(prefixes)), + Language: "java", + Source: config.Source{ + GitHub: &config.GitHubMappingConfig{ + Owner: owner, + Repo: repo, + Ref: ref, + Path: sourcePath, + }, + }, + } + + sortedPrefixes := make([]string, len(prefixes)) + copy(sortedPrefixes, prefixes) + sort.Strings(sortedPrefixes) + + for i, prefix := range sortedPrefixes { + mapping.FunctionName[i] = config.Match{Prefix: prefix} + } + + return mapping +} + +func (p *Processor) determineRef(owner, repo, version string) string { + if p.githubTagService == nil { + return version + } + + ref, err := p.githubTagService.FindTagForVersion(owner, repo, version) + if err != nil { + fmt.Fprintf(os.Stderr, " Warning: failed to query GitHub tags for %s/%s: %v\n", owner, repo, err) + return version + } + + fmt.Fprintf(os.Stderr, " Found GitHub tag: %s (for version %s)\n", ref, version) + return ref +} + +// determineSourcePath determines the source path for Java code. 
+// Since path structures vary widely across projects, we use conservative defaults: +// - Simple artifactIds: src/main/java (standard Maven layout) +// - Multi-module projects: empty string (search from repo root) +// Known projects should use jar-mappings.json for correct paths. +func determineSourcePath(artifactId string) string { + // Strip version suffixes (e.g., "_2.13", "_3", "_1.0") + cleanArtifactId := stripVersionSuffix(artifactId) + + // For simple artifactIds without hyphens, use standard Maven src path + if !strings.Contains(cleanArtifactId, "-") { + return "src/main/java" + } + + // For multi-module projects, we can't reliably determine the path + // because naming conventions vary widely: + // - spark-core -> core/src/main/java (strips project prefix) + // - spring-web -> spring-web/src/main/java (keeps full name) + // - jackson-core -> src/main/java (single module at root) + // + // Return empty path to search from repo root + return "" +} + +// stripVersionSuffix removes version-like suffixes from artifactId. +// This handles Scala version suffixes (_2.13, _3) and similar patterns. +// Pattern: underscore followed by a version number (e.g., _2.13, _3, _1.0) +var versionSuffixPattern = regexp.MustCompile(`_\d+(\.\d+)*$`) + +func stripVersionSuffix(artifactId string) string { + return versionSuffixPattern.ReplaceAllString(artifactId, "") +} + +// MappingService orchestrates the mapping generation process. +type MappingService struct { + processor *Processor + jarExtractor *JARExtractor +} + +func NewMappingService(processor *Processor, jarExtractor *JARExtractor) *MappingService { + return &MappingService{ + processor: processor, + jarExtractor: jarExtractor, + } +} + +// ProcessJAR processes a JAR file and returns all mappings. 
+func (s *MappingService) ProcessJAR(jarPath string) ([]config.MappingConfig, error) { + thirdPartyJARs, _, cleanup, err := s.jarExtractor.ExtractThirdPartyJARs(jarPath) + if err != nil { + return nil, fmt.Errorf("failed to extract 3rd party JARs: %w", err) + } + defer cleanup() //nolint:errcheck + + fmt.Fprintf(os.Stderr, "Found %d JAR(s) to process\n", len(thirdPartyJARs)) + + var mappings []config.MappingConfig + successCount := 0 + failCount := 0 + + for _, jarFile := range thirdPartyJARs { + mapping, err := s.processor.ProcessJAR(jarFile) + if err != nil { + fmt.Fprintf(os.Stderr, "✗ Skipping %s: %v\n", filepath.Base(jarFile), err) + failCount++ + continue + } + + if mapping != nil { + mappings = append(mappings, *mapping) + fmt.Fprintf(os.Stderr, "✓ Successfully mapped %s to %s/%s\n", + filepath.Base(jarFile), + mapping.Source.GitHub.Owner, + mapping.Source.GitHub.Repo) + successCount++ + } else { + failCount++ + } + } + + fmt.Fprintf(os.Stderr, "\nProcessed %d JARs: %d successful, %d failed\n", + len(thirdPartyJARs), successCount, failCount) + + return mappings, nil +} + +// MergeMappings merges new mappings into existing ones, avoiding duplicates. +func MergeMappings(existing, new []config.MappingConfig) []config.MappingConfig { + result := make([]config.MappingConfig, 0, len(existing)+len(new)) + result = append(result, existing...) + + for _, newMapping := range new { + isDuplicate := false + for _, existingMapping := range existing { + if mappingsEqual(newMapping, existingMapping) { + isDuplicate = true + break + } + } + if !isDuplicate { + result = append(result, newMapping) + } + } + + return result +} + +// mappingsEqual checks if two mappings are effectively the same. 
+func mappingsEqual(m1, m2 config.MappingConfig) bool { + if m1.Source.GitHub == nil || m2.Source.GitHub == nil { + return false + } + + gh1, gh2 := m1.Source.GitHub, m2.Source.GitHub + if gh1.Owner != gh2.Owner || gh1.Repo != gh2.Repo || gh1.Ref != gh2.Ref { + return false + } + + for _, fn1 := range m1.FunctionName { + for _, fn2 := range m2.FunctionName { + if fn1.Prefix == fn2.Prefix { + return true + } + } + } + + return false +} + +// SortMappings sorts mappings to ensure deterministic output order. +func SortMappings(mappings []config.MappingConfig) { + sort.Slice(mappings, func(i, j int) bool { + mi, mj := mappings[i], mappings[j] + + ownerI, ownerJ := "", "" + if mi.Source.GitHub != nil { + ownerI = mi.Source.GitHub.Owner + } + if mj.Source.GitHub != nil { + ownerJ = mj.Source.GitHub.Owner + } + if ownerI != ownerJ { + return ownerI < ownerJ + } + + repoI, repoJ := "", "" + if mi.Source.GitHub != nil { + repoI = mi.Source.GitHub.Repo + } + if mj.Source.GitHub != nil { + repoJ = mj.Source.GitHub.Repo + } + if repoI != repoJ { + return repoI < repoJ + } + + refI, refJ := "", "" + if mi.Source.GitHub != nil { + refI = mi.Source.GitHub.Ref + } + if mj.Source.GitHub != nil { + refJ = mj.Source.GitHub.Ref + } + if refI != refJ { + return refI < refJ + } + + prefixI, prefixJ := "", "" + if len(mi.FunctionName) > 0 { + prefixI = mi.FunctionName[0].Prefix + } + if len(mj.FunctionName) > 0 { + prefixJ = mj.FunctionName[0].Prefix + } + return prefixI < prefixJ + }) +} diff --git a/tools/source-code-mapping-generators/java-jar-mapper/services.go b/tools/source-code-mapping-generators/java-jar-mapper/services.go new file mode 100644 index 0000000000..40e2c9e1cf --- /dev/null +++ b/tools/source-code-mapping-generators/java-jar-mapper/services.go @@ -0,0 +1,270 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +type HTTPClient interface { + Get(url string) (*http.Response, error) +} + +type 
DefaultHTTPClient struct { + client *http.Client +} + +func NewHTTPClient() HTTPClient { + return &DefaultHTTPClient{ + client: &http.Client{ + Timeout: 10 * time.Second, + }, + } +} + +func (c *DefaultHTTPClient) Get(url string) (*http.Response, error) { + return c.client.Get(url) +} + +// MavenService handles downloading JAR files from Maven Central. +type MavenService struct { + client HTTPClient +} + +func NewMavenService(client HTTPClient) *MavenService { + if client == nil { + client = NewHTTPClient() + } + return &MavenService{client: client} +} + +func (s *MavenService) FetchJAR(groupId, artifactId, version string) ([]byte, error) { + groupIdPath := strings.ReplaceAll(groupId, ".", "/") + urlStr := fmt.Sprintf("https://repo1.maven.org/maven2/%s/%s/%s/%s-%s.jar", + groupIdPath, artifactId, version, artifactId, version) + + resp, err := s.client.Get(urlStr) + if err != nil { + return nil, fmt.Errorf("HTTP request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d", resp.StatusCode) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read response: %w", err) + } + return data, nil +} + +// DepsDevService handles querying the deps.dev API for GitHub repository information. +type DepsDevService struct { + client HTTPClient +} + +func NewDepsDevService(client HTTPClient) *DepsDevService { + if client == nil { + client = NewHTTPClient() + } + return &DepsDevService{client: client} +} + +// DepsDevVersionResponse represents the response from deps.dev API. +type DepsDevVersionResponse struct { + Links []struct { + Label string `json:"label"` + URL string `json:"url"` + } `json:"links"` +} + +// ResolveRepo queries deps.dev API to find the GitHub repository for a Maven artifact. 
+func (s *DepsDevService) ResolveRepo(groupId, artifactId, version string) (owner, repo string, err error) { + packageKey := fmt.Sprintf("%s:%s", groupId, artifactId) + apiURL := fmt.Sprintf("https://api.deps.dev/v3/systems/maven/packages/%s/versions/%s", + url.PathEscape(packageKey), url.PathEscape(version)) + + resp, err := s.client.Get(apiURL) + if err != nil { + return "", "", fmt.Errorf("deps.dev API request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", "", fmt.Errorf("deps.dev API returned HTTP %d", resp.StatusCode) + } + + var depsDevResp DepsDevVersionResponse + if err := json.NewDecoder(resp.Body).Decode(&depsDevResp); err != nil { + return "", "", fmt.Errorf("failed to parse deps.dev response: %w", err) + } + + // Look for SOURCE_REPO link with GitHub URL + for _, link := range depsDevResp.Links { + if strings.Contains(link.URL, "github.com") { + owner, repo, err := ExtractGitHubRepoFromURL(link.URL) + if err == nil { + return owner, repo, nil + } + } + } + + return "", "", fmt.Errorf("no GitHub repository found in deps.dev response") +} + +// GitHubTagService handles querying GitHub API for tags. 
+type GitHubTagService struct { + client HTTPClient + authToken string +} + +func NewGitHubTagService(client HTTPClient) *GitHubTagService { + if client == nil { + client = NewHTTPClient() + } + authToken := os.Getenv("GITHUB_TOKEN") + return &GitHubTagService{ + client: client, + authToken: authToken, + } +} + +type GitHubTag struct { + Name string `json:"name"` +} + +func (s *GitHubTagService) FindTagForVersion(owner, repo, version string) (string, error) { + versionNormalized := strings.TrimPrefix(version, "v") + hasVPrefix := strings.HasPrefix(version, "v") + + // Build candidates list prioritizing exact match + var candidates []string + if hasVPrefix { + candidates = []string{version, versionNormalized} + } else { + candidates = []string{version, "v" + versionNormalized} + } + + makeRequest := func(urlStr string) (*http.Response, error) { + req, err := http.NewRequest("GET", urlStr, nil) + if err != nil { + return nil, err + } + if s.authToken != "" { + req.Header.Set("Authorization", fmt.Sprintf("token %s", s.authToken)) + } + req.Header.Set("User-Agent", "pyroscope-jar-mapper") + if defaultClient, ok := s.client.(*DefaultHTTPClient); ok { + return defaultClient.client.Do(req) + } + return s.client.Get(urlStr) + } + + // Try direct lookup for each candidate + for _, candidate := range candidates { + refURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/git/refs/tags/%s", owner, repo, candidate) + resp, err := makeRequest(refURL) + if err == nil { + defer resp.Body.Close() + switch resp.StatusCode { + case http.StatusOK: + var refData struct { + Ref string `json:"ref"` + } + if err := json.NewDecoder(resp.Body).Decode(&refData); err == nil { + if strings.HasPrefix(refData.Ref, "refs/tags/") { + return strings.TrimPrefix(refData.Ref, "refs/tags/"), nil + } + } + case http.StatusForbidden: + return "", fmt.Errorf("GitHub API rate limited. 
Set GITHUB_TOKEN environment variable") + } + } + } + + // Fallback: fetch tags list with pagination + nextURL := fmt.Sprintf("https://api.github.com/repos/%s/%s/tags?per_page=100", owner, repo) + + for nextURL != "" { + resp, err := makeRequest(nextURL) + if err != nil { + return "", fmt.Errorf("failed to query GitHub API: %w", err) + } + + if resp.StatusCode == http.StatusForbidden { + resp.Body.Close() + return "", fmt.Errorf("GitHub API rate limited. Set GITHUB_TOKEN environment variable") + } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + return "", fmt.Errorf("GitHub API returned HTTP %d", resp.StatusCode) + } + + var tags []GitHubTag + if err := json.NewDecoder(resp.Body).Decode(&tags); err != nil { + resp.Body.Close() + return "", fmt.Errorf("failed to parse GitHub API response: %w", err) + } + + // Try exact matches + for _, candidate := range candidates { + for _, tag := range tags { + if tag.Name == candidate { + resp.Body.Close() + return tag.Name, nil + } + } + } + + // Try normalized matching + for _, tag := range tags { + tagNormalized := strings.TrimPrefix(tag.Name, "v") + if tagNormalized == versionNormalized { + resp.Body.Close() + return tag.Name, nil + } + } + + nextURL = parseNextPageURL(resp.Header.Get("Link")) + resp.Body.Close() + } + + // Fallback: return version with "v" prefix + return "v" + versionNormalized, nil +} + +// parseNextPageURL extracts the "next" URL from a GitHub Link header. 
// Example: <https://api.github.com/repos/OWNER/REPO/tags?page=2>; rel="next", <...>; rel="last"
//
// The header is split on commas into link entries. The first entry whose target
// is wrapped in angle brackets and whose parameters declare the relation "next"
// wins. The rel parameter may appear at any position, may be quoted or
// unquoted, is matched case-insensitively, and may list several
// space-separated relation types. An empty string is returned when the header
// is empty or contains no usable "next" link.
//
// Limitation: targets that themselves contain ',' or ';' are not supported,
// because the header is tokenised on those characters.
func parseNextPageURL(linkHeader string) string {
	for _, entry := range strings.Split(linkHeader, ",") {
		target, params, hasParams := strings.Cut(strings.TrimSpace(entry), ";")
		if !hasParams {
			// No parameters means no rel, so this cannot be the next link.
			continue
		}

		target = strings.TrimSpace(target)
		if !strings.HasPrefix(target, "<") || !strings.HasSuffix(target, ">") {
			// Malformed target: skip it rather than returning garbage.
			continue
		}

		if linkParamsHaveNextRel(params) {
			return target[1 : len(target)-1]
		}
	}

	return ""
}

// linkParamsHaveNextRel reports whether the ';'-separated link parameters
// contain a rel parameter that includes the relation type "next".
func linkParamsHaveNextRel(params string) bool {
	for _, param := range strings.Split(params, ";") {
		key, value, hasValue := strings.Cut(strings.TrimSpace(param), "=")
		if !hasValue || !strings.EqualFold(strings.TrimSpace(key), "rel") {
			continue
		}

		// Drop optional quotes, then look at every listed relation type.
		value = strings.Trim(strings.TrimSpace(value), `"`)
		for _, relation := range strings.Fields(value) {
			if strings.EqualFold(relation, "next") {
				return true
			}
		}
	}

	return false
}