Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions cli/internal/providers/transformations/parser/javascript.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
package parser

import (
	"fmt"
	"regexp"
	"sort"
	"strings"

	"github.com/evanw/esbuild/pkg/api"
)

// JavaScriptParser extracts imports from JavaScript code using esbuild
type JavaScriptParser struct{}

// ValidateSyntax validates JavaScript code syntax using esbuild
func (p *JavaScriptParser) ValidateSyntax(code string) error {
result := api.Transform(code, api.TransformOptions{
Loader: api.LoaderJS,
})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't there an option to validate against a specific ECMAScript version?
I mean, given what we support, we should only validate against those core JS conditionals and types, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some v0 transformations follow the CommonJS format, so we need to parse both CommonJS and ECMAScript.


if len(result.Errors) > 0 {
// Collect all error messages
var errorMsgs []string
for _, err := range result.Errors {
errorMsgs = append(errorMsgs, err.Text)
}
return fmt.Errorf("javascript syntax error: %s", strings.Join(errorMsgs, "; "))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return fmt.Errorf("javascript syntax error: %s", strings.Join(errorMsgs, "; "))
return fmt.Errorf("javascript syntax error: \n\t%s", strings.Join(errorMsgs, "\n\t"))

This might be more readable. What do you think?

}

return nil
}

// ExtractImports parses JavaScript code and returns the external library
// import names it references, sorted alphabetically so the result is
// deterministic across calls.
//
// The code is first passed through esbuild's Transform using the JS loader and
// the ES module output format; all checks below run on the transformed output.
//
// It returns an error if:
//   - esbuild reports errors for the code
//   - the transformed code contains require() calls (CommonJS not supported)
//   - any import is relative (./file, ../file) or absolute (/path)
//
// RudderStack built-in libraries (@rs/<library>/v<version>) are filtered out
// of the result. On success the returned slice is non-nil, even when empty.
func (p *JavaScriptParser) ExtractImports(code string) ([]string, error) {
	result := api.Transform(code, api.TransformOptions{
		Loader: api.LoaderJS,
		Format: api.FormatESModule,
	})

	if len(result.Errors) > 0 {
		errorMsgs := make([]string, 0, len(result.Errors))
		for _, err := range result.Errors {
			errorMsgs = append(errorMsgs, err.Text)
		}
		return nil, fmt.Errorf("extracting imports: %s", strings.Join(errorMsgs, "; "))
	}

	transformedCode := string(result.Code)

	// Validate no require() in transformed code
	if err := validateNoRequire(transformedCode); err != nil {
		return nil, err
	}

	// Extract all import statements
	imports := extractAllImports(transformedCode)

	// Validate and filter imports
	libraryImports := make([]string, 0, len(imports))
	for imp := range imports {
		if isRelativeOrAbsoluteImport(imp) {
			return nil, fmt.Errorf("relative imports (./file, ../file) and absolute imports (/path) are not supported")
		}
		if isExternalLibraryImport(imp) {
			libraryImports = append(libraryImports, imp)
		}
	}

	// Map iteration order is randomized in Go; sort so that callers (and
	// anything diffing or hashing the result) see a stable order.
	sort.Strings(libraryImports)

	return libraryImports, nil
}

// validateNoRequire reports an error if code contains a call to require(),
// i.e. the word "require" followed by optional whitespace and an opening
// parenthesis.
//
// An occurrence is ignored when the byte immediately before it is an ASCII
// identifier character, so identifiers that merely end in "require" (for
// example myrequire(...) or _require(...)) are not rejected. Member calls such
// as module.require(...) are still rejected.
//
// The scan is purely textual: occurrences inside string literals are not
// distinguished from real calls.
func validateNoRequire(code string) error {
	const keyword = "require"

	for offset := 0; offset < len(code); {
		idx := strings.Index(code[offset:], keyword)
		if idx == -1 {
			return nil
		}

		start := offset + idx
		end := start + len(keyword)
		offset = end // resume the search after this occurrence

		if precededByIdentifierByte(code, start) {
			continue
		}

		// A call may have whitespace (including CR/LF) before the parenthesis.
		if strings.HasPrefix(strings.TrimLeft(code[end:], " \t\r\n"), "(") {
			return fmt.Errorf("require() syntax is not supported")
		}
	}
	return nil
}

// precededByIdentifierByte reports whether the byte before code[pos] is an
// ASCII letter, digit, '_' or '$', meaning code[pos] continues an identifier.
func precededByIdentifierByte(code string, pos int) bool {
	if pos <= 0 {
		return false
	}
	b := code[pos-1]
	switch {
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b >= '0' && b <= '9':
		return true
	case b == '_', b == '$':
		return true
	}
	return false
}

// extractAllImports collects the module specifiers referenced by import
// statements and dynamic import() calls in esbuild-transformed code. The
// result is a set: each distinct specifier maps to true.
//
// For every occurrence of the word "import" that is not part of a longer
// identifier (e.g. "important"), the specifier is resolved as follows:
//   - `import "m"` / `import("m")`: the quoted string directly after the keyword
//   - `import <bindings> from "m"`: the quoted string after the first "from"
//     that is directly followed by a quote, searching only up to the end of
//     the current statement (the next ';')
//
// Bounding the "from" search to the statement keeps a side-effect import from
// picking up the specifier of a later import, and requiring a quote after
// "from" skips binding names that contain it (e.g. `{ fromJSON }`).
//
// The scan is purely textual: occurrences inside string literals are not
// distinguished from real import statements.
func extractAllImports(code string) map[string]bool {
	const keyword = "import"

	imports := make(map[string]bool)
	for offset := 0; offset < len(code); {
		idx := strings.Index(code[offset:], keyword)
		if idx == -1 {
			break
		}

		start := offset + idx
		end := start + len(keyword)
		offset = end // resume the search after this occurrence

		// Skip identifiers that merely contain the keyword.
		if start > 0 && isIdentifierPart(code[start-1]) {
			continue
		}
		if end < len(code) && isIdentifierPart(code[end]) {
			continue
		}

		if moduleName := moduleSpecifierAfterImport(code[end:]); moduleName != "" {
			imports[moduleName] = true
		}
	}

	return imports
}

// moduleSpecifierAfterImport returns the module specifier for the import whose
// "import" keyword ends right before rest, or "" if none can be determined.
func moduleSpecifierAfterImport(rest string) string {
	const (
		whitespace = " \t\r\n"
		quotes     = "\"'`"
		fromWord   = "from"
	)

	rest = strings.TrimLeft(rest, whitespace)
	if rest == "" {
		return ""
	}

	// Side-effect import (import "m") or dynamic import (import("m")).
	if rest[0] == '(' || strings.IndexByte(quotes, rest[0]) >= 0 {
		return extractQuotedString(rest)
	}

	// Only look for "from" inside the current statement.
	limit := strings.IndexByte(rest, ';')
	if limit == -1 {
		limit = len(rest)
	}

	for pos := 0; pos < limit; {
		idx := strings.Index(rest[pos:limit], fromWord)
		if idx == -1 {
			return ""
		}
		pos += idx + len(fromWord)

		// The real "from" keyword is directly followed by the quoted specifier.
		after := strings.TrimLeft(rest[pos:], whitespace)
		if after != "" && strings.IndexByte(quotes, after[0]) >= 0 {
			return extractQuotedString(after)
		}
	}
	return ""
}

// isIdentifierPart reports whether b is an ASCII letter, digit, '_' or '$'.
func isIdentifierPart(b byte) bool {
	switch {
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z', b >= '0' && b <= '9':
		return true
	case b == '_', b == '$':
		return true
	}
	return false
}

// extractQuotedString extracts the first quoted string from text
func extractQuotedString(text string) string {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a very complex function. Can you explain what values it can receive, and can we reduce its complexity?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it miss checking that the first and last quotes are the same?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used Claude to try to simplify the function's implementation and make it a bit more readable, and I think the output it generated is in accordance with expectations:

package parser

import (
	"regexp"
	"strings"
)

type JavaScriptParser struct{}

// ExtractImports extracts all non-relative library imports from JavaScript code
func (p *JavaScriptParser) ExtractImports(code string) []string {
	// Step 1: Remove comments to avoid false matches
	code = removeComments(code)
	
	// Step 2: Extract all import statements
	imports := extractAllImports(code)
	
	// Step 3: Filter out relative/absolute paths
	imports = filterNonRelativeImports(imports)
	
	// Step 4: Deduplicate
	return deduplicateImports(imports)
}

with each implementation downstream ...

// Skip whitespace and opening parenthesis
start := 0
for start < len(text) && (text[start] == ' ' || text[start] == '\t' || text[start] == '\n' || text[start] == '(') {
start++
}

if start >= len(text) {
return ""
}

// Find opening quote
quote := text[start]
if quote != '"' && quote != '\'' && quote != '`' {
return ""
}

// Find closing quote
for i := start + 1; i < len(text); i++ {
if text[i] == quote {
return text[start+1 : i]
}
}

return ""
}

// isRelativeOrAbsoluteImport reports whether path is a relative import
// (./file, ../file, or the bare directory specifiers "." and "..") or an
// absolute import (/path).
func isRelativeOrAbsoluteImport(path string) bool {
	switch {
	case path == ".", path == "..":
		// Bare current/parent directory specifiers are relative too.
		return true
	case strings.HasPrefix(path, "./"), strings.HasPrefix(path, "../"):
		return true
	case strings.HasPrefix(path, "/"):
		return true
	}
	return false
}

// rudderStackLibraryPattern recognizes import paths of RudderStack built-in
// libraries. The whole path must have the form @rs/<library>/v<digits>, where
// <library> is a single non-empty path segment.
// Examples: @rs/hash/v1, @rs/utils/v2, @rs/crypto/v10
var rudderStackLibraryPattern = regexp.MustCompile(`^@rs/[^/]+/v\d+$`)

// isRudderStackLibrary reports whether path matches rudderStackLibraryPattern.
func isRudderStackLibrary(path string) bool {
	builtIn := rudderStackLibraryPattern.MatchString(path)
	return builtIn
}

// isExternalLibraryImport reports whether path names an external library:
// it must be non-empty and must not be a RudderStack built-in library.
func isExternalLibraryImport(path string) bool {
	if path == "" {
		return false
	}
	return !isRudderStackLibrary(path)
}
Loading