-
Notifications
You must be signed in to change notification settings - Fork 1
feat: javascript code parser for transformations #337
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: chore.transformation-spec
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,179 @@ | ||||||
| package parser | ||||||
|
|
||||||
import (
	"fmt"
	"regexp"
	"sort"
	"strings"

	"github.com/evanw/esbuild/pkg/api"
)
|
|
||||||
| // JavaScriptParser validates JavaScript source and extracts the module names it imports, using esbuild. | ||||||
| // It has no fields, so the zero value is ready to use. | ||||||
| type JavaScriptParser struct{} | ||||||
|
|
||||||
| // ValidateSyntax validates JavaScript code syntax using esbuild | ||||||
| func (p *JavaScriptParser) ValidateSyntax(code string) error { | ||||||
| result := api.Transform(code, api.TransformOptions{ | ||||||
| Loader: api.LoaderJS, | ||||||
| }) | ||||||
|
|
||||||
| if len(result.Errors) > 0 { | ||||||
| // Collect all error messages | ||||||
| var errorMsgs []string | ||||||
| for _, err := range result.Errors { | ||||||
| errorMsgs = append(errorMsgs, err.Text) | ||||||
| } | ||||||
| return fmt.Errorf("javascript syntax error: %s", strings.Join(errorMsgs, "; ")) | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
This might give better readability, wdyt ? |
||||||
| } | ||||||
|
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| // ExtractImports parses JavaScript code and returns external library import names. | ||||||
| // Returns an error if the code contains: | ||||||
| // - require() syntax (CommonJS not supported) | ||||||
| // - Relative imports (./file, ../file) | ||||||
| // - Absolute imports (/path) | ||||||
| // Filters out RudderStack built-in libraries (@rs/<library>/<version>) | ||||||
| func (p *JavaScriptParser) ExtractImports(code string) ([]string, error) { | ||||||
| result := api.Transform(code, api.TransformOptions{ | ||||||
| Loader: api.LoaderJS, | ||||||
| Format: api.FormatESModule, | ||||||
| }) | ||||||
|
|
||||||
| if len(result.Errors) > 0 { | ||||||
| var errorMsgs []string | ||||||
| for _, err := range result.Errors { | ||||||
| errorMsgs = append(errorMsgs, err.Text) | ||||||
| } | ||||||
| return nil, fmt.Errorf("extracting imports: %s", strings.Join(errorMsgs, "; ")) | ||||||
| } | ||||||
|
|
||||||
| transformedCode := string(result.Code) | ||||||
|
|
||||||
| // Validate no require() in transformed code | ||||||
| if err := validateNoRequire(transformedCode); err != nil { | ||||||
| return nil, err | ||||||
| } | ||||||
|
|
||||||
| // Extract all import statements | ||||||
| imports := extractAllImports(transformedCode) | ||||||
|
|
||||||
| // Validate and filter imports | ||||||
| libraryImports := make([]string, 0) | ||||||
| for imp := range imports { | ||||||
| if isRelativeOrAbsoluteImport(imp) { | ||||||
| return nil, fmt.Errorf("relative imports (./file, ../file) and absolute imports (/path) are not supported") | ||||||
| } | ||||||
| if isExternalLibraryImport(imp) { | ||||||
| libraryImports = append(libraryImports, imp) | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| return libraryImports, nil | ||||||
| } | ||||||
|
|
||||||
// validateNoRequire returns an error if code contains a call to require,
// i.e. the word "require" followed by "(" with optional whitespace between
// them.
//
// The word must not be the tail of a longer identifier, so calls such as
// _require("x") or myrequire("x") are not reported. The scan is purely
// textual: it does not tokenize JavaScript, so an occurrence of require(
// inside a string literal is reported as well.
func validateNoRequire(code string) error {
	const keyword = "require"

	// isIdentByte reports whether b can be part of an ASCII JavaScript identifier.
	isIdentByte := func(b byte) bool {
		return b == '_' || b == '$' ||
			(b >= '0' && b <= '9') ||
			(b >= 'a' && b <= 'z') ||
			(b >= 'A' && b <= 'Z')
	}

	for i := 0; i < len(code); {
		idx := strings.Index(code[i:], keyword)
		if idx == -1 {
			break
		}

		pos := i + idx
		i = pos + len(keyword) // resume the search after this occurrence

		// Skip matches that are the suffix of a longer identifier.
		if pos > 0 && isIdentByte(code[pos-1]) {
			continue
		}

		// A call is the keyword followed by "(" with optional whitespace.
		if strings.HasPrefix(strings.TrimLeft(code[i:], " \t\r\n"), "(") {
			return fmt.Errorf("require() syntax is not supported")
		}
	}
	return nil
}
|
|
||||||
// extractAllImports scans esbuild-transformed code and returns the set of
// module specifiers it imports (the map value is always true).
//
// Recognized forms:
//
//	import "mod";                 // side-effect import
//	import x from "mod";          // default, named and namespace imports
//	import("mod")                 // dynamic import with a literal specifier
//
// For each standalone "import" keyword the specifier is taken either from the
// string that directly follows it, or from the string following a "from"
// keyword inside the same statement (up to the next ';'). Restricting the
// search to the current statement keeps one import from picking up the
// specifier of a later one, and occurrences of "import" or "from" embedded in
// longer identifiers (importantValue, fromPairs) are ignored.
//
// The scan is textual and does not tokenize JavaScript, so import-like text
// inside string literals is matched too. Re-exports (export ... from "mod")
// are not collected.
func extractAllImports(code string) map[string]bool {
	const (
		importKeyword = "import"
		fromKeyword   = "from"
	)

	imports := make(map[string]bool)

	for i := 0; i < len(code); {
		idx := strings.Index(code[i:], importKeyword)
		if idx == -1 {
			break
		}

		pos := i + idx
		i = pos + len(importKeyword)
		rest := code[i:]

		// Ignore "import" when it is part of a longer identifier.
		if pos > 0 && isJSIdentifierByte(code[pos-1]) {
			continue
		}
		if rest != "" && isJSIdentifierByte(rest[0]) {
			continue
		}

		// Side-effect and dynamic imports: the specifier follows directly.
		if name := extractQuotedString(rest); name != "" {
			imports[name] = true
			continue
		}

		// Otherwise look for `from "<specifier>"` within this statement only.
		stmt := rest
		if end := strings.IndexByte(rest, ';'); end != -1 {
			stmt = rest[:end]
		}
		for j := 0; j < len(stmt); {
			k := strings.Index(stmt[j:], fromKeyword)
			if k == -1 {
				break
			}
			at := j + k
			j = at + len(fromKeyword)

			// Skip "from" embedded in an identifier such as xfrom.
			if at > 0 && isJSIdentifierByte(stmt[at-1]) {
				continue
			}
			// A "from" not followed by a string (e.g. fromPairs, or a binding
			// literally named from) is not the keyword; keep looking.
			if name := extractQuotedString(stmt[j:]); name != "" {
				imports[name] = true
				break
			}
		}
	}

	return imports
}

// isJSIdentifierByte reports whether b can appear in an ASCII JavaScript
// identifier (letters, digits, '_' and '$').
func isJSIdentifierByte(b byte) bool {
	switch {
	case b == '_', b == '$':
		return true
	case b >= '0' && b <= '9', b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z':
		return true
	}
	return false
}

// extractQuotedString returns the contents of the quoted string at the start
// of text, or "" if text does not start with one.
//
// text is the source that follows an "import" or "from" keyword, for example
// ` "lodash";` or `("./util.js")`. Leading whitespace and opening parentheses
// are skipped. The string may be delimited by double quotes, single quotes or
// backticks, and must be closed by the same character that opened it. Escape
// sequences are not interpreted. An unterminated or empty string yields "".
func extractQuotedString(text string) string {
	text = strings.TrimLeft(text, " \t\r\n(")
	if text == "" {
		return ""
	}

	quote := text[0]
	if quote != '"' && quote != '\'' && quote != '`' {
		return ""
	}

	end := strings.IndexByte(text[1:], quote)
	if end == -1 {
		return ""
	}
	return text[1 : 1+end]
}
|
|
||||||
// isRelativeOrAbsoluteImport reports whether path is a relative import
// (".", "..", "./file", "../file") or an absolute import ("/path").
//
// The bare specifiers "." and ".." are included because they refer to the
// current and parent directory and are therefore not library names.
func isRelativeOrAbsoluteImport(path string) bool {
	switch {
	case path == ".", path == "..":
		return true
	case strings.HasPrefix(path, "./"),
		strings.HasPrefix(path, "../"),
		strings.HasPrefix(path, "/"):
		return true
	}
	return false
}
|
|
||||||
// rudderStackLibraryPattern recognizes import paths of RudderStack built-in
// libraries. A path matches when it has exactly the shape
// @rs/<library>/v<digits>, where <library> is one or more characters other
// than '/'.
//
// Matching examples: @rs/hash/v1, @rs/utils/v2, @rs/crypto/v10.
var rudderStackLibraryPattern = regexp.MustCompile(`^@rs/[^/]+/v\d+$`)

// isRudderStackLibrary reports whether path names a RudderStack built-in
// library, i.e. whether the whole path matches rudderStackLibraryPattern.
func isRudderStackLibrary(path string) bool {
	matched := rudderStackLibraryPattern.MatchString(path)
	return matched
}
|
|
||||||
| // isExternalLibraryImport checks if import is an external library (not RudderStack built-in) | ||||||
| func isExternalLibraryImport(path string) bool { | ||||||
| return path != "" && !isRudderStackLibrary(path) | ||||||
| } | ||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't there an option to validate against a specific ECMAScript version?
I mean, given what we support, we will only validate against those core JS constructs and types, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some v0 transformations follow the CommonJS format, so we need to parse both CommonJS and ECMAScript modules.