Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import (
"unicode"

"github.com/PuerkitoBio/goquery"
sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/javascript"
tree_sitter "github.com/tree-sitter/go-tree-sitter"
tree_sitter_javascript "github.com/tree-sitter/tree-sitter-javascript/bindings/go"
)

// Analyzer could be considered the core type of jsluice. It wraps
Expand All @@ -21,15 +21,25 @@ type Analyzer struct {
// NewAnalyzer accepts a slice of bytes representing some JavaScript
// source code and returns a pointer to a new Analyzer
func NewAnalyzer(source []byte) *Analyzer {
parser := sitter.NewParser()
parser := tree_sitter.NewParser()
defer parser.Close()

parser.SetLanguage(javascript.GetLanguage())
language := tree_sitter.NewLanguage(tree_sitter_javascript.Language())
parser.SetLanguage(language)

if isProbablyHTML(source) {
source = extractInlineJS(source)
}

tree := parser.Parse(nil, source)
tree, err := parser.Parse(source, nil)
if err != nil || tree == nil {
// Return analyzer with empty root node if parsing fails
return &Analyzer{
urlMatchers: AllURLMatchers(),
rootNode: &Node{},
}
}
defer tree.Close()

// TODO: Align how URLMatcher and SecretMatcher slices
// are loaded. At the moment we load URLMatchers now,
Expand Down
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ require (
github.com/ditashi/jsbeautifier-go v0.0.0-20141206144643-2520a8026a9c
github.com/pkg/profile v1.6.0
github.com/slyrz/warc v0.0.0-20150806225202-a50edd19b690
github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8
github.com/tree-sitter/go-tree-sitter v0.24.0
github.com/tree-sitter-grammars/tree-sitter-javascript v0.23.0
github.com/spf13/pflag v1.0.5
golang.org/x/exp v0.0.0-20230905200255-921286631fa9
)
Expand Down
112 changes: 69 additions & 43 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import (
"strings"

"github.com/ditashi/jsbeautifier-go/jsbeautifier"
sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/javascript"
tree_sitter "github.com/tree-sitter/go-tree-sitter"
tree_sitter_javascript "github.com/tree-sitter/tree-sitter-javascript/bindings/go"
)

// ExpressionPlaceholder is the string used to replace any
Expand All @@ -22,7 +22,7 @@ var ExpressionPlaceholder = "EXPR"
// store the raw JavaScript source that is a required argument
// for many tree-sitter functions.
type Node struct {
node *sitter.Node
node *tree_sitter.Node
source []byte
captureName string
}
Expand All @@ -31,7 +31,7 @@ type Node struct {
// node and a byte-slice containing the JavaScript source.
// The source provided should be the complete source code
// and not just the source for the node in question.
func NewNode(n *sitter.Node, source []byte) *Node {
func NewNode(n *tree_sitter.Node, source []byte) *Node {
return &Node{
node: n,
source: source,
Expand All @@ -49,7 +49,7 @@ func (n *Node) Content() string {
if n.node == nil {
return ""
}
return n.node.Content(n.source)
return n.node.Utf8Text(n.source)
}

// Type returns the tree-sitter type string for a Node.
Expand All @@ -76,7 +76,7 @@ func (n *Node) Child(index int) *Node {
if !n.IsValid() {
return nil
}
return NewNode(n.node.Child(index), n.source)
return NewNode(n.node.Child(uint(index)), n.source)
}

// NamedChild returns the 'named' child Node at the provided
Expand All @@ -89,7 +89,7 @@ func (n *Node) NamedChild(index int) *Node {
if !n.IsValid() {
return nil
}
return NewNode(n.node.NamedChild(index), n.source)
return NewNode(n.node.NamedChild(uint(index)), n.source)
}

// ChildCount returns the number of children a node has
Expand Down Expand Up @@ -339,23 +339,43 @@ func (n *Node) IsNamed() bool {
// ForEachChild iterates over a node's children in a depth-first
// manner, calling the supplied function for each node
func (n *Node) ForEachChild(fn func(*Node)) {
it := sitter.NewIterator(n.node, sitter.DFSMode)

it.ForEach(func(sn *sitter.Node) error {
fn(NewNode(sn, n.source))
return nil
})
if !n.IsValid() {
return
}

var walk func(*tree_sitter.Node)
walk = func(node *tree_sitter.Node) {
if node == nil {
return
}
fn(NewNode(node, n.source))
for i := uint(0); i < node.ChildCount(); i++ {
walk(node.Child(i))
}
}
walk(n.node)
}

// ForEachNamedChild iterates over a node's named children in a
// depth-first manner, calling the supplied function for each node
func (n *Node) ForEachNamedChild(fn func(*Node)) {
it := sitter.NewNamedIterator(n.node, sitter.DFSMode)

it.ForEach(func(sn *sitter.Node) error {
fn(NewNode(sn, n.source))
return nil
})
if !n.IsValid() {
return
}

var walk func(*tree_sitter.Node)
walk = func(node *tree_sitter.Node) {
if node == nil {
return
}
if node.IsNamed() {
fn(NewNode(node, n.source))
}
for i := uint(0); i < node.ChildCount(); i++ {
walk(node.Child(i))
}
}
walk(n.node)
}

// Format outputs a nicely formatted version of the source code for the
Expand Down Expand Up @@ -429,32 +449,30 @@ func (n *Node) QueryMulti(query string, fn func(QueryResult)) {
if !n.IsValid() {
return
}
q, err := sitter.NewQuery(
[]byte(query),
javascript.GetLanguage(),
)

language := tree_sitter.NewLanguage(tree_sitter_javascript.Language())
q, err := tree_sitter.NewQuery(language, query)
if err != nil {
return
}
defer q.Close()

qc := sitter.NewQueryCursor()
qc := tree_sitter.NewQueryCursor()
defer qc.Close()

qc.Exec(q, n.node)

for {
match, exists := qc.NextMatch()
if !exists || match == nil {
match, ok := qc.NextMatch()
if !ok {
break
}

match = qc.FilterPredicates(match, n.source)

qr := NewQueryResult()

for _, capture := range match.Captures {
node := NewNode(capture.Node, n.source)
node.captureName = q.CaptureNameForId(capture.Index)
node.captureName = q.CaptureNames()[capture.Index]
qr.Add(node)
}
if len(qr) == 0 {
Expand Down Expand Up @@ -493,54 +511,62 @@ func (n *Node) CaptureName() string {

// dequote removes surrounding quotes from the provided string
func dequote(in string) string {
return strings.Trim(in, "'\"`")
return strings.Trim(in, "'\"` ")
}

// content returns the source for the provided tree-sitter
// node, checking if the node is nil first.
func content(n *sitter.Node, source []byte) string {
func content(n *tree_sitter.Node, source []byte) string {
if n == nil {
return ""
}
return n.Content(source)
return n.Utf8Text(source)
}

// PrintTree returns a string representation of the syntax tree
// for the provided JavaScript source
func PrintTree(source []byte) string {
parser := sitter.NewParser()
parser.SetLanguage(javascript.GetLanguage())

tree := parser.Parse(nil, source)
parser := tree_sitter.NewParser()
defer parser.Close()

language := tree_sitter.NewLanguage(tree_sitter_javascript.Language())
parser.SetLanguage(language)

tree, err := parser.Parse(source, nil)
if err != nil || tree == nil {
return ""
}
defer tree.Close()

root := tree.RootNode()

return getTree(root, source)
}

// getTree does the actual heavy lifting and recursion for PrintTree
// TODO: provide a way to print the tree as a JSON object?
func getTree(n *sitter.Node, source []byte) string {
func getTree(n *tree_sitter.Node, source []byte) string {

out := &strings.Builder{}

c := sitter.NewTreeCursor(n)
c := tree_sitter.NewTreeCursor(n)
defer c.Close()

// walkies
depth := 0
recurse := true
for {
if recurse && c.CurrentNode().IsNamed() {
fieldName := c.CurrentFieldName()
if recurse && c.Node().IsNamed() {
fieldName := c.FieldName()
if fieldName != "" {
fieldName += ": "
}

contentStr := ""
if c.CurrentNode().ChildCount() == 0 || c.CurrentNode().Type() == "string" {
contentStr = fmt.Sprintf(" (%s)", content(c.CurrentNode(), source))
if c.Node().ChildCount() == 0 || c.Node().Type() == "string" {
contentStr = fmt.Sprintf(" (%s)", content(c.Node(), source))
}
fmt.Fprintf(out, "%s%s%s%s\n", strings.Repeat(" ", depth), fieldName, c.CurrentNode().Type(), contentStr)
fmt.Fprintf(out, "%s%s%s%s\n", strings.Repeat(" ", depth), fieldName, c.Node().Type(), contentStr)
}

// descend into the tree
Expand Down