maxgio92
diff --git a/‎cmd/find/find.go‎
Lines changed: 1 addition & 1 deletion b/‎cmd/find/find.go‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pkg/find/file.go‎
Lines changed: 96 additions & 0 deletions b/‎pkg/find/file.go‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎pkg/find/find.go‎
Lines changed: 1 addition & 184 deletions b/‎pkg/find/find.go‎
Lines changed: 1 addition & 184 deletions
@@ -45,7 +45,7 @@ func NewCmd() *cobra.Command {
 
 	var filename string
 
-	cmd.Flags().StringVarP(&filename, "name", "n", "", "Base of file name (the path with the leading directories removed) exact pattern.")
+	cmd.Flags().StringVarP(&filename, "name", "n", ".+", "Base of file name (the path with the leading directories removed) exact pattern.")
 
 	// As of now only exact glob pattern expressions are allowed. The expression then translated to an exact-match regular expression.
 	o.FilenameRegexp = fmt.Sprintf("^%s$", filename)
 
@@ -0,0 +1,96 @@
+package find
+
+import (
+	"fmt"
+	"github.com/gocolly/colly"
+	d "github.com/gocolly/colly/debug"
+	"github.com/pkg/errors"
+	"net/url"
+	"path"
+	"regexp"
+	"strings"
+)
+
+// crawlFiles visits each seed URL and collects the links whose base name matches
+// o.FilenameRegexp. When o.Recursive is set, links found on a page are visited as
+// well, except hrefs that contain UpDir or equal RootDir. It returns the matching
+// base names with their URLs, or the validation error or the first seed visit error.
+//
+//nolint:funlen,cyclop
+func (o *Options) crawlFiles() (*Result, error) {
+	seeds := []*url.URL{}
+
+	err := o.Validate()
+	if err != nil {
+		return nil, err
+	}
+
+	for _, v := range o.SeedURLs {
+		// NOTE(review): the parse error is dropped; presumably Validate has
+		// already rejected malformed seed URLs - confirm.
+		u, _ := url.Parse(v)
+
+		seeds = append(seeds, u)
+	}
+
+	var files, urls []string
+
+	folderPattern := regexp.MustCompile(folderRegex)
+
+	exactFilePattern := regexp.MustCompile(o.FilenameRegexp)
+
+	// Without the leading ^ the pattern can match an href that carries directories.
+	fileRegex := strings.TrimPrefix(o.FilenameRegexp, "^")
+	filePattern := regexp.MustCompile(fileRegex)
+
+	allowedDomains := getHostnamesFromURLs(seeds)
+
+	// Create the collector settings
+	coOptions := []func(*colly.Collector){
+		colly.AllowedDomains(allowedDomains...),
+		colly.Async(false),
+	}
+
+	if o.Verbose {
+		coOptions = append(coOptions, colly.Debugger(&d.LogDebugger{}))
+	}
+
+	// Create the collector.
+	co := colly.NewCollector(coOptions...)
+
+	// For each link found: record it when it is a matching file and, in recursive
+	// mode, visit it.
+	co.OnHTML(HTMLTagLink, func(e *colly.HTMLElement) {
+		href := e.Attr(HTMLAttrRef)
+
+		// A file link is not a folder and matches the file pattern.
+		if !folderPattern.MatchString(href) && filePattern.MatchString(href) {
+			fileName := path.Base(href)
+
+			// Record it only if its base name matches the file filter regex.
+			if exactFilePattern.MatchString(fileName) {
+				// JoinPath fails only on an unparsable base; this one is rendered from the request URL.
+				u, _ := url.JoinPath(e.Request.URL.String(), href)
+
+				files = append(files, fileName)
+				urls = append(urls, u)
+			}
+		}
+
+		// Do not traverse the hierarchy in reverse order.
+		if o.Recursive && !(strings.Contains(href, UpDir)) && href != RootDir {
+			//nolint:errcheck
+			co.Visit(e.Request.AbsoluteURL(href))
+		}
+	})
+
+	// Visit each root folder.
+	for _, seedURL := range seeds {
+		err := co.Visit(seedURL.String())
+		if err != nil {
+			return nil, errors.Wrap(err, fmt.Sprintf("error scraping file with URL %s", seedURL.String()))
+		}
+	}
+
+	return &Result{BaseNames: files, URLs: urls}, nil
+}
@@ -1,14 +1,10 @@
 package find
 
 import (
-	"fmt"
 	"net/url"
-	"path"
 	"regexp"
 	"strings"
 
-	"github.com/gocolly/colly"
-	d "github.com/gocolly/colly/debug"
 	"github.com/pkg/errors"
 )
 
@@ -122,7 +118,7 @@ func (o *Options) Validate() error {
 }
 
 func (o *Options) sanitize() {
-	if strings.HasPrefix(o.FilenameRegexp, "^") && !strings.HasPrefix(o.FilenameRegexp, "^./") {
+	if strings.HasPrefix(o.FilenameRegexp, "^") && !strings.HasPrefix(o.FilenameRegexp, "^./") && !strings.HasPrefix(o.FilenameRegexp, `^(\./)?`) {
 		o.FilenameRegexp = strings.Replace(o.FilenameRegexp, "^", `^(\./)?`, 1)
 	}
 
@@ -148,182 +144,3 @@ func (o *Options) Find() (*Result, error) {
 		return o.crawlFiles()
 	}
 }
-
-// crawlFiles returns a list of file names found from the seed URL, filtered by file name regex.
-//
-//nolint:funlen,cyclop
-func (o *Options) crawlFiles() (*Result, error) {
-	seeds := []*url.URL{}
-
-	err := o.Validate()
-	if err != nil {
-		return nil, err
-	}
-
-	for _, v := range o.SeedURLs {
-		u, _ := url.Parse(v)
-
-		seeds = append(seeds, u)
-	}
-
-	var files, urls []string
-
-	folderPattern := regexp.MustCompile(folderRegex)
-
-	exactFilePattern := regexp.MustCompile(o.FilenameRegexp)
-
-	fileRegex := strings.TrimPrefix(o.FilenameRegexp, "^")
-	filePattern := regexp.MustCompile(fileRegex)
-
-	allowedDomains := getHostnamesFromURLs(seeds)
-
-	// Create the collector settings
-	coOptions := []func(*colly.Collector){
-		colly.AllowedDomains(allowedDomains...),
-		colly.Async(false),
-	}
-
-	if o.Verbose {
-		coOptions = append(coOptions, colly.Debugger(&d.LogDebugger{}))
-	}
-
-	// Create the collector.
-	co := colly.NewCollector(coOptions...)
-
-	// Add the callback to Visit the linked resource, for each HTML element found
-	co.OnHTML(HTMLTagLink, func(e *colly.HTMLElement) {
-		link := e.Attr(HTMLAttrRef)
-
-		// Do not traverse the hierarchy in reverse order.
-		if o.Recursive && !(strings.Contains(link, UpDir)) && link != RootDir {
-			//nolint:errcheck
-			co.Visit(e.Request.AbsoluteURL(link))
-		}
-	})
-
-	// Add the analysis callback to find file URLs, for each Visit call
-	co.OnRequest(func(r *colly.Request) {
-		folderMatch := folderPattern.FindStringSubmatch(r.URL.String())
-
-		// If the URL is not of a folder.
-		if len(folderMatch) == 0 {
-			fileMatch := filePattern.FindStringSubmatch(r.URL.String())
-
-			// If the URL is of a file.
-			if len(fileMatch) > 0 {
-				fileName := path.Base(r.URL.String())
-				fileNameMatch := exactFilePattern.FindStringSubmatch(fileName)
-
-				// If the URL matches the file filter regex.
-				if len(fileNameMatch) > 0 {
-					files = append(files, fileName)
-					urls = append(urls, r.URL.String())
-				}
-			}
-			// Otherwise abort the request.
-			r.Abort()
-		}
-	})
-
-	// Visit each root folder.
-	for _, seedURL := range seeds {
-		err := co.Visit(seedURL.String())
-		if err != nil {
-			return nil, errors.Wrap(err, fmt.Sprintf("error scraping file with URL %seedURLs", seedURL.String()))
-		}
-	}
-
-	return &Result{BaseNames: files, URLs: urls}, nil
-}
-
-// crawlFolders returns a list of folder names found from each seed URL, filtered by folder name regex.
-//
-//nolint:funlen,cyclop
-func (o *Options) crawlFolders() (*Result, error) {
-	seeds := []*url.URL{}
-
-	err := o.Validate()
-	if err != nil {
-		return nil, err
-	}
-
-	for _, v := range o.SeedURLs {
-		u, _ := url.Parse(v)
-
-		seeds = append(seeds, u)
-	}
-
-	var folders, urls []string
-
-	folderPattern := regexp.MustCompile(folderRegex)
-
-	exactFolderPattern := regexp.MustCompile(o.FilenameRegexp)
-
-	allowedDomains := getHostnamesFromURLs(seeds)
-	if len(allowedDomains) < 1 {
-		//nolint:goerr113
-		return nil, fmt.Errorf("invalid seed urls")
-	}
-
-	// Create the collector settings
-	coOptions := []func(*colly.Collector){
-		colly.AllowedDomains(allowedDomains...),
-		colly.Async(false),
-	}
-
-	if o.Verbose {
-		coOptions = append(coOptions, colly.Debugger(&d.LogDebugger{}))
-	}
-
-	// Create the collector.
-	co := colly.NewCollector(coOptions...)
-
-	// Visit each specific folder.
-	co.OnHTML(HTMLTagLink, func(e *colly.HTMLElement) {
-		href := e.Attr(HTMLAttrRef)
-
-		folderMatch := folderPattern.FindStringSubmatch(href)
-
-		// if the URL is of a folder.
-		//nolint:nestif
-		if len(folderMatch) > 0 {
-			// Do not traverse the hierarchy in reverse order.
-			if strings.Contains(href, UpDir) || href == RootDir {
-				return
-			}
-
-			exactFolderMatch := exactFolderPattern.FindStringSubmatch(href)
-			if len(exactFolderMatch) > 0 {
-				hrefAbsURL, _ := url.Parse(e.Request.AbsoluteURL(href))
-
-				if !urlSliceContains(seeds, hrefAbsURL) {
-					folders = append(folders, path.Base(hrefAbsURL.Path))
-					urls = append(urls, hrefAbsURL.String())
-				}
-			}
-			if o.Recursive {
-				//nolint:errcheck
-				co.Visit(e.Request.AbsoluteURL(href))
-			}
-		}
-	})
-
-	co.OnRequest(func(r *colly.Request) {
-		folderMatch := folderPattern.FindStringSubmatch(r.URL.String())
-
-		// if the URL is not of a folder.
-		if len(folderMatch) == 0 {
-			r.Abort()
-		}
-	})
-
-	// Visit each root folder.
-	for _, seedURL := range seeds {
-		err := co.Visit(seedURL.String())
-		if err != nil {
-			return nil, errors.Wrap(err, fmt.Sprintf("error scraping folder with URL %seedURLs", seedURL.String()))
-		}
-	}
-
-	return &Result{BaseNames: folders, URLs: urls}, nil
-}