Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update main.go #914

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 43 additions & 44 deletions cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,72 +28,45 @@ var (

func main() {
flagSet, err := readFlags()
if err != nil {
gologger.Fatal().Msgf("Could not read flags: %s\n", err)
}
handleError("Could not read flags", err)

if options.HealthCheck {
gologger.Print().Msgf("%s\n", runner.DoHealthCheck(options, flagSet))
os.Exit(0)
}

katanaRunner, err := runner.New(options)
if err != nil || katanaRunner == nil {
if options.Version {
return
}
gologger.Fatal().Msgf("could not create runner: %s\n", err)
}
handleError("could not create runner", err)
defer katanaRunner.Close()

// close handler
resumeFilename := defaultResumeFilename()
go func() {
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt, syscall.SIGTERM)
for range c {
gologger.DefaultLogger.Info().Msg("- Ctrl+C pressed in Terminal")
katanaRunner.Close()
setupCloseHandler(katanaRunner, resumeFilename)

gologger.Info().Msgf("Creating resume file: %s\n", resumeFilename)
err := katanaRunner.SaveState(resumeFilename)
if err != nil {
gologger.Error().Msgf("Couldn't create resume file: %s\n", err)
}

os.Exit(0)
}
}()
err = katanaRunner.ExecuteCrawling()
handleError("could not execute crawling", err)

if err := katanaRunner.ExecuteCrawling(); err != nil {
gologger.Fatal().Msgf("could not execute crawling: %s", err)
}

// on successful execution:

// deduplicate the lines in each file in the store-field-dir
//use options.StoreFieldDir once https://github.com/projectdiscovery/katana/pull/877 is merged
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should either use options.StoreFieldDir or add back this comment

// Deduplicate lines in each file in the store-field-dir
storeFieldDir := "katana_field"
_ = folderutil.DedupeLinesInFiles(storeFieldDir)

// remove the resume file in case it exists
// Remove the resume file if it exists
if fileutil.FileExists(resumeFilename) {
os.Remove(resumeFilename)
}

}

func readFlags() (*goflags.FlagSet, error) {
flagSet := goflags.NewFlagSet()
flagSet.SetDescription(`Katana is a fast crawler focused on execution in automation
pipelines offering both headless and non-headless crawling.`)
flagSet.SetDescription(`Katana is a fast crawler focused on execution in automation pipelines offering both headless and non-headless crawling.`)

// Input group
flagSet.CreateGroup("input", "Input",
flagSet.StringSliceVarP(&options.URLs, "list", "u", nil, "target url / list to crawl", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.StringVar(&options.Resume, "resume", "", "resume scan using resume.cfg"),
flagSet.StringSliceVarP(&options.Exclude, "exclude", "e", nil, "exclude host matching specified filter ('cdn', 'private-ips', cidr, ip, regex)", goflags.CommaSeparatedStringSliceOptions),
)

// Configuration group
flagSet.CreateGroup("config", "Configuration",
flagSet.StringSliceVarP(&options.Resolvers, "resolvers", "r", nil, "list of custom resolver (file or comma separated)", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.IntVarP(&options.MaxDepth, "depth", "d", 3, "maximum depth to crawl"),
Expand All @@ -117,11 +90,13 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.BoolVarP(&options.DisableRedirects, "disable-redirects", "dr", false, "disable following redirects (default false)"),
)

// Debug group
flagSet.CreateGroup("debug", "Debug",
flagSet.BoolVarP(&options.HealthCheck, "hc", "health-check", false, "run diagnostic check up"),
flagSet.StringVarP(&options.ErrorLogFile, "error-log", "elog", "", "file to write sent requests error log"),
)

// Headless group
flagSet.CreateGroup("headless", "Headless",
flagSet.BoolVarP(&options.Headless, "headless", "hl", false, "enable headless hybrid crawling (experimental)"),
flagSet.BoolVarP(&options.UseInstalledChrome, "system-chrome", "sc", false, "use local installed chrome browser instead of katana installed"),
Expand All @@ -135,6 +110,7 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.BoolVarP(&options.XhrExtraction, "xhr-extraction", "xhr", false, "extract xhr request url,method in jsonl output"),
)

// Scope group
flagSet.CreateGroup("scope", "Scope",
flagSet.StringSliceVarP(&options.Scope, "crawl-scope", "cs", nil, "in scope url regex to be followed by crawler", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.StringSliceVarP(&options.OutOfScope, "crawl-out-scope", "cos", nil, "out of scope url regex to be excluded by crawler", goflags.FileCommaSeparatedStringSliceOptions),
Expand All @@ -155,6 +131,7 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.StringVarP(&options.OutputFilterCondition, "filter-condition", "fdc", "", "filter response with dsl based condition"),
)

// Rate-Limit group
flagSet.CreateGroup("ratelimit", "Rate-Limit",
flagSet.IntVarP(&options.Concurrency, "concurrency", "c", 10, "number of concurrent fetchers to use"),
flagSet.IntVarP(&options.Parallelism, "parallelism", "p", 10, "number of concurrent inputs to process"),
Expand All @@ -163,11 +140,13 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.IntVarP(&options.RateLimitMinute, "rate-limit-minute", "rlm", 0, "maximum number of requests to send per minute"),
)

// Update group
flagSet.CreateGroup("update", "Update",
flagSet.CallbackVarP(runner.GetUpdateCallback(), "update", "up", "update katana to latest version"),
flagSet.BoolVarP(&options.DisableUpdateCheck, "disable-update-check", "duc", false, "disable automatic katana update check"),
)

// Output group
flagSet.CreateGroup("output", "Output",
flagSet.StringVarP(&options.OutputFile, "output", "o", "", "file to write output to"),
flagSet.BoolVarP(&options.StoreResponse, "store-response", "sr", false, "store http requests/responses"),
Expand Down Expand Up @@ -207,23 +186,43 @@ func init() {

func defaultResumeFilename() string {
homedir, err := os.UserHomeDir()
if err != nil {
gologger.Fatal().Msgf("could not get home directory: %s", err)
}
handleError("could not get home directory", err)
configDir := filepath.Join(homedir, ".config", "katana")
return filepath.Join(configDir, fmt.Sprintf("resume-%s.cfg", xid.New().String()))
}

// setupCloseHandler registers for os.Interrupt (Ctrl+C) and SIGTERM and starts
// a background goroutine that reacts to the first such signal: it closes the
// given runner, saves the runner state to resumeFilename (the resume file),
// logs any save error, and exits the process with status 0.
func setupCloseHandler(katanaRunner *runner.Runner, resumeFilename string) {
	// Subscribe before the goroutine is started so that a signal delivered
	// right after this function returns is already being captured.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)

	go func() {
		// The handler always ends in os.Exit, so a single receive is enough.
		<-sigCh
		gologger.DefaultLogger.Info().Msg("- Ctrl+C pressed in Terminal")
		katanaRunner.Close()

		gologger.Info().Msgf("Creating resume file: %s\n", resumeFilename)
		if err := katanaRunner.SaveState(resumeFilename); err != nil {
			gologger.Error().Msgf("Couldn't create resume file: %s\n", err)
		}

		os.Exit(0)
	}()
}

// cleanupOldResumeFiles cleans up resume files older than 10 days.
func cleanupOldResumeFiles() {
homedir, err := os.UserHomeDir()
if err != nil {
gologger.Fatal().Msgf("could not get home directory: %s", err)
}
handleError("could not get home directory", err)
root := filepath.Join(homedir, ".config", "katana")
filter := fileutil.FileFilters{
OlderThan: 24 * time.Hour * 10, // cleanup on the 10th day
Prefix: "resume-",
}
_ = fileutil.DeleteFilesOlderThan(root, filter)
}

func handleError(message string, err error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is going to fail, we should be more explicit about it. Something like `failWithError` or a similar name would be great.

if err != nil {
gologger.Fatal().Msgf("%s: %s\n", message, err)
}
}
Loading