From 43b2b1f8144f9ad2bf59e892081bc98e058fd04a Mon Sep 17 00:00:00 2001 From: dxuian Date: Fri, 4 Oct 2024 23:12:28 +0530 Subject: [PATCH 1/2] added a lite option to remove content --- seek/seek.rb | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/seek/seek.rb b/seek/seek.rb index d34701b4a..c64d8ad84 100755 --- a/seek/seek.rb +++ b/seek/seek.rb @@ -57,7 +57,7 @@ class Parser # Custom OptionParser ScriptOptions class ScriptOptions - attr_accessor :keyword, :location, :range, :worktype, :delay, :time, :print_total + attr_accessor :keyword, :location, :range, :worktype, :delay, :time, :print_total, :lite def define_options(parser) parser.banner = "Usage: #{Paint['seek.rb [options]', :red, :white]}" @@ -72,6 +72,7 @@ def define_options(parser) delay_execution_option(parser) execute_at_time_option(parser) print_total_number_option(parser) + lite_option(parser) parser.separator "" parser.separator "Common options:" @@ -157,11 +158,22 @@ def print_total_number_option(parser) end end end + + def lite_option(parser) + parser.on("--lite [BOOLEAN]", "If BOOLEAN is true or 'yes', do not include the content column in the CSV") do |value| + self.lite = case value + when TrueClass, "yes", "Yes", "YES" + true + when FalseClass, NilClass, "no", "No", "NO" + false + else + value.to_s.casecmp("true").zero? || value.to_s.casecmp("yes").zero? + end + end + end + end - # - # Return a structure describing the options. - # def parse(args) # The options specified on the command line will be collected in # *options*. @@ -209,6 +221,10 @@ def parse(args) print "Only print the total number of jobs found? (yes/no): " options.print_total = $stdin.gets.chomp.casecmp("yes").zero? end +if options.lite.nil? + print "Discard the content column in the results? (yes/no): " + options.lite = $stdin.gets.chomp.casecmp("yes").zero? +end agent = Mechanize.new agent.user_agent_alias = "Windows Chrome" @@ -225,19 +241,11 @@ def parse(args) ) results = [] results << - [ - "Title", - "URL", - "Advertiser", - "Location", - "Listing Date", - "Salary", - "Classification", - "Sub Classification", - # "Work Type", - "Short Description", - "Content" - ] + if options.lite + ["Title", "URL", "Advertiser", "Location", "Listing Date", "Salary", "Classification", "Sub Classification", "Short Description"] + else + ["Title", "URL", "Advertiser", "Location", "Listing Date", "Salary", "Classification", "Sub Classification", "Short Description", "Content"] + end if options.print_total # Using the CSS selector @@ -276,7 +284,12 @@ def parse(args) # listing_date = ad.at('dd[data-automation="job-detail-date"]').text if listing_date.empty? get_script = ad.at('script[data-automation="server-state"]').text salary = get_script.gsub(/(.*"jobSalary":")(.*?)(".*)/m, '\2') if salary.empty? && get_script.include?("jobSalary") - content = get_script.gsub(/(.*"content\(\{\\"platform\\":\\"WEB\\"\}\)":")(.*?)(".*)/m, '\2') + content = if !options.lite + get_script.gsub(/(.*"content\(\{\\"platform\\":\\"WEB\\"\}\)":")(.*?)(".*)/m, '\2') + else + nil + end + results << [ title, From ad99f4bb7fc7ca550c03762e23f4f260db225a62 Mon Sep 17 00:00:00 2001 From: dxuian Date: Sun, 6 Oct 2024 18:16:16 +0530 Subject: [PATCH 2/2] removed content and enhanced old code --- seek/seek.rb | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/seek/seek.rb b/seek/seek.rb index c64d8ad84..364e3c0bf 100755 --- a/seek/seek.rb +++ b/seek/seek.rb @@ -239,13 +239,9 @@ def parse(args) ["worktype", options.worktype] ] ) -results = [] -results << - if options.lite - ["Title", "URL", "Advertiser", "Location", "Listing Date", "Salary", "Classification", "Sub Classification", "Short Description"] - else - ["Title", "URL", "Advertiser", "Location", "Listing Date", "Salary", "Classification", "Sub Classification", "Short Description", "Content"] - end +results = [ + ["Title", "URL", "Advertiser", "Location", "Listing Date", "Salary", "Classification", "Sub Classification", "Short Description"] + (options.lite ? [] : ["Content"]) +] if options.print_total # Using the CSS selector @@ -284,26 +280,21 @@ def parse(args) # listing_date = ad.at('dd[data-automation="job-detail-date"]').text if listing_date.empty? get_script = ad.at('script[data-automation="server-state"]').text salary = get_script.gsub(/(.*"jobSalary":")(.*?)(".*)/m, '\2') if salary.empty? && get_script.include?("jobSalary") - content = if !options.lite - get_script.gsub(/(.*"content\(\{\\"platform\\":\\"WEB\\"\}\)":")(.*?)(".*)/m, '\2') - else - nil - end - - results << - [ - title, - url, - advertiser, - location, - listing_date, - salary, - classification, - sub_classification, - # work_type, - short_description, - content - ] + content = options.lite ? nil : get_script.gsub(/(.*"content\(\{\\"platform\\":\\"WEB\\"\}\)":")(.*?)(".*)/m, '\2') + resultsrow = [ + title, + url, + advertiser, + location, + listing_date, + salary, + classification, + sub_classification, + # work_type, + short_description, + ] + resultsrow << content unless options.lite + results << resultsrow end if (link = page.link_with(text: "Next")) # As long as there is still a next page link