mirror of
https://github.com/discourse/discourse.git
synced 2025-04-17 13:09:02 +08:00
FIX: By default, don't abort Google Groups crawling on error
This commit is contained in:
parent
a4f5f5d0bb
commit
ed1e5ef6cc
@ -100,7 +100,7 @@ def crawl_topic(url)
|
|||||||
@scraped_topic_urls << url
|
@scraped_topic_urls << url
|
||||||
rescue
|
rescue
|
||||||
puts "Failed to scrape topic at #{url}".red
|
puts "Failed to scrape topic at #{url}".red
|
||||||
raise
|
raise if @abort_on_error
|
||||||
end
|
end
|
||||||
|
|
||||||
def crawl_message(url, might_be_deleted)
|
def crawl_message(url, might_be_deleted)
|
||||||
@ -122,11 +122,15 @@ def crawl_message(url, might_be_deleted)
|
|||||||
|
|
||||||
File.write(filename, content)
|
File.write(filename, content)
|
||||||
rescue Selenium::WebDriver::Error::NoSuchElementError
|
rescue Selenium::WebDriver::Error::NoSuchElementError
|
||||||
raise unless might_be_deleted
|
if might_be_deleted
|
||||||
puts "Message might be deleted. Skipping #{url}"
|
puts "Message might be deleted. Skipping #{url}"
|
||||||
|
else
|
||||||
|
puts "Failed to scrape message at #{url}".red
|
||||||
|
raise if @abort_on_error
|
||||||
|
end
|
||||||
rescue
|
rescue
|
||||||
puts "Failed to scrape message at #{url}".red
|
puts "Failed to scrape message at #{url}".red
|
||||||
raise
|
raise if @abort_on_error
|
||||||
end
|
end
|
||||||
|
|
||||||
def login
|
def login
|
||||||
@ -195,7 +199,10 @@ end
|
|||||||
def parse_arguments
|
def parse_arguments
|
||||||
puts ""
|
puts ""
|
||||||
|
|
||||||
|
# default values
|
||||||
@force_import = false
|
@force_import = false
|
||||||
|
@abort_on_error = false
|
||||||
|
@cookies = DEFAULT_COOKIES_TXT if File.exist?(DEFAULT_COOKIES_TXT)
|
||||||
|
|
||||||
parser = OptionParser.new do |opts|
|
parser = OptionParser.new do |opts|
|
||||||
opts.banner = "Usage: google_groups.rb [options]"
|
opts.banner = "Usage: google_groups.rb [options]"
|
||||||
@ -204,6 +211,7 @@ def parse_arguments
|
|||||||
opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v }
|
opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v }
|
||||||
opts.on("--path PATH", "output path for emails") { |v| @path = v }
|
opts.on("--path PATH", "output path for emails") { |v| @path = v }
|
||||||
opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") { @force_import = true }
|
opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") { @force_import = true }
|
||||||
|
opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") { @abort_on_error = true }
|
||||||
opts.on("-h", "--help") do
|
opts.on("-h", "--help") do
|
||||||
puts opts
|
puts opts
|
||||||
exit
|
exit
|
||||||
@ -216,15 +224,13 @@ def parse_arguments
|
|||||||
exit_with_error(e.message, "", parser)
|
exit_with_error(e.message, "", parser)
|
||||||
end
|
end
|
||||||
|
|
||||||
@cookies = DEFAULT_COOKIES_TXT if @cookies.nil? && File.exist?(DEFAULT_COOKIES_TXT)
|
|
||||||
@path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil?
|
|
||||||
|
|
||||||
mandatory = [:groupname, :cookies]
|
mandatory = [:groupname, :cookies]
|
||||||
missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? }
|
missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? }
|
||||||
|
|
||||||
exit_with_error("Missing arguments: #{missing.join(', ')}".red.bold, "", parser, "") if missing.any?
|
exit_with_error("Missing arguments: #{missing.join(', ')}".red.bold, "", parser, "") if missing.any?
|
||||||
exit_with_error("cookies.txt not found at #{@cookies}".red.bold, "") if !File.exist?(@cookies)
|
exit_with_error("cookies.txt not found at #{@cookies}".red.bold, "") if !File.exist?(@cookies)
|
||||||
|
|
||||||
|
@path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil?
|
||||||
FileUtils.mkpath(@path)
|
FileUtils.mkpath(@path)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user