From a413a1e015a62fda2575279c61404031b8865d67 Mon Sep 17 00:00:00 2001
From: Constanza <constanza@discourse.org>
Date: Wed, 6 Oct 2021 12:24:12 -0400
Subject: [PATCH] DEV: process image uploads in the Zendesk API import script (#14524)

---
 script/import_scripts/zendesk_api.rb | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 3 deletions(-)

diff --git a/script/import_scripts/zendesk_api.rb b/script/import_scripts/zendesk_api.rb
index 7d16be87e0d..4b5a2b56801 100644
--- a/script/import_scripts/zendesk_api.rb
+++ b/script/import_scripts/zendesk_api.rb
@@ -4,6 +4,7 @@
 #
 # This one uses their API.
 
+require 'open-uri'
 require 'reverse_markdown'
 require_relative 'base'
 require_relative 'base/generic_database'
@@ -13,6 +14,24 @@ require_relative 'base/generic_database'
 class ImportScripts::ZendeskApi < ImportScripts::Base
   BATCH_SIZE = 1000
 
+  HTTP_ERRORS = [
+    EOFError,
+    Errno::ECONNRESET,
+    Errno::EINVAL,
+    Net::HTTPBadResponse,
+    Net::HTTPHeaderSyntaxError,
+    Net::ProtocolError,
+    Timeout::Error,
+    OpenURI::HTTPError,
+    OpenSSL::SSL::SSLError
+  ]
+
+  MAX_RETRIES = 5
+
+  IMAGE_DOWNLOAD_PATH = "replace-me"
+
+  SUBDOMAIN = "replace-me"
+
   def initialize(source_url, path, auth_email, auth_token)
     super()
 
@@ -219,7 +238,7 @@ class ImportScripts::ZendeskApi < ImportScripts::Base
         {
           id: import_topic_id(row['id']),
           title: row['title'].present? ? row['title'].strip[0...255] : "Topic title missing",
-          raw: normalize_raw(row['raw']),
+          raw: normalize_raw(row['raw'], user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id),
           category: category_id_from_imported_category_id(row['category_id']),
           user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
           created_at: row['created_at'],
@@ -257,7 +276,7 @@ class ImportScripts::ZendeskApi < ImportScripts::Base
 
         {
           id: row['id'],
-          raw: normalize_raw(row['raw']),
+          raw: normalize_raw(row['raw'], user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id),
           user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id,
           topic_id: topic[:topic_id],
           created_at: row['created_at'],
@@ -302,9 +321,50 @@ class ImportScripts::ZendeskApi < ImportScripts::Base
     end
   end
 
-  def normalize_raw(raw)
+  # Runs the raw body through ReverseMarkdown and replaces images hosted under
+  # SUBDOMAIN's /hc/user_images/ path with local uploads created for user_id.
+  # An image that cannot be downloaded or uploaded is removed from the text.
+  def normalize_raw(raw, user_id)
     raw = raw.gsub('\n', '')
     raw = ReverseMarkdown.convert(raw)
+
+    # Process images, after the ReverseMarkdown they look like
+    # ![](https://SUBDOMAIN.zendesk.com/hc/user_images/FILE.EXT)
+    raw.gsub!(/!\[\]\((https:\/\/#{SUBDOMAIN}\.zendesk\.com\/hc\/user_images\/([^).]+\.[^)]+))\)/i) do
+      image_url = $1
+      filename = $2
+      attempts = 0
+
+      begin
+        # URI.open, not Kernel#open: open-uri stopped patching Kernel#open for URLs in Ruby 3.0
+        URI.open(image_url) do |image|
+          # IMAGE_DOWNLOAD_PATH is a scratch file; it is overwritten by every downloaded image
+          File.open(IMAGE_DOWNLOAD_PATH, "wb") do |file|
+            file.write(image.read)
+          end
+        end
+      rescue *HTTP_ERRORS
+        if attempts < MAX_RETRIES
+          attempts += 1
+          sleep(2)
+          retry
+        end
+        # Retries exhausted: replace the image with nothing and keep importing
+        puts "Error downloading image"
+        next ""
+      end
+
+      upl_obj = create_upload(user_id, IMAGE_DOWNLOAD_PATH, filename)
+
+      if upl_obj&.persisted?
+        html_for_upload(upl_obj, filename)
+      else
+        # Drop the image instead of calling exit: one failed upload must not abort the whole import
+        puts "Error creating image upload"
+        ""
+      end
+    end
+
     raw
   end
 