diff --git a/lib/onebox/helpers.rb b/lib/onebox/helpers.rb index 3d2ee7013a0..ab807a875c3 100644 --- a/lib/onebox/helpers.rb +++ b/lib/onebox/helpers.rb @@ -13,15 +13,20 @@ module Onebox html.gsub(/<[^>]+>/, " ").gsub(/\n/, "") end + # Fetches the HTML response body for a URL. + # + # Note that the size of the response body is capped at `Onebox.options.max_download_kb`. When the limit has been reached, + # this method will return the response body that has been downloaded up to the limit. def self.fetch_html_doc(url, headers = nil, body_cacher = nil) response = ( begin - fetch_response(url, headers: headers, body_cacher: body_cacher) + fetch_response(url, headers:, body_cacher:, raise_error_when_response_too_large: false) rescue StandardError nil end ) + doc = Nokogiri.HTML(response) uri = Addressable::URI.parse(url) @@ -45,7 +50,12 @@ module Onebox response = ( begin - fetch_response(uri.to_s, headers: headers, body_cacher: body_cacher) + fetch_response( + uri.to_s, + headers:, + body_cacher:, + raise_error_when_response_too_large: false, + ) rescue StandardError nil end @@ -63,7 +73,8 @@ module Onebox redirect_limit: 5, domain: nil, headers: nil, - body_cacher: nil + body_cacher: nil, + raise_error_when_response_too_large: true ) redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit @@ -125,7 +136,11 @@ module Onebox response.read_body do |chunk| result.write(chunk) - raise DownloadTooLarge.new if result.size > size_bytes + + if result.size > size_bytes + raise_error_when_response_too_large ? raise(DownloadTooLarge.new) : break + end + raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout end diff --git a/spec/lib/onebox/helpers_spec.rb b/spec/lib/onebox/helpers_spec.rb index 195b24ec5f3..139d422aec4 100644 --- a/spec/lib/onebox/helpers_spec.rb +++ b/spec/lib/onebox/helpers_spec.rb @@ -16,6 +16,7 @@ RSpec.describe Onebox::Helpers do around do |example| previous_options = Onebox.options.to_h Onebox.options = { max_download_kb: 1 } + stub_request(:get, "http://example.com/large-file").to_return( status: 200, body: onebox_response("slides"), @@ -32,6 +33,15 @@ RSpec.describe Onebox::Helpers do ) end + it "returns the body of the response when size of response body exceeds the limit and `raise_error_when_response_too_large` has been set to `false`" do + expect( + described_class.fetch_response( + "http://example.com/large-file", + raise_error_when_response_too_large: false, + ), + ).to eq(onebox_response("slides")) + end + it "raises an exception when private url requested" do FinalDestination::TestHelper.stub_to_fail do expect { described_class.fetch_response("http://example.com/large-file") }.to raise_error( @@ -49,6 +59,22 @@ RSpec.describe Onebox::Helpers do expect(described_class.fetch_html_doc(uri).to_s).to match("success") end + it "does not raise an error when response body exceeds Onebox's `max_download_kb` limit" do + previous_options = Onebox.options.to_h + Onebox.options = previous_options.merge(max_download_kb: 1) + + stub_request(:get, "http://example.com/large-file").to_return( + status: 200, + body: onebox_response("slides"), + ) + + expect(described_class.fetch_html_doc("http://example.com/large-file").to_s).to include( + "ECMAScript 2015 by David Leonard", + ) + ensure + Onebox.options = previous_options + end + context "with canonical link" do it "follows canonical link" do uri = "https://www.example.com"