From f29cf36d00e5ab6b68b54fc6c1ce303fe31c7ef5 Mon Sep 17 00:00:00 2001 From: Carlos Zhao Date: Wed, 1 Mar 2023 17:03:59 -0500 Subject: [PATCH 1/5] Add a new configurable argument read_timeout for unblock normal user access when bot request takes too long. --- lib/prerender_rails.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/prerender_rails.rb b/lib/prerender_rails.rb index 98c1ca9..88f6d21 100644 --- a/lib/prerender_rails.rb +++ b/lib/prerender_rails.rb @@ -183,6 +183,7 @@ def get_prerendered_page_response(env) req.basic_auth(ENV['PRERENDER_USERNAME'], ENV['PRERENDER_PASSWORD']) if @options[:basic_auth] http = Net::HTTP.new(url.host, url.port) http.use_ssl = true if url.scheme == 'https' + http.read_timeout = @options[:read_timeout] if @options[:read_timeout] response = http.request(req) if response['Content-Encoding'] == 'gzip' response.body = ActiveSupport::Gzip.decompress(response.body) @@ -203,7 +204,13 @@ def get_prerendered_page_response(env) hop_by_hop_headers.each { |h| response.delete(h) } response - rescue + rescue Timeout::Error => e + if @options[:read_timeout] + error_type = '503 Service Unavailable' + error_msg = "ERROR: timed out while trying to connect. #{e.message}" + raise Timeout::Error.new(error_type), error_msg + end + rescue StandardError nil end end From 5538390d4c612ce2f744a6518db53977b0c7518f Mon Sep 17 00:00:00 2001 From: Carlos Zhao Date: Wed, 1 Mar 2023 17:04:23 -0500 Subject: [PATCH 2/5] Add test for the new argument read_timeout. --- test/lib/prerender_rails.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/lib/prerender_rails.rb b/test/lib/prerender_rails.rb index 719b61e..d1558c0 100644 --- a/test/lib/prerender_rails.rb +++ b/test/lib/prerender_rails.rb @@ -102,6 +102,18 @@ end + it 'should return a timeout error to the crawler if the request takes longer than the read_timeout argument' do + request = Rack::MockRequest.env_for '/', 'HTTP_USER_AGENT' => bot + stub_request(:get, @prerender.build_api_url(request)) + .with(headers: { 'User-Agent': bot }) + .to_return(body: '503 Service Unavailable', status: 503) + response = Rack::Prerender.new(@app, read_timeout: 22).call(request) + + assert_equal response[0], 503 + assert_equal response[2], ['503 Service Unavailable'] + end + + it "should continue to app routes if the url is part of the regex specific blacklist" do request = Rack::MockRequest.env_for "/search/things/123/page", "HTTP_USER_AGENT" => bot response = Rack::Prerender.new(@app, blacklist: ['^/search', '/help']).call(request) From 1b3a8cb80b3f83b9d0d2d476d1a5a5756e941e98 Mon Sep 17 00:00:00 2001 From: Carlos Zhao Date: Fri, 3 Mar 2023 14:39:41 -0500 Subject: [PATCH 3/5] Update test. --- test/lib/prerender_rails.rb | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/lib/prerender_rails.rb b/test/lib/prerender_rails.rb index d1558c0..3d1aff7 100644 --- a/test/lib/prerender_rails.rb +++ b/test/lib/prerender_rails.rb @@ -106,11 +106,13 @@ request = Rack::MockRequest.env_for '/', 'HTTP_USER_AGENT' => bot stub_request(:get, @prerender.build_api_url(request)) .with(headers: { 'User-Agent': bot }) - .to_return(body: '503 Service Unavailable', status: 503) - response = Rack::Prerender.new(@app, read_timeout: 22).call(request) + .to_raise(Timeout::Error) - assert_equal response[0], 503 - assert_equal response[2], ['503 Service Unavailable'] + err = assert_raises Timeout::Error do + Rack::Prerender.new(@app, read_timeout: 22).call(request) + end + + assert_equal err.message, '503 Service Unavailable' end From 91518b76b8a3c65f60ad218014fce52aea70818d Mon Sep 17 00:00:00 2001 From: Carlos Zhao Date: Tue, 21 Mar 2023 23:38:47 -0400 Subject: [PATCH 4/5] Remove Rails error message from the timeout error message. --- lib/prerender_rails.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/prerender_rails.rb b/lib/prerender_rails.rb index 88f6d21..05740f0 100644 --- a/lib/prerender_rails.rb +++ b/lib/prerender_rails.rb @@ -204,10 +204,10 @@ def get_prerendered_page_response(env) hop_by_hop_headers.each { |h| response.delete(h) } response - rescue Timeout::Error => e + rescue Timeout::Error if @options[:read_timeout] error_type = '503 Service Unavailable' - error_msg = "ERROR: timed out while trying to connect. #{e.message}" + error_msg = 'ERROR: timed out while trying to connect.' raise Timeout::Error.new(error_type), error_msg end rescue StandardError From 8a67611e77e6b83b67ae7a15fe70f7bec329a450 Mon Sep 17 00:00:00 2001 From: Carlos Zhao Date: Thu, 23 Mar 2023 17:17:02 -0400 Subject: [PATCH 5/5] Move the read timeout error handler to the call method. --- lib/prerender_rails.rb | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/prerender_rails.rb b/lib/prerender_rails.rb index 05740f0..a664579 100644 --- a/lib/prerender_rails.rb +++ b/lib/prerender_rails.rb @@ -111,12 +111,19 @@ def call(env) return cached_response.finish end - prerendered_response = get_prerendered_page_response(env) - - if prerendered_response - response = build_rack_response_from_prerender(prerendered_response) - after_render(env, prerendered_response) - return response.finish + begin + prerendered_response = get_prerendered_page_response(env) + + if prerendered_response + response = build_rack_response_from_prerender(prerendered_response) + after_render(env, prerendered_response) + return response.finish + end + rescue Timeout::Error + if @options[:read_timeout] + response = Rack::Response.new(nil, 503, nil) + return response.finish + end end end @@ -204,13 +211,7 @@ def get_prerendered_page_response(env) hop_by_hop_headers.each { |h| response.delete(h) } response - rescue Timeout::Error - if @options[:read_timeout] - error_type = '503 Service Unavailable' - error_msg = 'ERROR: timed out while trying to connect.' - raise Timeout::Error.new(error_type), error_msg - end - rescue StandardError + rescue nil end end