diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..91fdd16 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +0.5.0 +===== + +* Follow redirects to a configurable depth + diff --git a/README.md b/README.md index ae54999..73d8f5c 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,16 @@ We want to allow people to keep embedding images in comments/issues/READMEs/goog Using a shared key, proxy URLs are encrypted with [hmac](http://en.wikipedia.org/wiki/HMAC) so we can bust caches/ban/rate limit if needed. -Camo currently runs on node version 0.4.8 in production at GitHub. +Camo currently runs on node version 0.4.10 at GitHub. Features -------- -* Proxy remote images with a content-type of `image/*` -* Proxy images under 5 MB * Proxy google charts -* 404s for anything other than a 200 or 304 HTTP response +* Proxy images under 5 MB +* Follow redirects to a configurable depth +* Proxy remote images with a content-type of `image/*` +* 404s for anything other than a 200, 301, 302 or 304 HTTP response * Disallows proxying to private IP ranges At GitHub we render markdown and replace all of the `src` attributes on the `img` tags with the appropriate URL to hit the proxies. There's example code for creating URLs in [the tests](https://github.com/atmos/camo/blob/master/test/proxy_test.rb). diff --git a/server.coffee b/server.coffee index 4379ab7..73306f7 100644 --- a/server.coffee +++ b/server.coffee @@ -4,10 +4,11 @@ Http = require 'http' Crypto = require 'crypto' QueryString = require 'querystring' -port = process.env.PORT || 8081 -version = "0.3.0" +port = parseInt process.env.PORT || 8081 +version = "0.5.0" excluded = process.env.CAMO_HOST_EXCLUSIONS || '*.example.org' shared_key = process.env.CAMO_KEY || '0x24FEEDFACEDEADBEEFCAFE' +max_redirects = process.env.CAMO_MAX_REDIRECTS || 4 camo_hostname = process.env.CAMO_HOSTNAME || "unknown" logging_enabled = process.env.CAMO_LOGGING_ENABLED || "disabled" pidfile = process.env.PIDFILE || 'tmp/camo.pid' @@ -35,6 +36,86 @@ finish = (resp, str) -> current_connections = 0 if current_connections < 1 resp.connection && resp.end str +process_url = (url, transferred_headers, resp, remaining_redirects) -> + if url.host? && !url.host.match(RESTRICTED_IPS) + if url.host.match(EXCLUDED_HOSTS) + return four_oh_four(resp, "Hitting excluded hostnames") + + src = Http.createClient url.port || 80, url.hostname + + src.on 'error', (error) -> + four_oh_four(resp, "Client Request error #{error.stack}") + + query_path = url.pathname + if url.query? + query_path += "?#{url.query}" + + transferred_headers.host = url.host + + log transferred_headers + + srcReq = src.request 'GET', query_path, transferred_headers + + srcReq.on 'response', (srcResp) -> + is_finished = true + + log srcResp.headers + + content_length = srcResp.headers['content-length'] + + if content_length > 5242880 + four_oh_four(resp, "Content-Length exceeded") + else + newHeaders = + 'expires' : srcResp.headers['expires'] + 'content-type' : srcResp.headers['content-type'] + 'cache-control' : srcResp.headers['cache-control'] + 'content-length' : content_length + 'Camo-Host' : camo_hostname + 'X-Content-Type-Options' : 'nosniff' + + if srcResp.headers['content-encoding'] + newHeaders['content-encoding'] = srcResp.headers['content-encoding'] + + srcResp.on 'end', -> + if is_finished + finish resp + srcResp.on 'error', -> + if is_finished + finish resp + switch srcResp.statusCode + when 200 + if newHeaders['content-type'] && newHeaders['content-type'].slice(0, 5) != 'image' + four_oh_four(resp, "Non-Image content-type returned") + + log newHeaders + + resp.writeHead srcResp.statusCode, newHeaders + srcResp.on 'data', (chunk) -> + resp.write chunk + when 301, 302 + if remaining_redirects <= 0 + four_oh_four(resp, "Exceeded max depth") + else + is_finished = false + newUrl = Url.parse srcResp.headers['location'] + unless newUrl.host? and newUrl.hostname? + newUrl.host = newUrl.hostname = url.hostname + newUrl.protocol = url.protocol + + console.log newUrl + process_url newUrl, transferred_headers, resp, remaining_redirects - 1 + when 304 + resp.writeHead srcResp.statusCode, newHeaders + else + four_oh_four(resp, "Responded with " + srcResp.statusCode + ":" + srcResp.headers) + srcReq.on 'error', -> + finish resp + + srcReq.end() + else + four_oh_four(resp, "No host found " + url.host) + # decode a string of two char hex digits hexdec = (str) -> if str and str.length > 0 and str.length % 2 == 0 and not str.match(/[^0-9a-f]/) @@ -92,71 +173,7 @@ server = Http.createServer (req, resp) -> if hmac_digest == query_digest url = Url.parse dest_url - if url.host? && !url.host.match(RESTRICTED_IPS) - if url.host.match(EXCLUDED_HOSTS) - return four_oh_four(resp, "Hitting excluded hostnames") - - src = Http.createClient url.port || 80, url.hostname - - src.on 'error', (error) -> - four_oh_four(resp, "Client Request error #{error.stack}") - - query_path = url.pathname - if url.query? - query_path += "?#{url.query}" - - transferred_headers.host = url.host - - log transferred_headers - - srcReq = src.request 'GET', query_path, transferred_headers - - srcReq.on 'response', (srcResp) -> - log srcResp.headers - - content_length = srcResp.headers['content-length'] - - if content_length > 5242880 - four_oh_four(resp, "Content-Length exceeded") - else - newHeaders = - 'expires' : srcResp.headers['expires'] - 'content-type' : srcResp.headers['content-type'] - 'cache-control' : srcResp.headers['cache-control'] - 'content-length' : content_length - 'Camo-Host' : camo_hostname - 'X-Content-Type-Options' : 'nosniff' - - srcResp.on 'end', -> - finish resp - - srcResp.on 'error', -> - finish resp - - switch srcResp.statusCode - when 200 - if newHeaders['content-type'] && newHeaders['content-type'].slice(0, 5) != 'image' - four_oh_four(resp, "Non-Image content-type returned") - - log newHeaders - - resp.writeHead srcResp.statusCode, newHeaders - srcResp.on 'data', (chunk) -> - resp.write chunk - - when 304 - resp.writeHead srcResp.statusCode, newHeaders - - else - four_oh_four(resp, "Responded with #{srcResp.statusCode}:#{srcResp.headers}") - - srcReq.on 'error', -> - finish resp - - srcReq.end() - - else - four_oh_four(resp, "No host found #{url.host}") + process_url url, transferred_headers, resp, max_redirects else four_oh_four(resp, "checksum mismatch #{hmac_digest}:#{query_digest}") else diff --git a/test/proxy_test.rb b/test/proxy_test.rb index 20cef00..4ee07ec 100644 --- a/test/proxy_test.rb +++ b/test/proxy_test.rb @@ -4,6 +4,7 @@ require 'openssl' require 'rest_client' require 'addressable/uri' +require 'ruby-debug' require 'test/unit' @@ -28,21 +29,37 @@ def test_proxy_valid_google_chart_url assert_equal(200, response.code) end - def test_404s_on_urls_without_an_http_host + def test_follows_redirects + response = request('http://cl.ly/1K0X2Y2F1P0o3z140p0d/boom-headshot.gif') + assert_equal(200, response.code) + end + + def test_follows_redirects_formatted_strangely + response = request('http://cl.ly/DPcp/Screen%20Shot%202012-01-17%20at%203.42.32%20PM.png') + assert_equal(200, response.code) + end + + def test_follows_redirects_with_path_only_location_headers + assert_nothing_raised do + request('http://blogs.msdn.com/photos/noahric/images/9948044/425x286.aspx') + end + end + + def test_404s_on_infinidirect assert_raise RestClient::ResourceNotFound do - request('/picture/Mincemeat/Pimp.jpg') + request('http://modeselektor.herokuapp.com/') end end - def test_404s_on_images_greater_than_5_megabytes + def test_404s_on_urls_without_an_http_host assert_raise RestClient::ResourceNotFound do - request('http://apod.nasa.gov/apod/image/0505/larryslookout_spirit_big.jpg') + request('/picture/Mincemeat/Pimp.jpg') end end - def test_404s_on_redirects + def test_404s_on_images_greater_than_5_megabytes assert_raise RestClient::ResourceNotFound do - request('http://blogs.msdn.com/photos/noahric/images/9948044/425x286.aspx') + request('http://apod.nasa.gov/apod/image/0505/larryslookout_spirit_big.jpg') end end