Skip to content

Commit 17cb839

Browse files
committed
Cache the DTD file.
The URLs in the DOCTYPE declaration are replaced with the path to a local copy of the DTD, stored in the program's cache. The main reason is to avoid HTTP 429 (Too Many Requests) errors from www.gnu.org when the saxonb command downloads the DTD.
1 parent 0323199 commit 17cb839

File tree

2 files changed

+39
-0
lines changed

2 files changed

+39
-0
lines changed

lib/ffdocs/source_docs/collection.rb

+31
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# frozen_string_literal: true
22

33
require "digest/sha2"
4+
require "net/http"
45
require "nokogiri"
56
require "tempfile"
67

@@ -11,6 +12,8 @@ module FFDocs::SourceDocs
1112

1213
PATCHES_DIR = Pathname.new(File.expand_path("../patches", __FILE__))
1314

15+
DTD_MUTEX = Mutex.new
16+
1417
class SourceNotFound < StandardError; end
1518
class XMLTransformFailed < StandardError; end
1619

@@ -78,6 +81,8 @@ def initialize(options, storage, release)
7881
io.read
7982
end
8083

84+
xml = cache_dtd(xml, storage)
85+
8186
::FFDocs.log.info "Parsing source for #{@release.version} ..."
8287
parse_xml(release, Nokogiri::XML.parse(xml))
8388
end
@@ -200,6 +205,32 @@ def initialize(options, storage, release)
200205
html
201206
end
202207

208+
# Rewrite the DTD URLs inside the DOCTYPE declaration so that they point at
# a locally cached copy of each DTD.
#
# Only the first `<!DOCTYPE ...>` match in +xml+ is touched. Every
# `http:`/`https:` URL found inside it is mapped to a cache path with
# `storage.dtd_file(url)`; when that file does not exist yet it is
# downloaded first. If the download does not succeed, the original URL is
# left in place, so the DOCTYPE never refers to a cache file that was not
# written.
#
# Exceptions raised by Net::HTTP itself (connection errors, timeouts) are
# not rescued here and propagate to the caller.
#
# @param xml [String] XML source text.
# @param storage [#dtd_file] object mapping a URL to the path of its cached copy.
# @return [String] copy of +xml+ with cached DTD URLs replaced by local paths.
private def cache_dtd(xml, storage)
  xml.sub(%r[<!DOCTYPE .*?>]) do |doctype|
    doctype.gsub(/https?:[^"]+/) do |url|
      cached = storage.dtd_file(url)

      # DTD_MUTEX guards the existence check and the download together.
      available = DTD_MUTEX.synchronize do
        cached.exist? || download_dtd(url, cached)
      end

      # Fall back to the untouched URL when no cached copy is available.
      available ? cached.to_s : url
    end
  end
end

# Download the DTD at +url+ (always requested over HTTPS) into +cached+.
#
# @param url [String] URL as it appears in the DOCTYPE declaration.
# @param cached [Pathname] destination of the cached copy.
# @return [Boolean] true when the file was written, false when the server
#   answered with anything other than a 2xx response.
private def download_dtd(url, cached)
  # \A rather than ^: the matched URL text may contain newlines, and only
  # the scheme at the very start must be upgraded.
  secure_url = url.sub(/\Ahttp:/, "https:")
  ::FFDocs.log.info "Downloading DTD for #{secure_url} ..."

  case Net::HTTP.get_response(URI(secure_url))
  in Net::HTTPSuccess => response
    # The cache is trusted as soon as the file exists, so write to a
    # sibling temporary file and rename it into place: an interrupted
    # write must not leave a truncated DTD under the final name.
    partial = cached.dirname.join("#{cached.basename}.#{Process.pid}.tmp")
    begin
      partial.write(response.body)
      partial.rename(cached)
    ensure
      # Only still present when the write or the rename failed.
      partial.delete if partial.exist?
    end
    true
  in _ => failure
    ::FFDocs.log.error "Failed request: #{secure_url}: #{failure}"
    false
  end
end
232+
233+
203234
end
204235

205236
end

lib/ffdocs/storage.rb

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
require "digest/sha2"
12
require "json"
23
require "logger"
34
require "pathname"
@@ -43,6 +44,13 @@ def get_file(version, path)
4344
end
4445
end
4546

47+
# Location of the cached DTD for +url+ inside the "dtd" directory under
# CACHE_DIR. The directory is created when it is missing; the DTD file
# itself is not created here.
#
# @param url [String] URL the DTD is published at.
# @return [Pathname] "<CACHE_DIR>/dtd/<SHA2 hex digest of url>.dtd".
def dtd_file(url)
  cache_root = CACHE_DIR.join("dtd")
  cache_root.mkpath unless cache_root.directory?

  cache_root.join("#{Digest::SHA2.hexdigest(url)}.dtd")
end
53+
4654
module SyncData
4755

4856
GIT_URL = "https://github.com/FFmpeg/FFmpeg.git"

0 commit comments

Comments
 (0)