This repository has been archived by the owner on Oct 20, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathRakefile
99 lines (84 loc) · 2.67 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Prerequisite for the tasks in this file that touch DataMapper or RfcEntry.
# Loads `app`, resolved relative to this Rakefile (presumably where the
# models and the database connection are set up -- confirm in app.rb).
task(:environment) { require_relative 'app' }
# One-shot setup entry point. It has no action of its own; it only chains
# the schema bootstrap, the RFC index import and the popularity import,
# in that order.
desc 'Create the database, prepare schema, import RFC index'
task :bootstrap => %w[db:bootstrap import_index import_popular]
# Schema maintenance tasks. Each one depends on :environment so that the
# application (and with it DataMapper / RfcEntry) is loaded first.
namespace :db do
  # Calls DataMapper.auto_migrate!.
  desc 'Rebuild the db schema'
  task(:rebuild => :environment) { DataMapper.auto_migrate! }

  # Calls DataMapper.auto_upgrade!.
  desc 'Automatically migrate the db schema based on changed model attributes'
  task(:migrate => :environment) { DataMapper.auto_upgrade! }

  # Picks between the two tasks above: upgrade in place when storage for
  # RfcEntry already exists, otherwise build the schema from scratch.
  desc 'Create and upgrade the database schema if necessary'
  task :bootstrap => :environment do
    schema_task = RfcEntry.storage_exists? ? 'db:migrate' : 'db:rebuild'
    Rake::Task[schema_task].invoke
  end
end
# Reads the downloaded RFC index (first prerequisite, tmp/rfc-index.xml) and
# mirrors every <rfc-entry> into the RfcEntry model.
#
# * New entries get document_id, title, abstract and keywords from the XML.
# * Existing entries keep those fields; only `obsoleted` and `publish_date`
#   are refreshed on every run.
# * Prints how many entries were changed and saved, plus the table size.
# * Entries whose save is rejected are reported on stderr and not counted.
desc %(Import the complete RFC index into the database)
task :import_index => ['tmp/rfc-index.xml', :environment] do |task|
  require 'nokogiri'
  require 'active_support/core_ext/object/try'
  require 'date'

  DataMapper.logger.set_log($stderr, :warn)

  # Block form of File.open closes the handle as soon as the document has
  # been parsed; the bare File.open used previously was never closed.
  index = File.open(task.prerequisites.first) { |io| Nokogiri(io) }
  num = 0

  # Turns a <date> node into a Date. Returns nil when the node is absent.
  # <day> is optional, hence the `try`; the parts are joined with spaces and
  # handed to Date.parse.
  date_from_xml = ->(xml_date) {
    if xml_date
      year = xml_date.at('./year').text
      month_name = xml_date.at('./month').text
      day = xml_date.at('./day').try(:text)
      Date.parse [year, month_name, day].join(' ')
    end
  }

  index.search('rfc-entry').each do |xml_entry|
    doc_id = xml_entry.at('./doc-id').text

    entry = RfcEntry.get(doc_id)
    unless entry
      entry = RfcEntry.new
      entry.document_id = doc_id
      entry.title = xml_entry.at('./title').text
      entry.abstract = xml_entry.at('./abstract').try(:inner_html)
      entry.keywords = xml_entry.search('./keywords/*').map(&:text)
    end

    entry.obsoleted = xml_entry.search('./obsoleted-by').any?
    entry.publish_date = date_from_xml.(xml_entry.at('./date'))

    # Capture dirtiness before saving, because a successful save clears it.
    changed = entry.dirty?
    if entry.save
      num += 1 if changed
    else
      # A rejected save used to be counted as an update and otherwise ignored.
      warn "could not save #{doc_id}"
    end
  end

  puts "updated #{num} entries (%d in database)." % RfcEntry.count
end
# Fetches the RFC index with curl when tmp/rfc-index.xml does not exist yet.
#
# The transfer is written to a ".part" sibling and renamed only after curl
# exits successfully. Writing straight to the target meant an interrupted
# download left a truncated file that satisfied this file task on every
# later run, so the broken index was never re-fetched.
file 'tmp/rfc-index.xml' do |task|
  mkdir_p File.dirname(task.name)
  index_url = 'ftp://ftp.rfc-editor.org/in-notes/rfc-index.xml'
  partial = "#{task.name}.part"

  begin
    # `sh` raises when curl exits non-zero, which skips the rename below.
    sh 'curl', '-#', index_url, '-o', partial
    mv partial, task.name
  ensure
    # Only present here if the download or the rename did not complete.
    rm_f partial if File.exist?(partial)
  end
end
# Scrapes the five faqs.org "popular RFC" pages, collects the basename of
# every /rfcs/ link in page order, and stores the 1-based position of each
# name as `popularity` on the RfcEntry fetched by that name.
#
# Names without a matching entry, and entries whose save is rejected, are
# reported on stderr; only successfully saved entries are counted.
desc %(Update the RFCs in the database with a popularity score)
task :import_popular => :environment do
  require 'nokogiri'
  require 'open-uri'

  pop_url = 'http://www.faqs.org/rfc-pop%d.html'
  num = 0

  popular = (1..5).flat_map do |n|
    # URI#open comes from open-uri and works on old and new Rubies alike.
    # Bare `open(url)` stopped fetching URLs in Ruby 3.0 and also routes the
    # string through Kernel#open. The block form closes the response IO.
    html = URI.parse(pop_url % n).open { |io| Nokogiri(io) }
    html.search('#fmaincolumn a[href^="/rfcs/"]').map do |link|
      File.basename(link['href'], '.html')
    end
  end

  popular.each_with_index do |name, idx|
    entry = RfcEntry.get(name)
    unless entry
      warn "could not find #{name}"
      next
    end

    entry.popularity = idx + 1
    if entry.save
      num += 1
    else
      # Previously a rejected save was counted as applied.
      warn "could not save #{name}"
    end
  end

  puts "applied popular score to #{num} entries."
end