Skip to content

Commit 3cf6df7

Browse files
committed
Merge pull request #197 from reedloden/fix-scraper
Fix OSVDB scraper to work with CloudFlare's protection
2 parents 36d448e + 9e6a80a commit 3cf6df7

File tree

3 files changed

+26
-11
lines changed

3 files changed

+26
-11
lines changed

Gemfile

+5
Original file line number | Diff line number | Diff line change
@@ -2,3 +2,8 @@ source 'https://rubygems.org'
22

33
gem 'rspec', '3.3.0'
44
gem 'rake'
5+
6+
group :development do
7+
gem 'pry'
8+
gem 'nokogiri'
9+
end

lib/cf_scrape.py

+5
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
import cfscrape
2+
import sys
3+
4+
scraper = cfscrape.create_scraper() # returns a requests.Session object
5+
print scraper.get(sys.argv[1]).content

lib/scrape.rb → lib/osvdb_scrape.rb

+16 -11
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,27 @@
1-
require 'rubygems'
2-
require 'bundler/setup'
3-
41
require 'pry'
5-
require 'mechanize'
2+
require 'nokogiri'
63
require 'yaml'
74
require 'date'
85

96
class OSVDB
107
attr_accessor :osvdb, :cve, :title, :description, :date, :cvss_v2, :gem, :url, :patched_versions, :page
11-
def initialize(url)
12-
self.url = url
8+
def initialize(osvdb)
9+
self.osvdb = osvdb
10+
self.url = "http://osvdb.org/show/osvdb/#{self.osvdb}"
11+
scrape!
1312
parse!
1413
end
1514

16-
def parse!
17-
mech = Mechanize.new
18-
self.page = mech.get(url)
15+
def scrape!
16+
html = `bash --login -c "python cf_scrape.py #{self.url}"`
17+
doc = Nokogiri::XML(html) do |config|
18+
config.nonet.noent
19+
end
1920

21+
self.page = doc
22+
end
23+
24+
def parse!
2025
page.search(".show_vuln_table").search("td ul li").each do |li|
2126
case li.children[0].text.strip
2227
when "CVE ID:"
@@ -29,7 +34,6 @@ def parse!
2934
self.description = page.search(".show_vuln_table").search("tr td tr .white_content p")[0].text
3035
self.date = page.search(".show_vuln_table").search("tr td tr .white_content tr td")[0].text
3136
self.title = page.search("title").text.gsub(/\d+: /, "")
32-
self.osvdb = page.search("title").text.match(/\d+/)[0]
3337
if cvss_p = page.search(".show_vuln_table").search("tr td tr .white_content div p")[0]
3438
self.set_cvss(cvss_p.children[0].text)
3539
end
@@ -68,7 +72,8 @@ def to_yaml
6872
'date' => date,
6973
'description' => description,
7074
'cvss_v2' => cvss_v2,
71-
'patched_versions' => patched_versions }.to_yaml
75+
'patched_versions' => patched_versions
76+
}.to_yaml(options = { line_width: 80 })
7277
end
7378

7479
def filename

0 commit comments

Comments
 (0)