diff --git a/fixtures/test_techcrunch1.json b/fixtures/test_techcrunch1.json index 33f1ee1..8e889f8 100644 --- a/fixtures/test_techcrunch1.json +++ b/fixtures/test_techcrunch1.json @@ -30,7 +30,7 @@ "href": "http://money.cnn.com/popups/2006/biz2/peoplewhodontmatter/frameset.exclude.html" }, { - "text": "\"\"", + "text": "", "href": "http://tctechcrunch2011.files.wordpress.com/2011/08/screen-shot-2011-08-13-at-6-43-20-pm1.png" } ] diff --git a/lib/extractor.js b/lib/extractor.js index 7a13832..be8a560 100644 --- a/lib/extractor.js +++ b/lib/extractor.js @@ -41,7 +41,7 @@ void function () { }, publisher: function (doc) { var cache$, cache$1, publisherCandidates; - publisherCandidates = doc("meta[property='og:site_name'], meta[name='dc.publisher'], meta[name='DC.publisher'], meta[name='DC.Publisher']"); + publisherCandidates = doc("meta[property='og:site_name'], meta[itemprop=name], meta[name='dc.publisher'], meta[name='DC.publisher'], meta[name='DC.Publisher']"); return (null != (cache$ = cleanNull(null != publisherCandidates && null != (cache$1 = publisherCandidates.first()) ? cache$1.attr('content') : void 0)) ? cache$.trim() : void 0) || null; }, title: function (doc) { diff --git a/src/extractor.coffee b/src/extractor.coffee index 766489a..f09f2a1 100644 --- a/src/extractor.coffee +++ b/src/extractor.coffee @@ -71,6 +71,7 @@ module.exports = # Grab the publisher of the page/site publisher: (doc) -> publisherCandidates = doc("meta[property='og:site_name'], \ + meta[itemprop=name], \ meta[name='dc.publisher'], \ meta[name='DC.publisher'], \ meta[name='DC.Publisher']") diff --git a/test/extractor.coffee b/test/extractor.coffee index 3b2dd8e..4a0fce5 100644 --- a/test/extractor.coffee +++ b/test/extractor.coffee @@ -120,6 +120,10 @@ suite 'Extractor', -> publisher = extractor.publisher(doc) eq publisher, "Polygon" + doc2 = cheerio.load("") + publisher2 = extractor.publisher(doc2) + eq publisher2, "The New York Times" + test 'returns nothing if publisher eq "null"', -> doc = cheerio.load("") publisher = extractor.publisher(doc)