diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a534a4..36ceab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +### 1.1.2 +* Add support for Chinese. + ### 1.0.0 * Add support for extracting out `softTitle`, `date`, `copyright`, `author`, `publisher` thanks to @philgooch. See [#49](https://github.com/ageitgey/node-unfluff/pull/49). diff --git a/README.md b/README.md index 5f35258..ab32b37 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,14 @@ -# unfluff +# node-article-extractor -An automatic web page content extractor for Node.js! +An automatic web page content extractor for Node.js! Based on https://github.com/ageitgey/node-unfluff, with added support for Chinese. -[](https://travis-ci.org/ageitgey/node-unfluff) +[](https://travis-ci.org/ahkimkoo/node-article-extractor) Automatically grab the main text out of a webpage like this: ``` -extractor = require('unfluff'); +extractor = require('node-article-extractor'); data = extractor(my_html_data); console.log(data.text); ``` @@ -38,21 +38,21 @@ check out those libraries! ## Install -To install the command-line `unfluff` utility: +To install the command-line `node-article-extractor` utility: - npm install -g unfluff + npm install -g node-article-extractor -To install the `unfluff` module for use in your Node.js project: +To install the `node-article-extractor` module for use in your Node.js project: - npm install --save unfluff + npm install --save node-article-extractor ## Usage -You can use `unfluff` from node or right on the command line! +You can use `node-article-extractor` from node or right on the command line! ### Extracted data elements -This is what `unfluff` will try to grab from a web page: +This is what `node-article-extractor` will try to grab from a web page: - `title` - The document's title (from the <title> tag) - `softTitle` - A version of `title` with less truncation - `date` - The document's publication date @@ -73,19 +73,19 @@ This is returned as a simple json object. ### Command line interface -You can pass a webpage to unfluff and it will try to parse out the interesting +You can pass a webpage to node-article-extractor and it will try to parse out the interesting bits. You can either pass in a file name: ``` -unfluff my_file.html +node-article-extractor my_file.html ``` Or you can pipe it in: ``` -curl -s "http://somesite.com/page" | unfluff +curl -s "http://somesite.com/page" | node-article-extractor ``` You can easily chain this together with other unix commands to do cool stuff. @@ -93,13 +93,13 @@ For example, you can download a web page, parse it and then use [jq](http://stedolan.github.io/jq/) to print it just the body text. ``` -curl -s "http://www.polygon.com/2014/6/26/5842180/shovel-knight-review-pc-3ds-wii-u" | unfluff | jq -r .text +curl -s "http://www.polygon.com/2014/6/26/5842180/shovel-knight-review-pc-3ds-wii-u" | node-article-extractor | jq -r .text ``` And here's how to find the top 10 most common words in an article: ``` -curl -s "http://www.polygon.com/2014/6/26/5842180/shovel-knight-review-pc-3ds-wii-u" | unfluff | tr -c '[:alnum:]' '[\n*]' | sort | uniq -c | sort -nr | head -10 +curl -s "http://www.polygon.com/2014/6/26/5842180/shovel-knight-review-pc-3ds-wii-u" | node-article-extractor | tr -c '[:alnum:]' '[\n*]' | sort | uniq -c | sort -nr | head -10 ``` ### Module Interface @@ -116,7 +116,7 @@ The extraction algorithm depends heavily on the language, so it probably won't w if you have the language set incorrectly.
```javascript -extractor = require('unfluff'); +extractor = require('node-article-extractor'); data = extractor(my_html_data); ``` @@ -124,7 +124,7 @@ data = extractor(my_html_data); Or supply the language code yourself: ```javascript -extractor = require('unfluff'); +extractor = require('node-article-extractor'); data = extractor(my_html_data, 'en'); ``` @@ -169,7 +169,7 @@ are replaced by functions and evaluation is only done when you call those functions. ```javascript -extractor = require('unfluff'); +extractor = require('node-article-extractor'); data = extractor.lazy(my_html_data, 'en'); @@ -196,24 +196,15 @@ and looking them up multiple times should be as fast as possible. ### Demo -The easiest way to try out `unfluff` is to just install it: +The easiest way to try out `node-article-extractor` is to just install it: ``` -$ npm install -g unfluff -$ curl -s "http://www.cnn.com/2014/07/07/world/americas/mexico-earthquake/index.html" | unfluff +$ npm install -g node-article-extractor +$ curl -s "http://www.cnn.com/2014/07/07/world/americas/mexico-earthquake/index.html" | node-article-extractor ``` But if you can't be bothered, you can check out [fetch text](http://fetchtext.herokuapp.com/). It's a site by -[Andy Jiang](https://twitter.com/andyjiang) that uses `unfluff`. You send an +[Andy Jiang](https://twitter.com/andyjiang) that uses `node-article-extractor`. You send an email with a url and it emails back with the cleaned content of that url. It -should give you a good idea of how `unfluff` handles different urls. - -### What is broken - -- Parsing web pages in languages other than English is poorly tested and probably - is buggy right now. -- This definitely won't work yet for languages like Chinese / Arabic / Korean / - etc that need smarter word tokenization. -- This has only been tested on a limited set of web pages. There are probably lots - of lurking bugs with web pages that haven't been tested yet. +should give you a good idea of how `node-article-extractor` handles different urls. 
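The README snippets above only show English usage, so here is a minimal sketch of the Chinese path this fork advertises. The file name `chinese_article.html` and the explicit `'zh'` language code are illustrative assumptions, not part of the patch:

```javascript
// Minimal sketch: run node-article-extractor over a saved Chinese article.
// 'chinese_article.html' is a hypothetical local file; adjust the path as needed.
var fs = require('fs');
var extractor = require('node-article-extractor');

var html = fs.readFileSync('chinese_article.html', 'utf8');
// Pass 'zh' explicitly, or omit the second argument to rely on auto-detection.
var data = extractor(html, 'zh');

console.log(data.title);
console.log(data.text);
```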
diff --git a/bin/unfluff b/bin/node-article-extractor similarity index 100% rename from bin/unfluff rename to bin/node-article-extractor diff --git a/lib/formatter.js b/lib/formatter.js index 846d05a..be98fb6 100644 --- a/lib/formatter.js +++ b/lib/formatter.js @@ -3,7 +3,7 @@ void function () { var _, addNewlineToBr, cleanParagraphText, convertToText, formatter, linksToText, removeFewwordsParagraphs, removeNegativescoresNodes, replaceWithText, stopwords, ulToText, XRegExp; stopwords = require('./stopwords'); _ = require('lodash'); - XRegExp = require('xregexp').XRegExp; + XRegExp = require('xregexp'); module.exports = formatter = function (doc, topNode, language) { removeNegativescoresNodes(doc, topNode); linksToText(doc, topNode); diff --git a/lib/stopwords.js b/lib/stopwords.js index 2a1888d..e1adebe 100644 --- a/lib/stopwords.js +++ b/lib/stopwords.js @@ -1,4 +1,4 @@ -// Generated by CoffeeScript 2.0.0-beta7 +var nodejieba = require("nodejieba"); void function () { var _, cache, candiateWords, fs, getFilePath, path, removePunctuation, stopwords; path = require('path'); @@ -11,7 +11,8 @@ void function () { module.exports = stopwords = function (content, language) { var count, filePath, overlappingStopwords, stopWords, strippedInput, words; if (null == language) - language = 'en'; + //language = 'en'; + language = 'zh'; filePath = getFilePath(language); if (!fs.existsSync(filePath)) { console.error("WARNING: No stopwords file found for '" + language + "' - defaulting to English!"); @@ -20,7 +21,7 @@ void function () { if (cache.hasOwnProperty(language)) { stopWords = cache[language]; } else { - stopWords = fs.readFileSync(filePath).toString().split('\n').filter(function (s) { + stopWords = fs.readFileSync(filePath).toString().split(/[\n\r]+/).filter(function (s) { return s.length > 0; }); cache[language] = stopWords; @@ -41,9 +42,11 @@ void function () { }; }; removePunctuation = function (content) { - return content.replace(/[\|\@\<\>\[\]\"\'\.,-\/#\?!$%\^&\*\+;:{}=\-_`~()]/g, ''); + return content.replace(/[\|\@\<\>\[\]\"\'\.,-\/#\?!$%\^&\*\+;:{}=\-_`~()。,!`、~;:()-/×?]/g, ''); }; candiateWords = function (strippedInput) { - return strippedInput.split(' '); + //return strippedInput.split(' '); + // return strippedInput.split('');//Chinese supported + return nodejieba.cut(strippedInput);//Chinese smart split }; }.call(this); diff --git a/lib/unfluff.js b/lib/unfluff.js index 1a52dc6..d4da27d 100644 --- a/lib/unfluff.js +++ b/lib/unfluff.js @@ -6,8 +6,8 @@ void function () { cleaner = require('./cleaner'); module.exports = unfluff = function (html, language) { var doc, lng, pageData, topNode; - doc = cheerio.load(html); - lng = language || extractor.lang(doc); + doc = cheerio.load(html,{'decodeEntities':false}); + lng = (language || extractor.lang(doc))||'zh'; pageData = { title: extractor.title(doc), softTitle: extractor.softTitle(doc), diff --git a/package.json b/package.json index 0188ab1..8acc78c 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,8 @@ { - "name": "unfluff", - "version": "1.1.0", + "name": "node-article-extractor", + "version": "1.1.2", "description": "A web page content extractor", - "homepage": "https://github.com/ageitgey/node-unfluff", + "homepage": "https://github.com/ahkimkoo/node-article-extractor", "keywords": [ "content extraction", "html", @@ -12,14 +12,14 @@ "body text" ], "author": { - "name": "Adam Geitgey", - "email": "ageitgey@gmail.com" + "name": "Cherokee Liu", + "email": "successage@gmail.com" }, "repository": { "type": "git", - "url": 
"git://github.com/ageitgey/node-unfluff" + "url": "git://github.com/ahkimkoo/node-article-extractor" }, - "bugs": "https://github.com/ageitgey/node-unfluff/issues", + "bugs": "https://github.com/ahkimkoo/node-article-extractor/issues", "engines": { "node": "0.8.x || 0.9.x || 0.10.x" }, @@ -30,18 +30,18 @@ "test": "test" }, "dependencies": { - "cheerio": "~0.17.0", + "cheerio": "~0.22.0", + "lodash": "~4.17.4", + "nodejieba": "^2.2.5", "optimist": "~0.6.1", - "lodash": "~2.4.1", - "xregexp": "~2.0.0" + "xregexp": "~3.2.0" }, "devDependencies": { - "coffee-script-redux": "2.0.0-beta7", "commonjs-everywhere": "0.9.x", - "mocha": "~1.12.1", + "mocha": "~3.4.1", "scopedfs": "~0.1.0", - "semver": "~2.1.0", - "deep-equal": "~0.2.1" + "semver": "~5.3.0", + "deep-equal": "~1.0.1" }, "scripts": { "test": "make test" @@ -49,7 +49,7 @@ "licenses": [ { "type": "Apache", - "url": "https://github.com/ageitgey/node-unfluff/blob/master/LICENSE" + "url": "https://github.com/ahkimkoo/node-article-extractor/blob/master/LICENSE" } ] } diff --git a/src/cleaner.coffee b/src/cleaner.coffee deleted file mode 100644 index 781f189..0000000 --- a/src/cleaner.coffee +++ /dev/null @@ -1,183 +0,0 @@ -_ = require("lodash") - -module.exports = cleaner = (doc) -> - removeBodyClasses(doc) - cleanArticleTags(doc) - cleanEmTags(doc) - cleanCodeBlocks(doc) - removeDropCaps(doc) - removeScriptsStyles(doc) - cleanBadTags(doc) - removeNodesRegex(doc, /^caption$/) - removeNodesRegex(doc, / google /) - removeNodesRegex(doc, /^[^entry-]more.*$/) - removeNodesRegex(doc, /[^-]facebook/) - removeNodesRegex(doc, /facebook-broadcasting/) - removeNodesRegex(doc, /[^-]twitter/) - cleanParaSpans(doc) - cleanUnderlines(doc) - cleanErrantLinebreaks(doc) - divToPara(doc, 'div') - divToPara(doc, 'span') - return doc - -removeBodyClasses = (doc) -> - doc("body").removeClass() - -cleanArticleTags = (doc) -> - articles = doc("article") - articles.each () -> - doc(this).removeAttr('id') - doc(this).removeAttr('name') - doc(this).removeAttr('class') - -cleanEmTags = (doc) -> - ems = doc("em") - ems.each () -> - images = ems.find("img") - if images.length == 0 - doc(this).replaceWith(doc(this).html()) - -cleanCodeBlocks = (doc) -> - nodes = doc("[class*='highlight-'], pre code, code, pre, ul.task-list") - nodes.each () -> - doc(this).replaceWith(doc(this).text()) - -removeDropCaps = (doc) -> - nodes = doc("span[class~=dropcap], span[class~=drop_cap]") - nodes.each () -> - doc(this).replaceWith(doc(this).html()) - -removeScriptsStyles = (doc) -> - doc("script").remove() - doc("style").remove() - - comments = doc('*').contents().filter () -> - this.type == "comment" - - doc(comments).remove() - -cleanBadTags = (doc) -> - removeNodesRe = "^side$|combx|retweet|mediaarticlerelated|menucontainer|navbar|partner-gravity-ad|video-full-transcript|storytopbar-bucket|utility-bar|inline-share-tools|comment|PopularQuestions|contact|foot|footer|Footer|footnote|cnn_strycaptiontxt|cnn_html_slideshow|cnn_strylftcntnt|links|meta$|shoutbox|sponsor|tags|socialnetworking|socialNetworking|cnnStryHghLght|cnn_stryspcvbx|^inset$|pagetools|post-attributes|welcome_form|contentTools2|the_answers|communitypromo|runaroundLeft|subscribe|vcard|articleheadings|date|^print$|popup|author-dropdown|tools|socialtools|byline|konafilter|KonaFilter|breadcrumbs|^fn$|wp-caption-text|legende|ajoutVideo|timestamp|js_replies" - re = new RegExp(removeNodesRe, "i"); - - toRemove = doc('*').filter () -> - doc(this).attr('id')?.match(re) || doc(this).attr('class')?.match(re) || 
doc(this).attr('name')?.match(re) - - doc(toRemove).remove() - -removeNodesRegex = (doc, pattern) -> - toRemove = doc('div').filter () -> - doc(this).attr('id')?.match(pattern) || doc(this).attr('class')?.match(pattern) - - doc(toRemove).remove() - -cleanParaSpans = (doc) -> - nodes = doc("p span") - nodes.each () -> - doc(this).replaceWith(doc(this).html()) - -cleanUnderlines = (doc) -> - nodes = doc("u") - nodes.each () -> - doc(this).replaceWith(doc(this).html()) - -getReplacementNodes = (doc, div) -> - replacementText = [] - nodesToReturn = [] - nodesToRemove = [] - childs = div.contents() - - childs.each () -> - kid = doc(this) - - # node is a p - # and already have some replacement text - if kid[0].name == 'p' && replacementText.length > 0 - txt = replacementText.join('') - nodesToReturn.push(txt) - replacementText = [] - nodesToReturn.push(doc(kid).html()) - - # node is a text node - else if kid[0].type == 'text' - kidTextNode = kid - kidText = kid.text() - replaceText = kidText.replace(/\n/g, "\n\n").replace(/\t/g, "").replace(/^\s+$/g, "") - - if(replaceText.length) > 1 - previousSiblingNode = kidTextNode.prev() - - while previousSiblingNode[0] && previousSiblingNode[0].name == "a" && previousSiblingNode.attr('grv-usedalready') != 'yes' - outer = " " + doc.html(previousSiblingNode) + " " - replacementText.push(outer) - nodesToRemove.push(previousSiblingNode) - previousSiblingNode.attr('grv-usedalready', 'yes') - previousSiblingNode = previousSiblingNode.prev() - - replacementText.push(replaceText) - - nextSiblingNode = kidTextNode.next() - - while nextSiblingNode[0] && nextSiblingNode[0].name == "a" && nextSiblingNode.attr('grv-usedalready') != 'yes' - outer = " " + doc.html(nextSiblingNode) + " " - replacementText.push(outer) - nodesToRemove.push(nextSiblingNode) - nextSiblingNode.attr('grv-usedalready', 'yes') - previousSiblingNode = nextSiblingNode.next() - - # otherwise - else - nodesToReturn.push(doc(kid).html()) - - # flush out anything still remaining - if replacementText.length > 0 - txt = replacementText.join('') - nodesToReturn.push(txt) - replacementText = [] - - _.each nodesToRemove, (n) -> - doc(n).remove() - - nodesToReturn - -replaceWithPara = (doc, div) -> - divContent = doc(div).html() - doc(div).replaceWith("
<p>#{divContent}</p>
") - -divToPara = (doc, domType) -> - divs = doc(domType) - lastCount = divs.length + 1 - - tags = ['a', 'blockquote', 'dl', 'div', 'img', 'ol', 'p', 'pre', 'table', 'ul'] - - divs.each () -> - div = doc(this) - - items = div.find(tags.join(", ")) - - if items.length == 0 - replaceWithPara(doc, this) - else - replaceNodes = getReplacementNodes(doc, div) - - html = "" - _.each replaceNodes, (node) -> - if node != '' - html += "#{node}
" - - div.empty() - doc(div).replaceWith("#{html}") - -# For plain text nodes directly inside of p tags that contain random single -# line breaks, remove those junky line breaks. They would never be rendered -# by a browser anyway. -cleanErrantLinebreaks = (doc) -> - doc("p").each () -> - node = doc(this) - c = node.contents() - - doc(c).each () -> - n = doc(this) - if n[0].type == 'text' - n.replaceWith(n.text().replace(/([^\n])\n([^\n])/g, "$1 $2")) diff --git a/src/cli.coffee b/src/cli.coffee deleted file mode 100644 index 836227c..0000000 --- a/src/cli.coffee +++ /dev/null @@ -1,52 +0,0 @@ -# We use optimist for parsing the CLI arguments -fs = require('fs') -extractor = require('./unfluff') - -argvParser = require('optimist') -.usage( - 'unfluff [OPTIONS] [FILE_NAME]' -).options( - version: - alias: 'v' - describe: 'Show version information' - boolean: true - help: - alias: 'h' - describe: 'Show this. See: https://github.com/ageitgey/node-unfluff' - boolean: true - lang: - describe: 'Override language auto-detection. Valid values are en, es, fr, etc.' -) - -argv = argvParser.argv - -if argv.version - version = require('../package.json').version - process.stdout.write "#{version}\n" - process.exit 0 - -if argv.help - argvParser.showHelp() - process.exit 0 - -language = undefined -if argv.lang - language = argv.lang - -file = argv._.shift() -html = "" - - -if file - html = fs.readFileSync(file).toString() - process.stdout.write(JSON.stringify(extractor(html, language))) -else - process.stdin.setEncoding('utf8') - - process.stdin.on 'readable', () -> - chunk = process.stdin.read() - if (chunk != null) - html += chunk - - process.stdin.on 'end', () -> - process.stdout.write(JSON.stringify(extractor(html, language))) diff --git a/src/extractor.coffee b/src/extractor.coffee deleted file mode 100644 index ea72e3b..0000000 --- a/src/extractor.coffee +++ /dev/null @@ -1,554 +0,0 @@ -_ = require("lodash") -stopwords = require("./stopwords") -formatter = require("./formatter") - -module.exports = - # Grab the date of an html doc - date: (doc) -> - dateCandidates = doc("meta[property='article:published_time'], \ - meta[itemprop*='datePublished'], meta[name='dcterms.modified'], \ - meta[name='dcterms.date'], \ - meta[name='DC.date.issued'], meta[name='dc.date.issued'], \ - meta[name='dc.date.modified'], meta[name='dc.date.created'], \ - meta[name='DC.date'], \ - meta[name='DC.Date'], \ - meta[name='dc.date'], \ - meta[name='date'], \ - time[itemprop*='pubDate'], \ - time[itemprop*='pubdate'], \ - span[itemprop*='datePublished'], \ - span[property*='datePublished'], \ - p[itemprop*='datePublished'], \ - p[property*='datePublished'], \ - div[itemprop*='datePublished'], \ - div[property*='datePublished'], \ - li[itemprop*='datePublished'], \ - li[property*='datePublished'], \ - time, \ - span[class*='date'], \ - p[class*='date'], \ - div[class*='date']") - cleanNull(dateCandidates?.first()?.attr("content"))?.trim() || cleanNull(dateCandidates?.first()?.attr("datetime"))?.trim() || cleanText(dateCandidates?.first()?.text()) || null - - - # Grab the copyright line - copyright: (doc) -> - copyrightCandidates = doc("p[class*='copyright'], div[class*='copyright'], span[class*='copyright'], li[class*='copyright'], \ - p[id*='copyright'], div[id*='copyright'], span[id*='copyright'], li[id*='copyright']") - text = copyrightCandidates?.first()?.text() - if !text - # try to find the copyright in the text - text = doc("body").text().replace(/\s*[\r\n]+\s*/g, ". 
") - return null unless text.indexOf("©") > 0 - copyright = text.replace(/.*?©(\s*copyright)?([^,;:.|\r\n]+).*/gi, "$2").trim() - cleanText(copyright) - - - # Grab the author of an html doc - author: (doc) -> - authorCandidates = doc("meta[property='article:author'], \ - meta[property='og:article:author'], meta[name='author'], \ - meta[name='dcterms.creator'], \ - meta[name='DC.creator'], \ - meta[name='DC.Creator'], \ - meta[name='dc.creator'], \ - meta[name='creator']") - authorList = [] - authorCandidates.each () -> - author = cleanNull(doc(this)?.attr("content"))?.trim() - if author - authorList.push(author) - # fallback to a named author div - if authorList.length == 0 - fallbackAuthor = doc("span[class*='author']").first()?.text() || doc("p[class*='author']").first()?.text() || doc("div[class*='author']").first()?.text() || \ - doc("span[class*='byline']").first()?.text() || doc("p[class*='byline']").first()?.text() || doc("div[class*='byline']").first()?.text() - if fallbackAuthor - authorList.push(cleanText(fallbackAuthor)) - - authorList - - - # Grab the publisher of the page/site - publisher: (doc) -> - publisherCandidates = doc("meta[property='og:site_name'], \ - meta[name='dc.publisher'], \ - meta[name='DC.publisher'], \ - meta[name='DC.Publisher']") - cleanNull(publisherCandidates?.first()?.attr("content"))?.trim() || null - - - # Grab the title of an html doc (excluding junk) - # Hard-truncates titles containing colon or spaced dash - title: (doc) -> - titleText = rawTitle(doc) - return cleanTitle(titleText, ["|", " - ", "»", ":"]) - - # Grab the title with soft truncation - softTitle: (doc) -> - titleText = rawTitle(doc) - return cleanTitle(titleText, ["|", " - ", "»"]) - - - # Grab the 'main' text chunk - text: (doc, topNode, lang) -> - if topNode - topNode = postCleanup(doc, topNode, lang) - formatter(doc, topNode, lang) - else - "" - - # Grab an image for the page - image: (doc) -> - images = doc("meta[property='og:image'], meta[itemprop=image], meta[name='twitter:image:src'], meta[name='twitter:image'], meta[name='twitter:image0']") - - if images.length > 0 && cleanNull(images.first().attr('content')) - return cleanNull(images.first().attr('content')) - - null - - # Find any links in the doc - links: (doc, topNode, lang) -> - links = [] - gatherLinks = (doc, topNode) -> - nodes = topNode.find('a') - nodes.each () -> - href = doc(this).attr('href') - text = doc(this).html() - if href && text - links.push({ - text: text, - href: href - }) - - if topNode - topNode = postCleanup(doc, topNode, lang) - gatherLinks(doc, topNode) - links - - # Find any embedded videos in the doc - videos: (doc, topNode) -> - videoList = [] - candidates = doc(topNode).find("iframe, embed, object, video") - - candidates.each () -> - candidate = doc(this) - tag = candidate[0].name - - if tag == "embed" - if candidate.parent() && candidate.parent()[0].name == "object" - videoList.push(getObjectTag(doc, candidate)) - else - videoList.push(getVideoAttrs(doc, candidate)) - else if tag == "object" - videoList.push(getObjectTag(doc, candidate)) - else if tag == "iframe" || tag == "video" - videoList.push(getVideoAttrs(doc, candidate)) - - # Filter out junky or duplicate videos - urls = [] - results = [] - _.each videoList, (vid) -> - if vid && vid.height && vid.width && urls.indexOf(vid.src) == -1 - results.push(vid) - urls.push(vid.src) - - results - - # Grab the favicon from an html doc - favicon: (doc) -> - tag = doc('link').filter -> - doc(this).attr('rel')?.toLowerCase() == 'shortcut icon' - 
tag.attr('href') - - # Determine the language of an html doc - lang: (doc) -> - # Check the tag - l = doc("html")?.attr("lang") - - if !l - # Otherwise look up for a content-language in meta - tag = doc("meta[name=lang]") || doc("meta[http-equiv=content-language]") - l = tag?.attr("content") - - if l - # Just return the 2 letter ISO language code with no country - value = l[0..1] - if /^[A-Za-z]{2}$/.test(value) - return value.toLowerCase() - - null - - # Get the meta description of an html doc - description: (doc) -> - tag = doc("meta[name=description], meta[property='og:description']") - cleanNull(tag?.first()?.attr("content"))?.trim() - - # Get the meta keywords of an html doc - keywords: (doc) -> - tag = doc("meta[name=keywords]") - cleanNull(tag?.attr("content")) - - # Get the canonical link of an html doc - canonicalLink: (doc) -> - tag = doc("link[rel=canonical]") - cleanNull(tag?.attr("href")) - - # Get any tags or keywords from an html doc - tags: (doc) -> - elements = doc("a[rel='tag']") - - if elements.length == 0 - elements = doc("a[href*='/tag/'], a[href*='/tags/'], a[href*='/topic/'], a[href*='?keyword=']") - if elements.length == 0 - return [] - - tags = [] - elements.each () -> - el = doc(this) - - tag = el.text().trim() - tag.replace(/[\s\t\n]+/g, '') - - if tag && tag.length > 0 - tags.push(tag) - - _.uniq(tags) - - # Walk the document's text nodes and find the most 'texty' node in the doc - calculateBestNode: (doc, lang) -> - topNode = null - nodesToCheck = doc("p, pre, td") - - startingBoost = 1.0 - cnt = 0 - i = 0 - parentNodes = [] - nodesWithText = [] - - # Walk all the p, pre and td nodes - nodesToCheck.each () -> - node = doc(this) - - textNode = node.text() - wordStats = stopwords(textNode, lang) - highLinkDensity = isHighlinkDensity(doc, node) - - # If a node contains multiple common words and isn't just a bunch - # of links, it's worth consideration of being 'texty' - if wordStats.stopwordCount > 2 && !highLinkDensity - nodesWithText.push(node) - - nodesNumber = nodesWithText.length - negativeScoring = 0 - bottomNegativescoreNodes = nodesNumber * 0.25 - - # Walk all the potentially 'texty' nodes - _.each nodesWithText, (node) -> - boostScore = 0.0 - - # If this node has nearby nodes that contain - # some good text, give the node some boost points - if isBoostable(doc, node, lang) == true - if cnt >= 0 - boostScore = (1.0 / startingBoost) * 50 - startingBoost += 1 - - if nodesNumber > 15 - if (nodesNumber - i) <= bottomNegativescoreNodes - booster = bottomNegativescoreNodes - (nodesNumber - i) - boostScore = -1.0 * Math.pow(booster, 2) - negscore = Math.abs(boostScore) + negativeScoring - - if negscore > 40 - boostScore = 5.0 - - # Give the current node a score of how many common words - # it contains plus any boost - textNode = node.text() - wordStats = stopwords(textNode, lang) - upscore = Math.floor(wordStats.stopwordCount + boostScore) - - # Propigate the score upwards - parentNode = node.parent() - updateScore(parentNode, upscore) - updateNodeCount(parentNode, 1) - - if parentNodes.indexOf(parentNode[0]) == -1 - parentNodes.push(parentNode[0]) - - parentParentNode = parentNode.parent() - - if parentParentNode - updateNodeCount(parentParentNode, 1) - updateScore(parentParentNode, upscore / 2) - - if parentNodes.indexOf(parentParentNode[0]) == -1 - parentNodes.push(parentParentNode[0]) - - cnt += 1 - i += 1 - - topNodeScore = 0 - - # Walk each parent and parent-parent and find the one that - # contains the highest sum score of 'texty' child nodes. 
- # That's probably out best node! - _.each parentNodes, (e) -> - score = getScore(doc(e)) - - if score > topNodeScore - topNode = e - topNodeScore = score - - if topNode == null - topNode = e - - doc(topNode) - - -getVideoAttrs = (doc, node) -> - el = doc(node) - data = - src: el.attr('src') - height: el.attr('height') - width: el.attr('width') - -getObjectTag = (doc, node) -> - srcNode = node.find('param[name=movie]') - return null unless srcNode.length > 0 - - src = srcNode.attr("value") - video = getVideoAttrs(doc, node) - video.src = src - video - -# Find the biggest chunk of text in the title -biggestTitleChunk = (title, splitter) -> - largeTextLength = 0 - largeTextIndex = 0 - - titlePieces = title.split(splitter) - - # find the largest substring - _.each titlePieces, (piece, i)-> - if piece.length > largeTextLength - largeTextLength = piece.length - largeTextIndex = i - - titlePieces[largeTextIndex] - -# Given a text node, check all previous siblings. -# If the sibling node looks 'texty' and isn't too many -# nodes away, it's probably some yummy text -isBoostable = (doc, node, lang) -> - stepsAway = 0 - minimumStopwordCount = 5 - maxStepsawayFromNode = 3 - - nodes = node.prevAll() - - boostable = false - - nodes.each () -> - currentNode = doc(this) - currentNodeTag = currentNode[0].name - - if currentNodeTag == "p" - # Make sure the node isn't more than 3 hops away - if stepsAway >= maxStepsawayFromNode - boostable = false - return false - - paraText = currentNode.text() - wordStats = stopwords(paraText, lang) - - # Check if the node contains more than 5 common words - if wordStats.stopwordCount > minimumStopwordCount - boostable = true - return false - - stepsAway += 1 - - boostable - -addSiblings = (doc, topNode, lang) -> - baselinescoreSiblingsPara = getSiblingsScore(doc, topNode, lang) - sibs = topNode.prevAll() - - sibs.each () -> - currentNode = doc(this) - ps = getSiblingsContent(doc, lang, currentNode, baselinescoreSiblingsPara) - _.each ps, (p) -> - topNode.prepend("#{p}
") - return topNode - -getSiblingsContent = (doc, lang, currentSibling, baselinescoreSiblingsPara) -> - - if currentSibling[0].name == 'p' && currentSibling.text().length > 0 - return [currentSibling] - else - potentialParagraphs = currentSibling.find("p") - if potentialParagraphs == null - return null - else - ps = [] - potentialParagraphs.each () -> - firstParagraph = doc(this) - txt = firstParagraph.text() - - if txt.length > 0 - wordStats = stopwords(txt, lang) - paragraphScore = wordStats.stopwordCount - siblingBaselineScore = 0.30 - highLinkDensity = isHighlinkDensity(doc, firstParagraph) - score = baselinescoreSiblingsPara * siblingBaselineScore - - if score < paragraphScore && !highLinkDensity - ps.push(txt) - - return ps - -getSiblingsScore = (doc, topNode, lang) -> - base = 100000 - paragraphsNumber = 0 - paragraphsScore = 0 - nodesToCheck = topNode.find("p") - - nodesToCheck.each () -> - node = doc(this) - textNode = node.text() - wordStats = stopwords(textNode, lang) - highLinkDensity = isHighlinkDensity(doc, node) - - if wordStats.stopwordCount > 2 && !highLinkDensity - paragraphsNumber += 1 - paragraphsScore += wordStats.stopwordCount - - if paragraphsNumber > 0 - base = paragraphsScore / paragraphsNumber - - return base - -# Keep track of a node's score with a gravityScore attribute -updateScore = (node, addToScore) -> - currentScore = 0 - scoreString = node.attr('gravityScore') - if scoreString - currentScore = parseInt(scoreString) - - newScore = currentScore + addToScore - node.attr("gravityScore", newScore) - -# Keep track of # of 'texty' child nodes under this node with -# graveityNodes attribute -updateNodeCount = (node, addToCount) -> - currentScore = 0 - countString = node.attr('gravityNodes') - if countString - currentScore = parseInt(countString) - - newScore = currentScore + addToCount - node.attr("gravityNodes", newScore) - -# Check the ratio of links to words in a node. -# If the ratio is high, this node is probably trash. 
-isHighlinkDensity = (doc, node) -> - links = node.find('a') - return false unless links.length > 0 - - txt = node.text() - words = txt.split(' ') - numberOfWords = words.length - - sb = [] - links.each () -> - sb.push(doc(this).text()) - - linkText = sb.join(' ') - linkWords = linkText.split(' ') - numberOfLinkWords = linkWords.length - numberOfLinks = links.length - percentLinkWords = numberOfLinkWords / numberOfWords - score = percentLinkWords * numberOfLinks - - score >= 1.0 - -# Return a node's gravity score (amount of texty-ness under it) -getScore = (node) -> - grvScoreString = node.attr('gravityScore') - if !grvScoreString - return 0 - else - parseInt(grvScoreString) - - -isTableAndNoParaExist = (doc, e) -> - subParagraphs = e.find("p") - - subParagraphs.each () -> - p = doc(this) - txt = p.text() - - if txt.length < 25 - doc(p).remove() - - subParagraphs2 = e.find("p") - if subParagraphs2.length == 0 && !(e[0].name in ["td", "ul", "ol"]) - return true - else - return false - -isNodescoreThresholdMet = (doc, node, e) -> - topNodeScore = getScore(node) - currentNodeScore = getScore(e) - thresholdScore = topNodeScore * 0.08 - - if (currentNodeScore < thresholdScore) && !(e[0].name in ["td", "ul", "ol", "blockquote"]) - return false - else - return true - -# Remove any remaining trash nodes (clusters of nodes with little/no content) -postCleanup = (doc, targetNode, lang) -> - node = addSiblings(doc, targetNode, lang) - - node.children().each () -> - e = doc(this) - eTag = e[0].name - if eTag not in ['p', 'a'] - if isHighlinkDensity(doc, e) || isTableAndNoParaExist(doc, e) || !isNodescoreThresholdMet(doc, node, e) - doc(e).remove() - - return node - -cleanNull = (text) -> - return text?.replace(/^null$/g, "") - -cleanText = (text) -> - return text?.replace(/[\r\n\t]/g, " ").replace(/\s\s+/g, " ").replace(//g, "").replace(/�/g, "").trim() - - -cleanTitle = (title, delimiters) -> - titleText = title || "" - usedDelimeter = false - _.each delimiters, (c) -> - if titleText.indexOf(c) >= 0 && !usedDelimeter - titleText = biggestTitleChunk(titleText, c) - usedDelimeter = true - return cleanText(titleText) - - -rawTitle = (doc) -> - gotTitle = false - titleText = "" - # The first h1 or h2 is a useful fallback - _.each [doc("meta[property='og:title']")?.first()?.attr("content"), \ - doc("h1[class*='title']")?.first()?.text(), \ - doc("title")?.first()?.text(), \ - doc("h1")?.first()?.text(), \ - doc("h2")?.first()?.text()], (candidate) -> - if candidate && candidate.trim() && !gotTitle - titleText = candidate.trim() - gotTitle = true - - return titleText diff --git a/src/formatter.coffee b/src/formatter.coffee deleted file mode 100644 index 3fa5043..0000000 --- a/src/formatter.coffee +++ /dev/null @@ -1,124 +0,0 @@ -stopwords = require("./stopwords") -_ = require("lodash") -{XRegExp} = require('xregexp') - -module.exports = formatter = (doc, topNode, language) -> - removeNegativescoresNodes(doc, topNode) - linksToText(doc, topNode) - addNewlineToBr(doc, topNode) - replaceWithText(doc, topNode) - removeFewwordsParagraphs(doc, topNode, language) - return convertToText(doc, topNode) - -linksToText = (doc, topNode) -> - nodes = topNode.find('a') - - nodes.each () -> - doc(this).replaceWith(doc(this).html()) - -ulToText = (doc, node) -> - nodes = node.find('li') - txt = "" - - nodes.each () -> - txt = txt + "\n * #{doc(this).text()}" - - txt = txt + "\n" - txt - -replaceWithText = (doc, topNode) -> - nodes = topNode.find('b, strong, i, br, sup') - nodes.each () -> - 
doc(this).replaceWith(doc(this).text()) - -cleanParagraphText = (rawText) -> - txt = rawText.trim() - txt.replace(/[\s\t]+/g, ' ') - txt - -# Turn an html element (and children) into nicely formatted text -convertToText = (doc, topNode) -> - txts = [] - nodes = topNode.contents() - - # To hold any text fragments that end up in text nodes outside of - # html elements - hangingText = "" - - nodes.each () -> - node = doc(this) - nodeType = node[0].type - nodeName = node[0].name - - # Handle top level text nodes by adding them to a running list - # and then treating all the hanging nodes as one paragraph tag - if nodeType == "text" - hangingText += node.text() - # Same as 'continue' - return true - else if nodeName == "ul" - hangingText += ulToText(doc, node) - return true - - # If we hit a real node and still have extra acculated text, - # pop it out as if it was a paragraph tag - if hangingText.length > 0 - txt = cleanParagraphText(hangingText) - txts = txts.concat(txt.split(/\r?\n/)) - hangingText = "" - - txt = cleanParagraphText(node.text()) - txt = txt.replace(/(\w+\.)([A-Z]+)/, '$1 $2') - txts = txts.concat(txt.split(/\r?\n/)) - - # Catch any left-over hanging text nodes - if hangingText.length > 0 - txt = cleanParagraphText(hangingText) - txts = txts.concat(txt.split(/\r?\n/)) - - txts = _.map txts, (txt) -> - txt.trim() - - # Make sure each text chunk includes at least one text character or number. - # This supports multiple languages words using XRegExp to generate the - # regex that matches wranges of unicode characters used in words. - regex = XRegExp('[\\p{Number}\\p{Letter}]') - txts = _.filter txts, (txt) -> - regex.test(txt) - - txts.join('\n\n') - -addNewlineToBr = (doc, topNode) -> - brs = topNode.find("br") - brs.each () -> - br = doc(this) - br.replaceWith("\n\n") - -# Remove nodes with a negative score because they are probably trash -removeNegativescoresNodes = (doc, topNode) -> - gravityItems = topNode.find("*[gravityScore]") - - gravityItems.each () -> - item = doc(this) - score = parseInt(item.attr('gravityScore')) || 0 - - if score < 1 - doc(item).remove() - -# remove paragraphs that have less than x number of words, -# would indicate that it's some sort of link -removeFewwordsParagraphs = (doc, topNode, language) -> - allNodes = topNode.find("*") - - allNodes.each () -> - el = doc(this) - tag = el[0].name - text = el.text() - - stopWords = stopwords(text, language) - if (tag != 'br' || text != '\\r') && stopWords.stopwordCount < 3 && el.find("object").length == 0 && el.find("embed").length == 0 - doc(el).remove() - else - trimmed = text.trim() - if trimmed[0] == "(" && trimmed[trimmed.length - 1] == ")" - doc(el).remove() diff --git a/src/stopwords.coffee b/src/stopwords.coffee deleted file mode 100644 index ecebd0c..0000000 --- a/src/stopwords.coffee +++ /dev/null @@ -1,47 +0,0 @@ -path = require('path') -fs = require('fs') -_ = require('lodash') - -cache = {} - -getFilePath = (language) -> - path.join(__dirname, "..", "data", "stopwords", "stopwords-#{language}.txt") - -# Given a language, loads a list of stop words for that language -# and then returns which of those words exist in the given content -module.exports = stopwords = (content, language = 'en') -> - filePath = getFilePath(language) - - if !fs.existsSync(filePath) - console.error("WARNING: No stopwords file found for '#{language}' - defaulting to English!") - filePath = getFilePath('en') - - if cache.hasOwnProperty(language) - stopWords = cache[language] - else - stopWords = 
fs.readFileSync(filePath).toString().split('\n') - .filter((s) -> s.length > 0) - cache[language] = stopWords - - strippedInput = removePunctuation(content) - words = candiateWords(strippedInput) - overlappingStopwords = [] - - count = 0 - - _.each words, (w) -> - count += 1 - if stopWords.indexOf(w.toLowerCase()) > -1 - overlappingStopwords.push(w.toLowerCase()) - - { - wordCount: count, - stopwordCount: overlappingStopwords.length, - stopWords: overlappingStopwords - } - -removePunctuation = (content) -> - content.replace(/[\|\@\<\>\[\]\"\'\.,-\/#\?!$%\^&\*\+;:{}=\-_`~()]/g,"") - -candiateWords = (strippedInput) -> - strippedInput.split(' ') diff --git a/src/unfluff.coffee b/src/unfluff.coffee deleted file mode 100644 index e2cab08..0000000 --- a/src/unfluff.coffee +++ /dev/null @@ -1,122 +0,0 @@ -cheerio = require("cheerio") -extractor = require("./extractor") -cleaner = require("./cleaner") - -module.exports = unfluff = (html, language) -> - doc = cheerio.load(html) - lng = language || extractor.lang(doc) - - pageData = - title: extractor.title(doc) - softTitle: extractor.softTitle(doc) - date: extractor.date(doc) - author: extractor.author(doc) - publisher: extractor.publisher(doc) - copyright: extractor.copyright(doc) - favicon: extractor.favicon(doc) - description: extractor.description(doc) - keywords: extractor.keywords(doc) - lang: lng - canonicalLink: extractor.canonicalLink(doc) - tags: extractor.tags(doc) - image: extractor.image(doc) - - # Step 1: Clean the doc - cleaner(doc) - - # Step 2: Find the doc node with the best text - topNode = extractor.calculateBestNode(doc, lng) - - # Step 3: Extract text, videos, images, links - pageData.videos = extractor.videos(doc, topNode) - pageData.links = extractor.links(doc, topNode, lng) - pageData.text = extractor.text(doc, topNode, lng) - - pageData - -# Allow access to document properties with lazy evaluation -unfluff.lazy = (html, language) -> - title: () -> - doc = getParsedDoc.call(this, html) - @title_ ?= extractor.title(doc) - - softTitle: () -> - doc = getParsedDoc.call(this, html) - @softTitle_ ?= extractor.softTitle(doc) - - date: () -> - doc = getParsedDoc.call(this, html) - @date_ ?= extractor.date(doc) - - copyright: () -> - doc = getParsedDoc.call(this, html) - @copyright_ ?= extractor.copyright(doc) - - author: () -> - doc = getParsedDoc.call(this, html) - @author_ ?= extractor.author(doc) - - publisher: () -> - doc = getParsedDoc.call(this, html) - @publisher_ ?= extractor.publisher(doc) - - favicon: () -> - doc = getParsedDoc.call(this, html) - @favicon_ ?= extractor.favicon(doc) - - description: () -> - doc = getParsedDoc.call(this, html) - @description_ ?= extractor.description(doc) - - keywords: () -> - doc = getParsedDoc.call(this, html) - @keywords_ ?= extractor.keywords(doc) - - lang: () -> - doc = getParsedDoc.call(this, html) - @language_ ?= language or extractor.lang(doc) - - canonicalLink: () -> - doc = getParsedDoc.call(this, html) - @canonicalLink_ ?= extractor.canonicalLink(doc) - - tags: () -> - doc = getParsedDoc.call(this, html) - @tags_ ?= extractor.tags(doc) - - image: () -> - doc = getParsedDoc.call(this, html) - @image_ ?= extractor.image(doc) - - videos: () -> - return @videos_ if @videos_? - doc = getCleanedDoc.call(this, html) - topNode = getTopNode.call(this, doc, this.lang()) - @videos_ = extractor.videos(doc, topNode) - - text: () -> - return @text_ if @text_? 
- doc = getCleanedDoc.call(this, html) - topNode = getTopNode.call(this, doc, this.lang()) - @text_ = extractor.text(doc, topNode, this.lang()) - - links: () -> - return @links_ if @links_? - doc = getCleanedDoc.call(this, html) - topNode = getTopNode.call(this, doc, this.lang()) - @links_ = extractor.links(doc, topNode, this.lang()) - -# Load the doc in cheerio and cache it -getParsedDoc = (html) -> - @doc_ ?= cheerio.load(html) - -# Cached version of calculateBestNode -getTopNode = (doc, lng) -> - @topNode_ ?= extractor.calculateBestNode(doc, lng) - -# Cached version of the cleaned doc -getCleanedDoc = (html) -> - return @cleanedDoc_ if @cleanedDoc_? - doc = getParsedDoc.call(this, html) - @cleanedDoc_ = cleaner(doc) - @cleanedDoc_ diff --git a/test-setup.coffee b/test-setup.coffee deleted file mode 100644 index 7f642b6..0000000 --- a/test-setup.coffee +++ /dev/null @@ -1,30 +0,0 @@ -path = require 'path' -util = require 'util' -fs = require 'fs' -deepEqual = require 'deep-equal' - -global[name] = func for name, func of require 'assert' - -# See http://wiki.ecmascript.org/doku.php?id=harmony:egal -egal = (a, b) -> - if a is b - a isnt 0 or 1/a is 1/b - else - a isnt a and b isnt b - -# A recursive functional equivalence helper; uses egal for testing equivalence. -arrayEgal = (a, b) -> - if egal a, b then yes - else if (Array.isArray a) and Array.isArray b - return no unless a.length is b.length - return no for el, idx in a when not arrayEgal el, b[idx] - yes - -global.inspect = (o) -> util.inspect o, no, 2, yes -global.eq = (a, b, msg) -> ok egal(a, b), msg ? "#{inspect a} === #{inspect b}" -global.arrayEq = (a, b, msg) -> ok arrayEgal(a, b), msg ? "#{inspect a} === #{inspect b}" -global.deepEq = (a, b, msg) -> ok deepEqual(a, b), msg ? "#{inspect a} === #{inspect b}" - -global.fs = fs - -global[k] = v for own k, v of require './' diff --git a/test/cleaner.coffee b/test/cleaner.coffee deleted file mode 100644 index 772285e..0000000 --- a/test/cleaner.coffee +++ /dev/null @@ -1,109 +0,0 @@ -suite 'Cleaner', -> - cleaner = require("../src/cleaner") - cheerio = require("cheerio") - - test 'exists', -> - ok cleaner - - test 'removes body classes', -> - html = fs.readFileSync("./fixtures/test_businessWeek1.html").toString() - origDoc = cheerio.load(html) - - eq origDoc("body").attr("class").trim(), "magazine" - - newDoc = cleaner(origDoc) - eq newDoc("body").attr("class"), '' - - test 'removes article attrs', -> - html = fs.readFileSync("./fixtures/test_gizmodo1.html").toString() - origDoc = cheerio.load(html) - - eq origDoc("article").attr("class").trim(), "row post js_post_item status-published commented js_amazon_module" - - newDoc = cleaner(origDoc) - eq newDoc("article").attr("class"), undefined - - test 'removes em tag from image-less ems', -> - html = fs.readFileSync("./fixtures/test_gizmodo1.html").toString() - origDoc = cheerio.load(html) - - eq origDoc("em").length, 6 - - newDoc = cleaner(origDoc) - eq newDoc("em").length, 0 - - test 'removes scripts', -> - html = fs.readFileSync("./fixtures/test_businessWeek1.html").toString() - origDoc = cheerio.load(html) - - eq origDoc("script").length, 40 - - newDoc = cleaner(origDoc) - eq newDoc("script").length, 0 - - test 'removes comments', -> - html = fs.readFileSync("./fixtures/test_gizmodo1.html").toString() - origDoc = cheerio.load(html) - comments = origDoc('*').contents().filter () -> - this.type == "comment" - eq comments.length, 15 - - newDoc = cleaner(origDoc) - comments = newDoc('*').contents().filter () -> - this.type == 
"comment" - eq comments.length, 0 - - test 'replaces childless divs with p tags', -> - origDoc = cheerio.load("