|
| 1 | +var pug = require('gulp-pug') |
| 2 | +var gulp = require('gulp') |
| 3 | +var rename = require('gulp-rename') |
| 4 | +var data = require('gulp-data') |
| 5 | +var connect = require('gulp-connect') |
| 6 | +var replace = require('gulp-replace') |
| 7 | +var ghPages = require('gulp-gh-pages') |
| 8 | +var bower = require('gulp-bower') |
| 9 | +var image = require('gulp-image') |
| 10 | +var stylus = require('gulp-stylus') |
| 11 | +var minify = require('gulp-minify') |
| 12 | +var path = require('path') |
| 13 | +var fs = require('fs') |
| 14 | +var cheerio = require('cheerio') |
| 15 | + |
// Directory, relative to the project root, that every task writes its output
// into. Keep it equal to the repository name so the published gh-pages site
// resolves its paths correctly.
var build_dir = 'data-centric-comp/'
| 17 | + |
/**
 * Sort entries by accuracy (highest first) and give each one a dense rank.
 *
 * The array is sorted in place and the same array is returned. Entries with
 * equal `acc` share a rank; the rank grows by one each time `acc` drops.
 *
 * @param {Array<Object>} entries objects carrying a numeric `acc` property
 * @returns {Array<Object>} the same array, sorted, each item with `rank` set
 */
var rankEntries = function (entries) {
  entries.sort(function (left, right) {
    return right.acc - left.acc
  })

  // Walk the sorted list once, bumping the running rank whenever the
  // accuracy is strictly lower than that of the entry just before it.
  var currentRank = 1
  for (var idx = 0; idx < entries.length; idx += 1) {
    if (idx > 0 && entries[idx].acc < entries[idx - 1].acc) {
      currentRank += 1
    }
    entries[idx].rank = currentRank
  }

  return entries
}
| 36 | + |
/**
 * Minimal assertion helper: does nothing when `condition` is truthy and
 * throws otherwise.
 *
 * The failure is always raised as an Error so that it carries a stack trace.
 * An Error instance passed as `message` is thrown as-is; any other truthy
 * value becomes the error message, and a missing message falls back to
 * 'Assertion failed'.
 *
 * @param {*} condition value expected to be truthy
 * @param {string|Error} [message] description of the failed expectation
 * @throws {Error} when `condition` is falsy
 */
function assert (condition, message) {
  if (condition) {
    return
  }
  if (message instanceof Error) {
    throw message
  }
  throw new Error(message || 'Assertion failed')
}
| 42 | + |
| 43 | +var parseCompEntries = function (comp_file) { |
| 44 | + var leaderboard = require(comp_file).leaderboard |
| 45 | + var entries = [] |
| 46 | + |
| 47 | + for (var i = 0; i < leaderboard.length; i++) { |
| 48 | + try { |
| 49 | + var o_entry = leaderboard[i] |
| 50 | + var entry = {} |
| 51 | + entry.user = o_entry.submission.user_name |
| 52 | + |
| 53 | + var description = o_entry.submission.description.trim() |
| 54 | + var regex_match = description.match(/(.*) ?\((.*)\)(.*)/); |
| 55 | + if (regex_match) { |
| 56 | + entry.model_name = regex_match[1].trim(); |
| 57 | + entry.institution = regex_match[2].trim(); |
| 58 | + if (regex_match[3].lastIndexOf('http') !== -1) { |
| 59 | + entry.link = regex_match[4].trim() |
| 60 | + } |
| 61 | + } else { |
| 62 | + entry.model_name = description.substr(0, description.lastIndexOf('(')).trim() |
| 63 | + var firstPart = description.substr(description.lastIndexOf('(') + 1) |
| 64 | + entry.institution = firstPart.substr(0, firstPart.lastIndexOf(')')) |
| 65 | + if (description.lastIndexOf('http') !== -1) { |
| 66 | + entry.link = description.substr(description.lastIndexOf('http')).trim() |
| 67 | + } |
| 68 | + } |
| 69 | + |
| 70 | + entry.date = o_entry.submission.created |
| 71 | + entry.acc = parseFloat(o_entry.scores.accuracy) |
| 72 | + |
| 73 | + if (entry.model_name !== '' && Number(entry.acc) == entry.acc) { |
| 74 | + entries.push(entry); |
| 75 | + } |
| 76 | + } catch (err) { |
| 77 | + console.error(err) |
| 78 | + console.error(entry) |
| 79 | + } |
| 80 | + } |
| 81 | + entries = rankEntries(entries) |
| 82 | + return entries |
| 83 | +} |
| 84 | + |
/**
 * Extract ranked leaderboard entries from a page's HTML.
 *
 * Locates the `h1#leaderboard` heading, climbs to its closest `.ws-item`
 * ancestor and reads the `tbody > tr` rows of the element that follows it.
 * For each row the second cell is taken as a description of the form
 * "Model name (Institution) http://link", the third cell as the date and the
 * fifth cell as the accuracy.
 *
 * @param {string} htmlStr HTML markup to parse
 * @returns {Array<Object>} entries with `model_name`, `institution`, optional
 *   `link`, `acc` and `date`, passed through `rankEntries`
 */
var parseEntries = function (htmlStr) {
  var $ = cheerio.load(htmlStr)
  var tableHolder = $('h1#leaderboard').closest('.ws-item').next()
  var rows = $(tableHolder).find('tbody > tr')
  var entries = []

  rows.each(function (rowIndex, row) {
    var cells = $(row).find('td')
    var description = cells.eq(1).text().trim()

    // Everything before the last "(" is the model name, the text between that
    // "(" and the last ")" is the institution, and the last "http" starts the
    // optional link.
    var openParen = description.lastIndexOf('(')
    var afterParen = description.substr(openParen + 1)
    var linkStart = description.lastIndexOf('http')

    var entry = {}
    entry.model_name = description.substr(0, openParen).trim()
    entry.institution = afterParen.substr(0, afterParen.lastIndexOf(')'))
    if (linkStart !== -1) {
      entry.link = description.substr(linkStart).trim()
    }
    entry.acc = parseFloat(cells.eq(4).text())
    entry.date = cells.eq(2).text().trim()
    entries.push(entry)
  })

  return rankEntries(entries)
}
| 108 | + |
// Run gulp-bower and write its output into the build directory under
// bower_components/.
gulp.task('bower', function () {
  var destination = './' + build_dir + 'bower_components/'
  return bower().pipe(gulp.dest(destination))
})
| 113 | + |
// Pipe every file directly under views/images/ through gulp-image and place
// the results at the root of the build directory.
gulp.task('image', function () {
  var sources = gulp.src('./views/images/*')
  var processed = sources.pipe(image())
  return processed.pipe(gulp.dest('./' + build_dir))
})
| 119 | + |
// Load the CodaLab worksheet in a headless browser, parse the leaderboard out
// of the rendered page with parseEntries and write the result to ./test.json.
// `cb` is called once the file is written, or with the error if any step of
// the browser session or the parsing fails.
gulp.task('scrape_website', function (cb) {
  var Nightmare = require('nightmare')
  var jsonfile = require('jsonfile')
  var nightmare = new Nightmare({
    switches: {
      'ignore-certificate-errors': true
    }
  })
  nightmare.goto('https://worksheets.codalab.org/worksheets/0x62eefc3e64e04430a1a24785a9293fff/')
    // Fixed two-second pause before the DOM is read.
    .wait(2000)
    .evaluate(function () {
      return document.body.innerHTML
    })
    .end()
    .then(function (result) {
      var after = parseEntries(result)
      jsonfile.writeFile('./test.json', after, cb)
    })
    // Without a rejection handler a failed navigation or a parsing error
    // would never reach gulp and the task would not finish.
    .catch(cb)
})
| 141 | + |
// Start the gulp-connect server with the project root as document root,
// bound to host 0.0.0.0.
gulp.task('connect', function () {
  var serverOptions = {
    host: '0.0.0.0',
    root: '.'
  }
  connect.server(serverOptions)
})
| 148 | + |
// Folder holding the dataset files, and the dataset files themselves
// expressed as paths inside that folder.
var dataset_folder = './dataset/'
var filepaths = ['dev-v1.1.json'].map(function (filename) {
  return dataset_folder + filename
})

// Task names the 'generate_exploration' task depends on; currently none.
var exploration_tasks = []
| 155 | + |
// Convert ./leaderboard.json into ranked entries and save them as
// ./results.json; `cb` is handed to the writer as its completion callback.
gulp.task('process_comp_output', function (cb) {
  require('jsonfile').writeFile(
    './results.json',
    parseCompEntries('./leaderboard.json'),
    cb
  )
})
| 161 | + |
// Render views/index.pug into the build directory. Runs after
// 'process_comp_output'; the template receives the contents of
// ./results.json as `test` and the moment library as `moment`.
gulp.task('generate_index', ['process_comp_output'], function () {
  var results = require('./results.json')
  var momentLib = require('moment')

  var provideLocals = function () {
    return {
      'test': results,
      'moment': momentLib
    }
  }

  return gulp.src('views/index.pug')
    .pipe(data(provideLocals))
    .pipe(pug())
    .pipe(gulp.dest('./' + build_dir))
})
| 173 | + |
// After 'generate', rewrite every href/src attribute in the built HTML whose
// value starts with "/" so that the build directory name is inserted right
// after that leading slash. The leading [^-] keeps attributes such as
// data-href or data-src from being touched.
gulp.task('correct_link_paths', ['generate'], function () {
  var outputDir = './' + build_dir
  var rootRelativeAttr = /([^-](?:href|src)=[\'\"]\/)([^\'\"]*)([\'\"])/g
  var prefixedAttr = '$1' + build_dir + '$2$3'

  return gulp.src(outputDir + '**/*.html')
    .pipe(replace(rootRelativeAttr, prefixedAttr))
    .pipe(gulp.dest(outputDir))
})
| 179 | + |
// Pipe the .styl files in views/styles/ through gulp-stylus and write the
// output to the build directory's stylesheets folder.
gulp.task('css', function () {
  var stylesheetDir = './' + build_dir + 'stylesheets'
  return gulp.src('./views/styles/*.styl')
    .pipe(stylus())
    .pipe(gulp.dest(stylesheetDir))
})
| 185 | + |
// Feed every file under the build directory to gulp-gh-pages.
gulp.task('deploy', function () {
  var builtFiles = gulp.src('./' + build_dir + '**/*')
  return builtFiles.pipe(ghPages())
})
| 190 | + |
// Aggregate tasks: each one only names the tasks it depends on.

// Depends on whatever is listed in exploration_tasks.
gulp.task('generate_exploration', exploration_tasks)

// Everything needed to produce the site content.
gulp.task('generate', [
  'bower',
  'generate_exploration',
  'generate_index',
  'process_comp_output'
])

// Full build: content, link rewriting, images and stylesheets.
gulp.task('default', [
  'generate',
  'correct_link_paths',
  'image',
  'css'
])
0 commit comments