-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
make crawler opensource and convert to monorepo
- Loading branch information
1 parent
767ac13
commit 616b43b
Showing
56 changed files
with
902 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
# Folders | ||
build/ | ||
.vercel/ | ||
.vscode/ | ||
.history/ | ||
node_modules/ | ||
|
||
# Files | ||
yarn.lock | ||
data.json | ||
dataTest.json | ||
pnpm-lock.yaml | ||
package-lock.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
const rateLimit = require('../struct/ratelimiter'); | ||
|
||
module.exports = async (req, res) => { | ||
try { | ||
await rateLimit(30, req.headers['x-real-ip']); | ||
} catch (error) { | ||
return res.status(429).send({ | ||
message: 'Too many requests' | ||
}); | ||
} | ||
|
||
return res.status(404).send({ | ||
message: 'Not found' | ||
}); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
const config = require('../config.json'); | ||
const rateLimit = require('../struct/ratelimiter'); | ||
|
||
module.exports = async (req, res) => { | ||
try { | ||
await rateLimit(30, req.headers['x-real-ip']); | ||
} catch (error) { | ||
return res.status(429).send({ | ||
message: 'Too many requests' | ||
}); | ||
} | ||
|
||
return res.status(200).send(config.helloworld); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
const fetch = require('node-fetch'); | ||
const rateLimit = require('../struct/ratelimiter'); | ||
|
||
module.exports = async (req, res) => { | ||
try { | ||
await rateLimit(50, req.headers['x-real-ip']); | ||
} catch (error) { | ||
return res.status(429).send({ | ||
message: 'Too many requests', | ||
}); | ||
} | ||
|
||
if (!req.query.slug) { | ||
return res.status(401).send({ | ||
message: 'Input param required', | ||
}); | ||
} | ||
|
||
const data = await ( | ||
await fetch( | ||
`https://myanimelist.net/search/prefix.json?type=manga&keyword=${req.query.slug}` | ||
) | ||
).json(); | ||
|
||
return res.redirect(data.categories[0].items[0].url, 302); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
const data = require('../data.json'); | ||
const dataLightNovel = require('../dataLightNovel.json'); | ||
const dataManhwa = require('../dataManhwa.json'); | ||
const dataManhua = require('../dataManhua.json'); | ||
const rateLimit = require('../struct/ratelimiter'); | ||
|
||
module.exports = async (req, res) => { | ||
try { | ||
await rateLimit(100, req.headers['x-real-ip']); | ||
} catch (error) { | ||
return res.status(429).send({ | ||
message: 'Too many requests', | ||
}); | ||
} | ||
|
||
let use = data; | ||
switch (req.query.type) { | ||
case 'lightnovel': | ||
use = dataLightNovel; | ||
break; | ||
case 'manhwa': | ||
use = dataManhwa; | ||
break; | ||
case 'manhua': | ||
use = dataManhua; | ||
break; | ||
default: | ||
break; | ||
} | ||
|
||
res.setHeader('Access-Control-Allow-Origin', '*'); | ||
return res.status(200).send(use.sort(() => Math.random() - 0.5).slice(-4)); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
const data = require('../data.json'); | ||
const dataLightNovel = require('../dataLightNovel.json'); | ||
const dataManhwa = require('../dataManhwa.json'); | ||
const dataManhua = require('../dataManhua.json'); | ||
const { matchSorter } = require('match-sorter'); | ||
const rateLimit = require('../struct/ratelimiter'); | ||
|
||
module.exports = async (req, res) => { | ||
try { | ||
await rateLimit(500, req.headers['x-real-ip']); | ||
} catch (error) { | ||
return res.status(429).send({ | ||
message: 'Too many requests' | ||
}); | ||
} | ||
|
||
let use = data; | ||
switch (req.query.type) { | ||
case 'lightnovel': | ||
use = dataLightNovel; | ||
break; | ||
case 'manhwa': | ||
use = dataManhwa; | ||
break; | ||
case 'manhua': | ||
use = dataManhua; | ||
break; | ||
default: | ||
break; | ||
} | ||
|
||
const mangaResults = req.query.input ? matchSorter(use, req.query.input, { | ||
keys: ['title', 'site'], | ||
threshold: matchSorter.rankings.WORD_STARTS_WITH | ||
}) : { message: 'Input query required' }; | ||
|
||
res.setHeader('Access-Control-Allow-Origin', '*'); | ||
return res.status(200).send(mangaResults.slice(0, 300)); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"helloworld": { | ||
"version": "1.0.0", | ||
"message": "Hello World!" | ||
}, | ||
"ratelimit_time": 60 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"dependencies": { | ||
"lambda-rate-limiter": "^3.0.1", | ||
"match-sorter": "^6.3.1", | ||
"node-fetch": "^2.6.7" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
const config = require('../config.json'); | ||
const ratelimit = require('lambda-rate-limiter'); | ||
|
||
module.exports = ratelimit({ | ||
interval: config.ratelimit.time * 1000 | ||
}).check; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{ | ||
"rewrites": [ | ||
{ | ||
"source": "/", | ||
"destination": "/api/index" | ||
}, | ||
{ | ||
"source": "/random", | ||
"destination": "/api/random" | ||
}, | ||
{ | ||
"source": "/search", | ||
"destination": "/api/search" | ||
}, | ||
{ | ||
"source": "/mal/:slug", | ||
"destination": "/api/mal?slug=:slug" | ||
}, | ||
{ | ||
"source": "/(.*)", | ||
"destination": "/api/404" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
{ | ||
"scripts": { | ||
"start": "cd src && node index.js" | ||
}, | ||
"dependencies": { | ||
"cheerio": "^1.0.0-rc.10", | ||
"esm": "^3.2.25", | ||
"node-fetch-cookies": "^2.0.3", | ||
"puppeteer": "^10.2.0", | ||
"puppeteer-autoscroll-down": "^0.1.7", | ||
"puppeteer-extra": "^3.1.18", | ||
"puppeteer-extra-plugin-block-resources": "^2.2.9", | ||
"puppeteer-extra-plugin-stealth": "^2.7.8" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"port": 80, | ||
"ratelimit": { | ||
"max": 500, | ||
"per": "1 minute" | ||
}, | ||
"log": { | ||
"level": "info", | ||
"prettyPrint": true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
const puppeteer = require('puppeteer-extra'); | ||
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); | ||
const BlockResourcesPlugin = require('puppeteer-extra-plugin-block-resources'); | ||
const sleep = require('../util/sleep'); | ||
|
||
puppeteer.use(StealthPlugin()); | ||
puppeteer.use(BlockResourcesPlugin(new Set(['image', 'stylesheet', 'script', 'font']))); | ||
|
||
module.exports = async () => { | ||
// azuki has no api, unfortunately | ||
const browser = await puppeteer.launch(); | ||
|
||
// todo: automate this | ||
let currentPages = 4; | ||
let newArray = []; | ||
|
||
const page = await browser.newPage(); | ||
|
||
for (let i = 0; i < currentPages; i++) { | ||
await sleep(); | ||
await page.goto('https://www.azuki.co/series/' + (i + 1)); | ||
|
||
const list = await page.evaluate(() => { | ||
const titles = document.querySelectorAll('.a-card-link'); | ||
let array = []; | ||
|
||
titles.forEach(title => { | ||
array.push({ | ||
title: title.innerText, | ||
site: 'azuki', | ||
url: 'https://www.azuki.co/series/' + title.innerText.toLowerCase().replaceAll(' ', '-').replace(/[^a-zA-Z ]/g, '') | ||
}); | ||
}); | ||
return array; | ||
}); | ||
newArray = newArray.concat(list); | ||
} | ||
|
||
await browser.close(); | ||
|
||
return newArray; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
const puppeteer = require('puppeteer-extra'); | ||
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); | ||
const BlockResourcesPlugin = require('puppeteer-extra-plugin-block-resources'); | ||
const scrollPageToBottom = require('puppeteer-autoscroll-down'); | ||
const sleep = require('../util/sleep'); | ||
|
||
puppeteer.use(StealthPlugin()); | ||
puppeteer.use(BlockResourcesPlugin(new Set(['image', 'stylesheet', 'script', 'font']))); | ||
|
||
module.exports = async () => { | ||
// until i can figure out how their api works, we will have to do with using puppeteer | ||
// slower, but works | ||
const browser = await puppeteer.launch(); | ||
const page = await browser.newPage(); | ||
await page.goto('https://www.bilibilicomics.com/genre'); | ||
|
||
await scrollPageToBottom(page); | ||
await sleep(); | ||
await scrollPageToBottom(page); | ||
|
||
const list = await page.evaluate(() => { | ||
let newArray = []; | ||
// this will probably need changing often | ||
const divs = document.querySelectorAll('.text-info-section'); | ||
|
||
divs.forEach(div => { | ||
newArray.push({ | ||
title: div.querySelector('.manga-title').innerText, | ||
site: 'bilibili', | ||
url: div.querySelector('a').href, | ||
}); | ||
}); | ||
|
||
return newArray; | ||
}); | ||
|
||
await browser.close(); | ||
|
||
return list; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
const puppeteer = require('puppeteer-extra'); | ||
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); | ||
const BlockResourcesPlugin = require('puppeteer-extra-plugin-block-resources'); | ||
const sleep = require('../util/sleep'); | ||
|
||
puppeteer.use(StealthPlugin()); | ||
puppeteer.use(BlockResourcesPlugin(new Set(['image', 'stylesheet', 'script', 'font']))); | ||
|
||
module.exports = async (novel) => { | ||
// bookwalker also has no api, unfortunately | ||
// it also has 40 pages! do you have any idea how long it takes to load all this? | ||
const browser = await puppeteer.launch(); | ||
|
||
// todo: automate this | ||
let currentPages = 40; | ||
let newArray = []; | ||
|
||
let baseUrl = 'https://global.bookwalker.jp/categories/2/?np=0&page='; | ||
if (novel === true) { | ||
baseUrl = 'https://global.bookwalker.jp/categories/3/?np=0&page='; | ||
currentPages = 7; | ||
} | ||
|
||
const page = await browser.newPage(); | ||
for (let i = 0; i < currentPages; i++) { | ||
await sleep(); | ||
await page.goto(baseUrl + (i + 1)); | ||
|
||
const list = await page.evaluate(() => { | ||
const titles = document.querySelectorAll('.a-tile-ttl a'); | ||
let array = []; | ||
|
||
titles.forEach(title => { | ||
array.push({ | ||
title: title.innerText, | ||
site: 'bookwalker', | ||
url: title.href | ||
}); | ||
}); | ||
return array; | ||
}); | ||
newArray = newArray.concat(list); | ||
} | ||
|
||
await browser.close(); | ||
|
||
return newArray; | ||
}; |
Oops, something went wrong.