forked from 6677-ai/tap4-ai-crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_api.py
59 lines (46 loc) · 1.57 KB
/
main_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import asyncio
import hmac
import logging
import os

from dotenv import load_dotenv
from flask import Flask, request, jsonify

from website_crawler import WebsitCrawler
app = Flask(__name__)
# NOTE(review): class name typo ("WebsitCrawler") comes from the
# website_crawler module itself; renaming it is out of scope here.
website_crawler = WebsitCrawler()

# Load environment variables from a local .env file before reading them.
load_dotenv()

# Fail fast with a clear message if the shared secret is missing;
# otherwise 'Bearer ' + None would raise an opaque TypeError at import time.
_secret = os.getenv('AUTH_SECRET')
if _secret is None:
    raise RuntimeError('AUTH_SECRET environment variable is not set')
auth_secret = 'Bearer ' + _secret

# Configure logging: timestamp, file, function, line number, level, message.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(filename)s - %(funcName)s - %(lineno)d - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@app.route('/site/crawl', methods=['POST'])
def scrape():
    """Crawl a website and return the processed result as JSON.

    Expects a JSON body {"url": str, "tags": list, "languages": list} and
    an Authorization header matching the configured shared secret.

    Returns a JSON envelope {"code": int, "msg": str, "data": ...};
    code 200 on success, 10001 when the crawl produced no result.
    """
    # silent=True yields None (not a 500) on a missing/malformed JSON body;
    # falling back to {} lets the validation below answer with a clean 400.
    data = request.get_json(silent=True) or {}
    url = data.get('url')
    tags = data.get('tags')  # list of tag strings
    languages = data.get('languages')  # target languages to translate into
    auth_header = request.headers.get('Authorization')

    # Validate input and authorization before doing any crawling work.
    if not url:
        return jsonify({'error': 'URL is required'}), 400
    if not auth_header:
        return jsonify({'error': 'Authorization is required'}), 400
    # Constant-time comparison prevents timing attacks on the shared secret;
    # comparing encoded bytes avoids compare_digest's ASCII-only str limit.
    if not hmac.compare_digest(auth_secret.encode(), auth_header.encode()):
        return jsonify({'error': 'Authorization is error'}), 400

    # asyncio.run() creates and tears down a fresh event loop per request.
    # asyncio.get_event_loop() is deprecated for this use and raises in
    # non-main threads.
    result = asyncio.run(website_crawler.scrape_website(url.strip(), tags, languages))

    # A None result signals a crawl failure: report code 10001 / msg 'fail'.
    code = 200
    msg = 'success'
    if result is None:
        code = 10001
        msg = 'fail'

    response = {
        'code': code,
        'msg': msg,
        'data': result
    }
    return jsonify(response)
if __name__ == '__main__':
    # app.run() is a blocking, synchronous call that returns None. Wrapping
    # it in asyncio.run() was a bug: asyncio.run expects a coroutine, so it
    # would raise ValueError once the server stopped. Run the dev server
    # directly instead.
    app.run(host='0.0.0.0', port=8040, threaded=False)