-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbtdig.py
108 lines (95 loc) · 4.9 KB
/
btdig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# VERSION: 1.1
#
# LICENSING INFORMATION
# This is free and unencumbered software released into the public domain.
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
# For more information, please refer to <https://unlicense.org>
import urllib.parse
import urllib.request
import re
import math
import time
import gzip
from io import BytesIO
from novaprinter import prettyPrinter
import urllib.parse
class btdig(object):
    """qBittorrent search-engine plugin for btdig.com (a DHT torrent indexer).

    Scrapes the public HTML search results: fetches the first page, derives
    the page count from the reported hit total (10 hits per page), then
    fetches the remaining pages one second apart, feeding every parsed hit
    to novaprinter.prettyPrinter().
    """
    url = 'https://www.btdig.com'
    name = 'btdig'
    supported_categories = {'all': '0'}  # btdig has no category filter

    def search(self, what, cat='all'):
        """Search btdig for `what` and print all results via prettyPrinter.

        `what` is the query string from qBittorrent (tokens separated by
        spaces or already '+'-joined); `cat` is accepted for interface
        compatibility but ignored.
        """
        # Browser-like headers: btdig rejects obvious non-browser clients.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8',
            'Accept-Language': 'en-GB,en;q=0.5',
            # Only advertise encodings get_response() can actually decode.
            # The previous value included br/zstd, which the server could
            # legally answer with, yielding undecodable bytes.
            'Accept-Encoding': 'gzip',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-User': '?1',
            'DNT': '1',
            'Sec-GPC': '1',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
            'TE': 'trailers'
        }
        query = what.replace(' ', '+')  # btdig expects '+'-separated tokens
        url = f"{self.url}/search?q={query}&order=0"
        response = self.get_response(urllib.request.Request(url, headers=headers))
        # The total hit count appears in a styled <span>; 10 hits per page.
        results_match = re.search(r'<span style="color:rgb\(100, 100, 100\);padding:2px 10px">(\d+) results found', response)
        if results_match:
            total_pages = math.ceil(int(results_match.group(1)) / 10)
        else:
            total_pages = 1  # count not found — assume a single page
        self.parse_page(response)
        # The first request was page 0; fetch pages 1 .. total_pages-1.
        for page in range(1, total_pages):
            time.sleep(1)  # be polite: one second between page fetches
            url = f"{self.url}/search?q={query}&p={page}&order=0"
            response = self.get_response(urllib.request.Request(url, headers=headers))
            self.parse_page(response)

    def get_response(self, req):
        """Fetch `req` and return the decoded HTML body, or '' on failure.

        Returning '' instead of raising keeps the plugin best-effort: a
        failed page simply contributes no results.
        """
        try:
            with urllib.request.urlopen(req) as response:
                body = response.read()
                if response.info().get('Content-Encoding') == 'gzip':
                    body = gzip.decompress(body)
            return body.decode('utf-8', errors='ignore')
        except Exception:
            return ""

    def parse_page(self, html_content):
        """Extract every result from one page of HTML and print it."""
        # Each hit lives in a <div class="one_result">; capture everything
        # up to the next such div (or the end of the document).
        for match in re.finditer(r'<div class="one_result".*?(?=<div class="one_result"|$)', html_content, re.DOTALL):
            chunk = match.group(0)
            magnet_match = re.search(r'<a href="(magnet:\?xt=urn:btih:[^"]+)"', chunk)
            name_match = re.search(r'<div class="torrent_name".*?><a.*?>(.*?)</a>', chunk, re.DOTALL)
            size_match = re.search(r'<span class="torrent_size"[^>]*>(.*?)</span>', chunk)
            # Detail-page link — could later be used to fetch more info.
            desc_link_match = re.search(r'<div class="torrent_name".*?><a href="([^"]+)"', chunk, re.DOTALL)
            if not (magnet_match and name_match and size_match and desc_link_match):
                continue  # malformed/partial block: skip rather than crash
            prettyPrinter({
                'link': magnet_match.group(1),
                # Strip any inline markup (e.g. highlight tags) from the name.
                'name': re.sub(r'<.*?>', '', name_match.group(1)).strip(),
                # Sizes contain \xa0 (no-break space); normalize to a plain
                # space. NOTE(review): the original replace appeared as a
                # space-for-space no-op — presumably \xa0 was lost in transit.
                'size': size_match.group(1).strip().replace('\xa0', ' '),
                'desc_link': desc_link_match.group(1),
                'engine_url': self.url,
                # btdig does not expose seed/leech counts in search results.
                'seeds': '-1',
                'leech': '-1',
            })