Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions pt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/python3
# -*- coding:utf-8 -*-
# change:添加至种子篮以自动下载
import random
import smtplib
import datetime
Expand All @@ -10,6 +11,7 @@
import re
import requests
from email.mime.text import MIMEText
from urllib.parse import *

# 全局变量
global mylog
Expand All @@ -34,6 +36,15 @@ def ExtractDownloadHref( html ): # 提取种子的超链接
else:
return( m.group(1) )

def ExtractID(url):
    """Return the value of the ``id`` query parameter of *url*.

    Args:
        url: URL string whose query string may carry an ``id`` parameter.

    Returns:
        The first value of the ``id`` parameter as a string, or ``None``
        when the query string has no non-blank ``id`` parameter.
    """
    query = urlparse(url).query
    # parse_qs maps each key to a list of values and drops blank values by
    # default, so a missing or empty "id" yields None from .get(); when the
    # parameter is repeated only its first occurrence is returned.
    values = parse_qs(query).get("id")
    return values[0] if values else None


def ExtractHTML( name, s1, s2, html ): # 配对s1+name|s2标签,提取该标签中所有内容,可处理嵌套结构
l = len( html[0] )
ls1 = len( s1 )
Expand Down Expand Up @@ -107,6 +118,7 @@ def main():

mylog.debug( 'initialize .. done.' )
torrents_found = '' # 每个站筛出的种子累加到这里
# torrents_rss = []
for pt_site in PT_Sites: # 根据上面的数据结构循环遍历各pt站
url = pt_site['address'] + pt_site['torrents']
headers = pt_site['headers']
Expand Down Expand Up @@ -150,11 +162,13 @@ def main():
continue
c1 = 0; c2 = 0; c3 = 0
content = '' # 找到的种子存在这里
tids = []
for i in range(100): # 前100个种子
pstr2[0] = ExtractHTML( r'', r'<tr', r'/tr>', pstr1 ) # 种子表里提取1行
tc = ['' for j in range(8)]
for j in range(8): tc[j] = ExtractHTML( r'', r'<td', r'/td>', pstr2 ) # 1行里提取8列
torrenturl = pt_site['address'] + ExtractDownloadHref( tc[1] )
torrentid = ExtractID(torrenturl)
for j in range(8): tc[j] = StripallTags( tc[j] )
if ( ( tc[1] == '' ) or ( tc[3] == '' ) or ( tc[4] == '' ) or ( tc[5] == '' ) ): continue
tc[0] = ''
Expand All @@ -180,8 +194,15 @@ def main():
if ( torrents_log.find( torrenturl + '\t' ) < 0 ): # 比对是否曾经提取过该种子
content += tc[0] + '\n'
c3 += 1
tids.append(torrentid)
mylog.debug( str(c1) + ':' + str(c2) + ':' + str(c3) )
torrents_found += content # 各个站提取的种子累加到这个变量里
# torrents_rss+=[f"{pt_site['address']}myrss.php?add={torrentid}" for torrentid in tid]
for torrentid in tids:
url = f"{pt_site['address']}myrss.php?add={torrentid}"
headers = pt_site['headers']
encoding = pt_site['encoding']
html = GetHTML(url, headers, encoding)
# 循环结束
if ( torrents_found == '' ):
mylog.info( 'found nothing new.' )
Expand Down