def ExtractID(url):
    """Return the value of the ``id`` query parameter of *url*, or ``None``.

    Args:
        url: The URL to inspect, e.g. a torrent download link such as
            ``https://host/download.php?id=123``.

    Returns:
        The first value of the ``id`` parameter as a string, or ``None``
        when the query string carries no non-blank ``id`` parameter.
    """
    # ``urlparse`` and ``parse_qs`` come from the file's urllib.parse import.
    # parse_qs maps each key to a list of its values and, by default, leaves
    # out parameters whose value is blank, so a present key always has at
    # least one element.  Looking the key up with .get() replaces the former
    # bare ``except:``, which would also have hidden unrelated errors.
    values = parse_qs(urlparse(url).query).get("id")
    return values[0] if values else None
GetHTML(url, headers, encoding) # 循环结束 if ( torrents_found == '' ): mylog.info( 'found nothing new.' )