diff --git a/.gitignore b/.gitignore index f5d57c5..3ef1ec5 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ test.py *.py[cod] common/__pycache__ GetQuestionApi.py +dist/ +build/ diff --git a/GetQuestionTessAndroid.py b/GetQuestionTessAndroid.py index 76a50ad..34a3486 100644 --- a/GetQuestionTessAndroid.py +++ b/GetQuestionTessAndroid.py @@ -10,14 +10,139 @@ from threading import Thread import time -while True: +import requests +import time +import sys +import os +import random + +global AUTO +global current_question +current_question = '' +global answers +answers = [] +global current_answer +current_answer = '' +global current_bigData +current_bigData = 0 + +global running +running = True + +global status +status = 'waiting' + +global touch_start + +global platform +from colorama import init,Fore +init() + + +GUI = False +AUTO = True + +global count +count = 0 + +def hit_me(): + global running + global touch_start + global save_choices + global AUTO + global current_question + global answers + global current_answer + global current_bigData + global status + global count + global getMax + global platform + getMax=False + + token = '88QUg$!pmf!TAY5r'; + + if platform=='auto': + go = input('输入回车运行,输入 n 回车结束运行: ') + if go == 'n': + return + + t = time.clock() # 截图 screenshot.check_screenshot() img = Image.open("./screenshot.png") + print('截图耗时: {0}'.format(time.clock() - t)) + t = time.clock() # 文字识别 - question, choices = ocr.ocr_img(img) + question, choices, selection_start = ocr.ocr_img(img, platform) + + print('文字识别耗时: {0}'.format(time.clock() - t)) + + if (count > 6000): + print('退出') + os.system('adb shell input keyevent 4 && adb shell input keyevent 4') + return + + if (question=='' or len(choices)==0 or selection_start==0): + status = 'waiting' + print('未检测到题板') + count = count + 1 + # print('休息1s') + # time.sleep(1) + os.system('adb shell input tap 930 300'); + return hit_me() + # if running: + # return hit_me() + + touch_start = selection_start + containSum = 0 + for q in question: + if (q in current_question): + containSum = containSum + 1 + + if (containSum*100/len(question)>80): + if (status != 'waiting'): + print('此题已答过') + print('休息5s') + time.sleep(5) + return hit_me() + # if running: + # return hit_me() + if (touch_start==-1): + return hit_me() + current_answer = ocr.get_question(img, touch_start, platform) + if (current_answer < len(answers)): + print('正确答案为:'+answers[current_answer]) + # req = requests.get(url='http://localhost:3000/add', params={'question': current_question, 'answer': answers[current_answer], 'bigdata':answers[current_bigData]}) + # print(req) + req = requests.get(url='http://hj.chenzhicheng.com/update', params={'question': current_question, 'right': current_answer, 'token':token}) + print(req.text) + return hit_me() + + # if running: + # return hit_me() + t = time.clock() + count = 0 + status = 'answered' + req = requests.get(url='http://hj.chenzhicheng.com/add', params={'question': question, 'answer': choices, 'token':token}) + print(req.text) + print('发送问题耗时: {0}'.format(time.clock() - t)) + + current_question = question + answers = choices + + save_choices = choices + + result = '问题:'+question+'\n\n'; + + for choice in choices: + result += choice + '\n' + + if GUI: + var.set(result) # 设置标签的文字为 'you hit me' + # t = time.clock() # 用不同方法输出结果,取消某个方法在前面加上# @@ -26,20 +151,126 @@ # # 将问题与选项一起搜索方法,并获取搜索到的结果数目 # methods.run_algorithm(1, question, choices) # # 用选项在问题页面中计数出现词频方法 - # methods.run_algorithm(2, question, choices) - - # 多线程 - m1 = Thread(methods.run_algorithm(0, question, choices)) - m2 = Thread(methods.run_algorithm(1, question, choices)) - m3 = Thread(methods.run_algorithm(2, question, choices)) - m1.start() - m2.start() - m3.start() - - # end_time = time.clock() - # print(end_time - t) - go = input('输入回车继续运行,输入 n 回车结束运行: ') - if go == 'n': - break - - print('------------------------') + # methods.run_algorithm(2, question, choices, q) + + def count_base(question,choices): + global getMax + print('\n-- 方法3: 题目搜索结果包含选项词频计数法 --\n') + # q.put('\n-- 方法3: 题目搜索结果包含选项词频计数法 --\n') + # 请求 + req = requests.get(url='http://www.baidu.com/s', params={'wd':question}) + content = req.text + #print(content) + counts = [] + print('Question: '+question) + if ('不' in question or '错误' in question): + print('**请注意此题为否定题,选计数最少的**') + getMax = False + else: + getMax = True + + for i in range(len(choices)): + counts.append(content.count(choices[i])) + #print(choices[i] + " : " + str(counts[i])) + return output(choices, counts,'count_base', question) + + def open_webbrowser_count(question,choices): + global getMax + print('\n-- 方法2: 题目+选项搜索结果计数法 --\n') + # q.put('\n-- 方法2: 题目+选项搜索结果计数法 --\n') + print('Question: ' + question) + if ('不' in question or '错误' in question): + getMax = False + print('**请注意此题为否定题,选计数最少的**') + else: + getMax = True + + counts = [] + for i in range(len(choices)): + # 请求 + req = requests.get(url='http://www.baidu.com/s', params={'wd': question + choices[i]}) + content = req.text + index = content.find('百度为您找到相关结果约') + 11 + content = content[index:] + index = content.find('个') + count = content[:index].replace(',', '') + counts.append(count) + return output(choices, counts, 'open_webbrowser_count', question) + + def output(choices, counts, al_num, question): + global getMax + if (len(counts)==0): + return random.choice([0,1,2]); + counts = list(map(int, counts)) + + # 计数最高 + index_max = counts.index(max(counts)) + + # 计数最少 + index_min = counts.index(min(counts)) + + if index_max == index_min: + print(Fore.RED + "高低计数相等此方法失效!" + Fore.RESET) + if al_num=='count_base': + return open_webbrowser_count(question, choices) + else: + return random.choice([0,1,2]) + + result = 0; + for i in range(len(choices)): + if i == index_max: + # 绿色为计数最高的答案 + print(Fore.GREEN + "{0} : {1} ".format(choices[i], counts[i]) + Fore.RESET) + if getMax: + result=i + + elif i == index_min: + # 红色为计数最低的答案 + print(Fore.MAGENTA + "{0} : {1}".format(choices[i], counts[i]) + Fore.RESET) + if (getMax==False): + result=i + + else: + print("{0} : {1}".format(choices[i], counts[i])) + # q.put(' '+str(choices[i]) + ':' + str(counts[i]) + '\n') + return result + + t = time.clock() + selection = count_base(question, choices) + print('寻找答案耗时: {0}'.format(time.clock() - t)) + + if (len(choices) <3 or selection>2): + return hit_me(); + print('\n我要选'+choices[selection]) + if (platform=='zs'): + #芝士超人 + answer_height = 200 + elif (platform=='hj' or platform =='cd'): + #花椒直播 + # 冲顶大会 + answer_height = 150 + + if (platform!='auto'): + os.system('adb shell input tap 250 ' + str(touch_start+(selection+0.5)*answer_height)) + # if running: + # hit_me() + req = requests.get(url='http://hj.chenzhicheng.com/update', params={'question': current_question, 'advise': selection, 'token':token}) + print(req.text) + print('休息5s') + time.sleep(5) + return hit_me() + +if (len(sys.argv)>1): + go = sys.argv[1] +else: + go = input('请选择平台:\n1. 芝士超人\n2. 花椒直播 \n3. 冲顶大会 \n4. 普遍适应\n') +if go=='1': + platform = 'zs' +elif go=='2': + platform = 'hj' +elif go=='3': + platform = 'cd' +elif go=='4': + platform = 'auto' +# while running: +hit_me() \ No newline at end of file diff --git a/README.md b/README.md index 0c88bbf..ea3e66f 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,15 @@ > > [程序员如何玩转《冲顶大会》?](https://livc.io/blog/204) +## FORK 更新 + +- 自动识别问题为多行情况 +- 无需按回车,题板出现时,自动答题 +- 无需手动操作手机,自动选择正确答案 +- 公布答案时,显示正确答案 +- 同步答案到网络上,方便大家参考 +- 发布了 release + ## 更新日志 - 2018.01.12 diff --git a/common/methods.py b/common/methods.py index 6db0cb4..e2464f1 100644 --- a/common/methods.py +++ b/common/methods.py @@ -7,19 +7,48 @@ import requests import webbrowser import urllib.parse +import json +import time # # 颜色兼容Win 10 from colorama import init,Fore init() -def open_webbrowser(question): - webbrowser.open('https://baidu.com/s?wd=' + urllib.parse.quote(question)) +global getMax +getMax = True + +class JSONObject: + def __init__(self, d): + self.__dict__ = d -def open_webbrowser_count(question,choices): +def open_webbrowser(question): + # print("Cancel") + # webbrowser.open('https://www.google.co.uk/search?q=' + urllib.parse.quote(question)) + # webbrowser.open('https://www.sogou.com/web?query=' + urllib.parse.quote(question)) + webbrowser.open('https://www.baidu.com/s?wd=' + urllib.parse.quote(question)) + +def cloud_count(question, q): + print('\n-- 方法1: 题库筛选 --\n') + # q.put('\n-- 方法1: 题库筛选 --\n') + req = requests.get(url='http://localhost:3000/query', params={'question': question}) + result = json.loads(req.text, object_hook=JSONObject) + try: + q.put({ 'name':'cloud_count', 'value':result.answer}) + print(result.answer) + except AttributeError as e: + pass + q.put('END') + +def open_webbrowser_count(question,choices,q): + global getMax print('\n-- 方法2: 题目+选项搜索结果计数法 --\n') + # q.put('\n-- 方法2: 题目+选项搜索结果计数法 --\n') print('Question: ' + question) - if '不是' in question: + if ('不' in question or '错误' in question): + getMax = False print('**请注意此题为否定题,选计数最少的**') + else: + getMax = True counts = [] for i in range(len(choices)): @@ -32,24 +61,33 @@ def open_webbrowser_count(question,choices): count = content[:index].replace(',', '') counts.append(count) #print(choices[i] + " : " + count) - output(choices, counts) + output(choices, counts, q, 'open_webbrowser_count') -def count_base(question,choices): +def count_base(question,choices,q): + global getMax print('\n-- 方法3: 题目搜索结果包含选项词频计数法 --\n') + # q.put('\n-- 方法3: 题目搜索结果包含选项词频计数法 --\n') # 请求 req = requests.get(url='http://www.baidu.com/s', params={'wd':question}) content = req.text #print(content) counts = [] print('Question: '+question) - if '不是' in question: + if ('不' in question or '错误' in question): print('**请注意此题为否定题,选计数最少的**') + getMax = False + else: + getMax = True + for i in range(len(choices)): counts.append(content.count(choices[i])) #print(choices[i] + " : " + str(counts[i])) - output(choices, counts) + output(choices, counts, q, 'count_base') -def output(choices, counts): +def output(choices, counts, q, al_num): + global getMax + if (len(counts)==0): + return counts = list(map(int, counts)) #print(choices, counts) @@ -61,27 +99,37 @@ def output(choices, counts): if index_max == index_min: print(Fore.RED + "高低计数相等此方法失效!" + Fore.RESET) + q.put('END') return for i in range(len(choices)): - print() if i == index_max: # 绿色为计数最高的答案 print(Fore.GREEN + "{0} : {1} ".format(choices[i], counts[i]) + Fore.RESET) + if getMax: + q.put({ 'name':al_num, 'value':str(choices[i])}) + elif i == index_min: # 红色为计数最低的答案 print(Fore.MAGENTA + "{0} : {1}".format(choices[i], counts[i]) + Fore.RESET) + if (getMax==False): + q.put({ 'name':al_num, 'value':str(choices[i])}) + else: print("{0} : {1}".format(choices[i], counts[i])) + # q.put(' '+str(choices[i]) + ':' + str(counts[i]) + '\n') + q.put('END') -def run_algorithm(al_num, question, choices): +def run_algorithm(al_num, question, choices, q): if al_num == 0: open_webbrowser(question) - elif al_num == 1: - open_webbrowser_count(question, choices) elif al_num == 2: - count_base(question, choices) + open_webbrowser_count(question, choices, q) + elif al_num == 3: + count_base(question, choices, q) + elif al_num == 1: + cloud_count(question, q) if __name__ == '__main__': question = '新装修的房子通常哪种化学物质含量会比较高?' diff --git a/common/ocr.py b/common/ocr.py index 02fb2f9..30f78f1 100644 --- a/common/ocr.py +++ b/common/ocr.py @@ -7,6 +7,7 @@ from PIL import Image import pytesseract from PIL import ImageFilter +import datetime # 二值化算法 def binarizing(img,threshold): @@ -40,13 +41,233 @@ def depoint(img): #input: gray image pixdata[x,y] = 255 return img -def ocr_img(image): +global os - # 切割题目和选项位置,左上角坐标和右下角坐标,自行测试分辨率 - question_im = image.crop((50, 350, 1000, 560)) # 坚果 pro1 - choices_im = image.crop((75, 535, 990, 1150)) - # question = image.crop((75, 315, 1167, 789)) # iPhone 7P +def get_question(image, question_end, platform): + global os + os = platform + + w,h = image.size + # 支持平板 + if (w>h): + image = image.transpose(Image.ROTATE_270) + temp = w + w = h + h = temp + + im_pixel = image.load() + + image.save(datetime.datetime.now().strftime("%Y%m%d%H%M%S")+os+'.png'); + + if (os=='zs'): + # 芝士超人 + answer_height = 100 + elif (os=='hj' or os =='cd'): + # 花椒直播 + # 冲顶大会 + answer_height = 150 + + # 自动识别答案位置 + piece_y_max = question_end + scan_x_border = 75 + scan_start_y = piece_y_max + for i in range(scan_start_y, h): + if (piece_y_max != question_end): + break + for j in range(scan_x_border, scan_x_border+100): + pixel = im_pixel[j, i] + if (abs(pixel[1]-pixel[0])>50 or abs(pixel[1]-pixel[2])>50): + piece_y_max = max(i, piece_y_max) + break + return round((piece_y_max-question_end)/answer_height) + + +def ocr_img(image, platform): + global os + os = platform + + w,h = image.size + #读取图像 + + #支持平板 + if (w>h): + image = image.transpose(Image.ROTATE_270) + temp = w + w = h + h = temp + + im_pixel = image.load() + + if (os=='zs'): + #芝士超人 + # 自动识别问题开始 + scan_x_border = int(w / 4) + scan_start_y = 200 + question_start = 200 + for i in range(scan_start_y, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if (pixel[1]==255 and pixel[1]==255 and pixel[2]==255): + white_sum = white_sum + 1 + if (white_sum<90): + question_start = max(i, question_start) + break; + + + # 自动识别问题结束 + scan_x_border = int(w / 4) + scan_start_y = question_start + question_end = question_start + for i in range(scan_start_y, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if (abs(200-pixel[0])<50 and (200-pixel[1])<50 and (200-pixel[2])<50): + white_sum = white_sum + 1 + + if (white_sum==100): + question_end = max(i, question_end) + break; + + if((question_end - question_start)<10): + return '', [], 0 + + question_im = image.crop((50, question_start, 1000, question_end)) + + # 自动识别题板高度 + piece_y_max = question_end + scan_x_border = int(w / 2) + scan_start_y = piece_y_max + for i in range(scan_start_y, h): + if (piece_y_max != question_end): + break + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if (abs(pixel[1]-pixel[0])>3 or abs(pixel[1]-pixel[2])>3): + piece_y_max = max(i, piece_y_max) + break + + choices_im = image.crop((75, question_end, 1000, piece_y_max)) + + answer_height = 100 + + if (os=='hj'): + #花椒直播 + # 自动识别问题开始 + + scan_x_border = int(w / 2) + scan_start_y =500 + question_start = 500 + for i in range(scan_start_y, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if ((255-pixel[0])<10 and (255-pixel[1])<10 and (255-pixel[2])<10): + white_sum = white_sum + 1 + if (white_sum<90): + question_start = max(i, question_start) + break; + + # 自动识别问题结束 + scan_x_border = int(w / 2) + scan_start_y = question_start + question_end = question_start + for i in range(scan_start_y, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if (abs(220-pixel[0])<20 or abs(220-pixel[1])<20 or abs(220-pixel[2])<20 ): + white_sum = white_sum + 1 + if (white_sum>90): + question_end = max(i, question_end) + break; + + if((question_end - question_start)<100): + return '', [], 0 + + question_im = image.crop((50, question_start, 1000, question_end)) + + # 自动识别题板高度 + piece_y_max = question_end + scan_x_border = int(w / 2) + for i in range(question_end, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if (pixel[0]<100 and pixel[1]<100 and pixel[2]<100): + white_sum = white_sum + 1 + if (white_sum>90): + piece_y_max = max(i, piece_y_max) + break; + + choices_im = image.crop((75, question_end, 1000, piece_y_max)) + + answer_height = 150 + + if (os=='cd'): + # 冲顶大会 + # 自动识别问题开始 + scan_x_border = int(w / 2) + scan_start_y = 100 + question_start = 100 + for i in range(scan_start_y, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if ((255-pixel[0])<10 and (255-pixel[1])<10 and (255-pixel[2])<10): + white_sum = white_sum + 1 + if (white_sum!=100): + question_start = max(i, question_start) + break; + + + # 自动识别问题结束 + scan_x_border = int(w / 2) + scan_start_y = question_start + question_end = question_start + for i in range(scan_start_y, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if (abs(200-pixel[0])<10 and abs(200-pixel[1])<10 and abs(200-pixel[2])<10): + white_sum = white_sum + 1 + if (white_sum==100): + question_end = max(i, question_end) + break; + + question_im = image.crop((50, question_start, 1000, question_end)) + + # 自动识别题板高度 + piece_y_max = question_end + scan_x_border = int(w / 2) + for i in range(question_end, h): + white_sum=0 + for j in range(scan_x_border-50, scan_x_border+50): + pixel = im_pixel[j, i] + if (pixel[0]<100 and pixel[1]<100 and pixel[2]<100): + white_sum = white_sum + 1 + if (white_sum>10): + piece_y_max = max(i, piece_y_max) + break; + + choices_im = image.crop((75, question_end, 1000, piece_y_max)) + + answer_height = 150 + + if (os!='auto'): + if(question_start == question_end or question_end == piece_y_max): + return '', [], 0 + + if (os=='auto'): + question_im = image.crop((50, 350, 1000, 560)) # 坚果 pro1 + choices_im = image.crop((75, 535, 990, 1150)) + question_end = -1 + + # question = image.crop((75, 315, 1167, 789)) # iPhone 7P + # question_im.save('question.jpg'); + # choices_im.save('choices.jpg'); # 边缘增强滤波,不一定适用 #question_im = question_im.filter(ImageFilter.EDGE_ENHANCE) #choices_im = choices_im.filter(ImageFilter.EDGE_ENHANCE) @@ -78,23 +299,16 @@ def ocr_img(image): # 处理将"一"识别为"_"的问题 question = question.replace("_", "一") - choice = pytesseract.image_to_string(choices_im, lang='chi_sim', config=tessdata_dir_config) # 处理将"一"识别为"_"的问题 - choices = choice.strip().replace("_", "一").split("\n") - choices = [ x for x in choices if x != '' ] - - # 兼容截图设置不对,意外出现问题为两行或三行 - if (choices[0].endswith('?')): - question += choices[0] - choices.pop(0) - if (choices[1].endswith('?')): - question += choices[0] - question += choices[1] - choices.pop(0) - choices.pop(1) - - return question, choices + choices = choice.strip().replace("_", "一").replace(" ", "").split("\n") + + choices = [ x for x in choices if x != '' ] + + if (len(choices)): + choices = choices[:3] + + return question, choices, question_end if __name__ == '__main__': diff --git a/index.html b/index.html new file mode 100644 index 0000000..d04141f --- /dev/null +++ b/index.html @@ -0,0 +1,54 @@ + + + + + + + + + +蚂蚁部落 + + + + + +
+ + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index bff61d7..0c2a203 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ matplotlib==2.0.2 Pillow==4.1.1 colorama==0.3.9 pytesseract==0.1.7 +pypiwin32 diff --git a/screenshot.png b/screenshot.png index b2477ee..755804f 100644 Binary files a/screenshot.png and b/screenshot.png differ