diff --git a/scraper.py b/scraper.py old mode 100644 new mode 100755 index e8cfef5..8240383 --- a/scraper.py +++ b/scraper.py @@ -1,5 +1,6 @@ #!/usr/bin/env python import os, imghdr, urllib, urllib2, sys, Image, argparse, zlib, unicodedata, re +import subprocess import difflib from xml.etree import ElementTree as ET from xml.etree.ElementTree import Element, SubElement @@ -21,7 +22,7 @@ def normalize(s): return ''.join((c for c in unicodedata.normalize('NFKD', unicode(s)) if unicodedata.category(c) != 'Mn')) -def fixExtension(file): +def fixExtension(file): newfile="%s.%s" % (os.path.splitext(file)[0],imghdr.what(file)) os.rename(file, newfile) return newfile @@ -37,14 +38,19 @@ def readConfig(file): name=line.split('=')[1] if "PATH=" in line: path=line.split('=')[1] + if "COMMAND=" in line: + command=line.split('=')[1] + command=re.sub(r'.*?runcommand.sh\s*[0-9]*\s*', r'', command) + command=re.sub(r'\s*%.*?%\s*', r'', command) + command=command.replace('"','') elif "EXTENSION" in line: ext=line.split('=')[1] elif "PLATFORMID" in line: - pid=line.split('=')[1] + pid=int(line.split('=')[1]) if not pid: continue else: - system=(name,path,ext,pid) + system=(name,path,command,ext,pid) systems.append(system) config.close() return systems @@ -98,8 +104,9 @@ def getFiles(base): dict.add(filepath) return dict -def getGameInfo(file,platformID): +def getGameInfo(file,platformID,titlesDict): title=re.sub(r'\[.*?\]|\(.*?\)', '', os.path.splitext(os.path.basename(file))[0]).strip() + keeptitle = False if args.crc: crcvalue=crc(file) if args.v: @@ -112,24 +119,29 @@ def getGameInfo(file,platformID): else: URL = "http://thegamesdb.net/api/GetGame.php" platform = getPlatformName(platformID) - if SCUMMVM: + if SCUMMVM: title = getScummvmTitle(title) args.fix = True #Scummvm doesn't have a proper platformID so we search all - if platform == "Arcade" or platform == "NeoGeo": title = getRealArcadeTitle(title) - + if platformID == 23 or platformID == 24: + if title in titlesDict: + title = titlesDict[title] + keeptitle = True + if args.fix: - try: + try: fixreq = urllib2.Request("http://thegamesdb.net/api/GetGamesList.php", urllib.urlencode({'name' : title, 'platform' : platform}), headers={'User-Agent' : "RetroPie Scraper Browser"}) fixdata=ET.parse(urllib2.urlopen(fixreq)).getroot() - if fixdata.find("Game") is not None: + if fixdata.find("Game") is not None: #values={ 'id': fixdata.findall("Game/id")[chooseResult(fixdata)].text if args.m else fixdata.find("Game/id").text } values={ 'id': fixdata.findall("Game/id")[chooseResult(fixdata)].text if args.m else fixdata.findall("Game/id")[autoChooseBestResult(fixdata,title)].text } - + except: return None else: - values={'name':title,'platform':platform} + searchTitle = title.split('(',1) + searchTitle = searchTitle[0].strip() + values={'name':searchTitle,'platform':platform} try: req = urllib2.Request(URL,urllib.urlencode(values), headers={'User-Agent' : "RetroPie Scraper Browser"}) @@ -145,9 +157,18 @@ def getGameInfo(file,platformID): if result is not None and result.find("title").text is not None: return result elif data.find("Game") is not None: - return data.findall("Game")[chooseResult(data)] if args.m else data.findall("Game")[autoChooseBestResult(data,title)] + if args.m: + game = data.findall("Game")[chooseResult(data)] + else: + game = data.findall("Game")[autoChooseBestResult(data,title)] + if keeptitle: + game.find("GameTitle").text = title + return game else: - return None + game = ET.Element('Game') + gameTitle = ET.SubElement(game, 'GameTitle') + gameTitle.text = title + return game except Exception, err: print "Skipping game..(%s)" % str(err) return None @@ -166,7 +187,7 @@ def getGamePlatform(nodes): return getText(nodes.find("system_title")) else: return getText(nodes.find("Platform")) - + def getScummvmTitle(title): print "Fetching real title for %s from scummvm.org" % title URL = "http://scummvm.org/compatibility/DEV/%s" % title.split("-")[0] @@ -177,18 +198,6 @@ def getScummvmTitle(title): return m.groups()[0] else: print "No title found for %s on scummvm.org" % title - return title - -def getRealArcadeTitle(title): - print "Fetching real title for %s from mamedb.com" % title - URL = "http://www.mamedb.com/game/%s" % title - data = "".join(urllib2.urlopen(URL).readlines()) - m = re.search('Name:.*(.+) .*
Year', data) - if m: - print "Found real title %s for %s on mamedb.com" % (m.group(1), title) - return m.group(1) - else: - print "No title found for %s on mamedb.com" % title return title def getDescription(nodes): @@ -267,8 +276,8 @@ def chooseResult(nodes): return int(raw_input("Select a result (or press Enter to skip): ")) else: return 0 - - + + def autoChooseBestResult(nodes,t): results=nodes.findall('Game') t = t.split('(', 1)[0] @@ -296,14 +305,42 @@ def autoChooseBestResult(nodes,t): else: return 0 +def getMameTitles(command): + titlesDict = {} + if "mame4all" in command: + output = subprocess.check_output([command, '-listfull']) + r = re.compile('\s+') + for line in output.splitlines(): + file = line[0:10].strip() + title = line[10:].strip('"') + titlesDict[file] = title + if "fba2x" in command: + subprocess.call([command, '--gamelist']) + file = os.path.join(os.path.dirname(command), 'gamelist.txt') + with open(file) as f: + output = f.readlines() + for line in output: + entries = line.split('|') + if len(entries) == 10: + titlesDict[entries[1].strip()] = entries[3].strip() + + return titlesDict + def scanFiles(SystemInfo): + status = "ok" + name=SystemInfo[0] if name == "scummvm": global SCUMMVM SCUMMVM = True folderRoms=SystemInfo[1] - extension=SystemInfo[2] - platformID=SystemInfo[3] + command=SystemInfo[2] + extension=SystemInfo[3] + platformID=SystemInfo[4] + + titlesDict = {} + if platformID == 23 or platformID == 24: + titlesDict = getMameTitles(command) global gamelistExists global existinglist @@ -341,20 +378,20 @@ def scanFiles(SystemInfo): try: filepath=os.path.abspath(os.path.join(root, files)) filename = os.path.splitext(files)[0] - + if gamelistExists and not args.f: if skipGame(existinglist,filepath): continue - + print "Trying to identify %s.." % files - - data=getGameInfo(filepath, platformID) - + + data=getGameInfo(filepath, platformID, titlesDict) + if data is None: continue else: result=data - + str_title=getTitle(result) str_des=getDescription(result) str_img=getImage(result) @@ -362,7 +399,7 @@ def scanFiles(SystemInfo): str_pub=getPublisher(result) str_dev=getDeveloper(result) lst_genres=getGenres(result) - + if str_title is not None: game = SubElement(gamelist, 'game') path = SubElement(game, 'path') @@ -373,49 +410,54 @@ def scanFiles(SystemInfo): publisher=SubElement(game, 'publisher') developer=SubElement(game, 'developer') genres=SubElement(game, 'genres') - + path.text=filepath name.text=str_title print "Game Found: %s" % str_title - + if str_des is not None: desc.text=str_des - + if str_img is not None and args.noimg is False: if args.newpath is True: imgpath="./" + filename+os.path.splitext(str_img)[1] else: imgpath=os.path.abspath(os.path.join(root, filename+os.path.splitext(str_img)[1])) - + print "Downloading boxart.." - + downloadBoxart(str_img,imgpath) imgpath=fixExtension(imgpath) image.text=imgpath - + if args.w: try: resizeImage(Image.open(imgpath),imgpath) except: print "Image resize error" - + if str_rd is not None: releasedate.text=str_rd - + if str_pub is not None: publisher.text=str_pub - + if str_dev is not None: developer.text=str_dev - + if lst_genres is not None: for genre in lst_genres: newgenre = SubElement(genres, 'genre') newgenre.text=genre.strip() except KeyboardInterrupt: print "Ctrl+C detected. Closing work now..." + status = "break" + break except Exception as e: print "Exception caught! %s" % e + else: + continue + break if gamelist.find("game") is None: print "No new games added." @@ -423,6 +465,8 @@ def scanFiles(SystemInfo): print "{} games added.".format(len(gamelist)) exportList(gamelist) + return status + try: if os.getuid()==0: os.environ['HOME']="/home/"+os.getenv("SUDO_USER") @@ -454,6 +498,8 @@ def scanFiles(SystemInfo): sys.exit() else: for i,v in enumerate(ES_systems): - scanFiles(ES_systems[i]) + result = scanFiles(ES_systems[i]) + if result == "break": + break print "All done!"