Skip to content
142 changes: 94 additions & 48 deletions scraper.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env python
import os, imghdr, urllib, urllib2, sys, Image, argparse, zlib, unicodedata, re
import subprocess
import difflib
from xml.etree import ElementTree as ET
from xml.etree.ElementTree import Element, SubElement
Expand All @@ -21,7 +22,7 @@
def normalize(s):
    """Return *s* as unicode text with combining marks removed.

    The value is converted with ``unicode()`` and decomposed using the NFKD
    normalization form; every character whose Unicode category is ``'Mn'``
    is then dropped and the remaining characters are joined back together.
    """
    decomposed = unicodedata.normalize('NFKD', unicode(s))
    kept = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept)

def fixExtension(file):
    """Rename an image file so its extension matches its detected format.

    ``imghdr.what`` inspects the file's contents and the file is renamed to
    ``<path without extension>.<detected type>``.

    Args:
        file: Path of the image file to inspect and rename.

    Returns:
        The new path after renaming, or the original path unchanged when the
        image type could not be detected.
    """
    kind = imghdr.what(file)
    if kind is None:
        # imghdr.what() returns None for unrecognised content; renaming would
        # otherwise produce a bogus "<name>.None" file.
        return file
    newfile = "%s.%s" % (os.path.splitext(file)[0], kind)
    os.rename(file, newfile)
    return newfile
Expand All @@ -37,14 +38,19 @@ def readConfig(file):
name=line.split('=')[1]
if "PATH=" in line:
path=line.split('=')[1]
if "COMMAND=" in line:
command=line.split('=')[1]
command=re.sub(r'.*?runcommand.sh\s*[0-9]*\s*', r'', command)
command=re.sub(r'\s*%.*?%\s*', r'', command)
command=command.replace('"','')
elif "EXTENSION" in line:
ext=line.split('=')[1]
elif "PLATFORMID" in line:
pid=line.split('=')[1]
pid=int(line.split('=')[1])
if not pid:
continue
else:
system=(name,path,ext,pid)
system=(name,path,command,ext,pid)
systems.append(system)
config.close()
return systems
Expand Down Expand Up @@ -98,8 +104,9 @@ def getFiles(base):
dict.add(filepath)
return dict

def getGameInfo(file,platformID):
def getGameInfo(file,platformID,titlesDict):
title=re.sub(r'\[.*?\]|\(.*?\)', '', os.path.splitext(os.path.basename(file))[0]).strip()
keeptitle = False
if args.crc:
crcvalue=crc(file)
if args.v:
Expand All @@ -112,24 +119,29 @@ def getGameInfo(file,platformID):
else:
URL = "http://thegamesdb.net/api/GetGame.php"
platform = getPlatformName(platformID)
if SCUMMVM:
if SCUMMVM:
title = getScummvmTitle(title)
args.fix = True #Scummvm doesn't have a proper platformID so we search all
if platform == "Arcade" or platform == "NeoGeo": title = getRealArcadeTitle(title)

if platformID == 23 or platformID == 24:
if title in titlesDict:
title = titlesDict[title]
keeptitle = True

if args.fix:
try:
try:
fixreq = urllib2.Request("http://thegamesdb.net/api/GetGamesList.php", urllib.urlencode({'name' : title, 'platform' : platform}), headers={'User-Agent' : "RetroPie Scraper Browser"})
fixdata=ET.parse(urllib2.urlopen(fixreq)).getroot()
if fixdata.find("Game") is not None:
if fixdata.find("Game") is not None:

#values={ 'id': fixdata.findall("Game/id")[chooseResult(fixdata)].text if args.m else fixdata.find("Game/id").text }
values={ 'id': fixdata.findall("Game/id")[chooseResult(fixdata)].text if args.m else fixdata.findall("Game/id")[autoChooseBestResult(fixdata,title)].text }

except:
return None
else:
values={'name':title,'platform':platform}
searchTitle = title.split('(',1)
searchTitle = searchTitle[0].strip()
values={'name':searchTitle,'platform':platform}

try:
req = urllib2.Request(URL,urllib.urlencode(values), headers={'User-Agent' : "RetroPie Scraper Browser"})
Expand All @@ -145,9 +157,18 @@ def getGameInfo(file,platformID):
if result is not None and result.find("title").text is not None:
return result
elif data.find("Game") is not None:
return data.findall("Game")[chooseResult(data)] if args.m else data.findall("Game")[autoChooseBestResult(data,title)]
if args.m:
game = data.findall("Game")[chooseResult(data)]
else:
game = data.findall("Game")[autoChooseBestResult(data,title)]
if keeptitle:
game.find("GameTitle").text = title
return game
else:
return None
game = ET.Element('Game')
gameTitle = ET.SubElement(game, 'GameTitle')
gameTitle.text = title
return game
except Exception, err:
print "Skipping game..(%s)" % str(err)
return None
Expand All @@ -166,7 +187,7 @@ def getGamePlatform(nodes):
return getText(nodes.find("system_title"))
else:
return getText(nodes.find("Platform"))

def getScummvmTitle(title):
print "Fetching real title for %s from scummvm.org" % title
URL = "http://scummvm.org/compatibility/DEV/%s" % title.split("-")[0]
Expand All @@ -177,18 +198,6 @@ def getScummvmTitle(title):
return m.groups()[0]
else:
print "No title found for %s on scummvm.org" % title
return title

def getRealArcadeTitle(title):
print "Fetching real title for %s from mamedb.com" % title
URL = "http://www.mamedb.com/game/%s" % title
data = "".join(urllib2.urlopen(URL).readlines())
m = re.search('<b>Name:.*</b>(.+) .*<br/><b>Year', data)
if m:
print "Found real title %s for %s on mamedb.com" % (m.group(1), title)
return m.group(1)
else:
print "No title found for %s on mamedb.com" % title
return title

def getDescription(nodes):
Expand Down Expand Up @@ -267,8 +276,8 @@ def chooseResult(nodes):
return int(raw_input("Select a result (or press Enter to skip): "))
else:
return 0


def autoChooseBestResult(nodes,t):
results=nodes.findall('Game')
t = t.split('(', 1)[0]
Expand Down Expand Up @@ -296,14 +305,42 @@ def autoChooseBestResult(nodes,t):
else:
return 0

def getMameTitles(command):
    """Build a ROM-name -> full-title lookup from the emulator's own game list.

    Two emulators are recognised by a substring of *command*:

    * ``mame4all``: runs ``<command> -listfull`` and reads each output line
      as a ROM name in the first 10 columns followed by a quoted title.
    * ``fba2x``: runs ``<command> --gamelist`` and then reads
      ``gamelist.txt`` from the directory containing *command*; rows that
      split into exactly 10 ``|``-separated fields contribute field 1 as the
      ROM name and field 3 as the title.

    The lookup is an optional refinement, so a failure to run the emulator or
    to read its list is reported on stderr and yields whatever titles were
    collected (possibly none) instead of raising.

    Args:
        command: Path of the emulator executable.

    Returns:
        dict mapping ROM name to full title; empty when *command* matches
        neither emulator or no list could be obtained.
    """
    titlesDict = {}

    if "mame4all" in command:
        try:
            # universal_newlines makes check_output return text, not bytes.
            output = subprocess.check_output([command, '-listfull'],
                                             universal_newlines=True)
        except (OSError, subprocess.CalledProcessError) as err:
            sys.stderr.write("Could not get game list from %s (%s)\n"
                             % (command, err))
            output = ''
        for line in output.splitlines():
            romname = line[0:10].strip()
            if not romname:
                # Blank or padding-only lines carry no ROM name.
                continue
            # Strip surrounding whitespace first so a trailing space cannot
            # shield the closing quote from removal.
            titlesDict[romname] = line[10:].strip().strip('"')

    if "fba2x" in command:
        try:
            subprocess.call([command, '--gamelist'])
        except OSError as err:
            sys.stderr.write("Could not run %s (%s)\n" % (command, err))
        listpath = os.path.join(os.path.dirname(command), 'gamelist.txt')
        try:
            with open(listpath) as f:
                rows = f.readlines()
        except (IOError, OSError) as err:
            sys.stderr.write("Could not read %s (%s)\n" % (listpath, err))
            rows = []
        for row in rows:
            entries = row.split('|')
            # Only rows with exactly 10 fields are treated as game entries.
            if len(entries) == 10:
                titlesDict[entries[1].strip()] = entries[3].strip()

    return titlesDict

def scanFiles(SystemInfo):
status = "ok"

name=SystemInfo[0]
if name == "scummvm":
global SCUMMVM
SCUMMVM = True
folderRoms=SystemInfo[1]
extension=SystemInfo[2]
platformID=SystemInfo[3]
command=SystemInfo[2]
extension=SystemInfo[3]
platformID=SystemInfo[4]

titlesDict = {}
if platformID == 23 or platformID == 24:
titlesDict = getMameTitles(command)

global gamelistExists
global existinglist
Expand Down Expand Up @@ -341,28 +378,28 @@ def scanFiles(SystemInfo):
try:
filepath=os.path.abspath(os.path.join(root, files))
filename = os.path.splitext(files)[0]

if gamelistExists and not args.f:
if skipGame(existinglist,filepath):
continue

print "Trying to identify %s.." % files
data=getGameInfo(filepath, platformID)

data=getGameInfo(filepath, platformID, titlesDict)

if data is None:
continue
else:
result=data

str_title=getTitle(result)
str_des=getDescription(result)
str_img=getImage(result)
str_rd=getRelDate(result)
str_pub=getPublisher(result)
str_dev=getDeveloper(result)
lst_genres=getGenres(result)

if str_title is not None:
game = SubElement(gamelist, 'game')
path = SubElement(game, 'path')
Expand All @@ -373,56 +410,63 @@ def scanFiles(SystemInfo):
publisher=SubElement(game, 'publisher')
developer=SubElement(game, 'developer')
genres=SubElement(game, 'genres')

path.text=filepath
name.text=str_title
print "Game Found: %s" % str_title

if str_des is not None:
desc.text=str_des

if str_img is not None and args.noimg is False:
if args.newpath is True:
imgpath="./" + filename+os.path.splitext(str_img)[1]
else:
imgpath=os.path.abspath(os.path.join(root, filename+os.path.splitext(str_img)[1]))

print "Downloading boxart.."

downloadBoxart(str_img,imgpath)
imgpath=fixExtension(imgpath)
image.text=imgpath

if args.w:
try:
resizeImage(Image.open(imgpath),imgpath)
except:
print "Image resize error"

if str_rd is not None:
releasedate.text=str_rd

if str_pub is not None:
publisher.text=str_pub

if str_dev is not None:
developer.text=str_dev

if lst_genres is not None:
for genre in lst_genres:
newgenre = SubElement(genres, 'genre')
newgenre.text=genre.strip()
except KeyboardInterrupt:
print "Ctrl+C detected. Closing work now..."
status = "break"
break
except Exception as e:
print "Exception caught! %s" % e
else:
continue
break

if gamelist.find("game") is None:
print "No new games added."
else:
print "{} games added.".format(len(gamelist))
exportList(gamelist)

return status

try:
if os.getuid()==0:
os.environ['HOME']="/home/"+os.getenv("SUDO_USER")
Expand Down Expand Up @@ -454,6 +498,8 @@ def scanFiles(SystemInfo):
sys.exit()
else:
for i,v in enumerate(ES_systems):
scanFiles(ES_systems[i])
result = scanFiles(ES_systems[i])
if result == "break":
break

print "All done!"