Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 113 additions & 8 deletions core/python/Sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import random
from array import array
from math import sqrt
import subprocess

# Logging
import logging
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -206,17 +208,26 @@ def fromDPMDirectory(cls, name, directory, redirector='root://hephyse.oeaw.ac.at
logger.info("Not checking your proxy. Asuming you know it's still valid.")
logger.info( "Using proxy %s"%proxy )

import subprocess

files = []
for d in directories:
cmd = [ "xrdfs", redirector, "ls", d ]
fileList = subprocess.check_output( cmd ).split("\n")[:-1]
fileList = []
for i in range(10):
try:
fileList = [ file for file in subprocess.check_output( cmd ).split("\n")[:-1] ]
break
except:
if i<9: pass
counter = 0
for filename in fileList:
if filename.endswith(".root"):
files.append( redirector + os.path.join( d, filename ) )
counter += 1
if maxN is not None and maxN>0 and len(files)>=maxN:
break
if counter==0:
raise helpers.EmptySampleError( "No root files found in directory %s." %d )

sample = cls(name = name, treeName = treeName, files = files, normalization = normalization, xSection = xSection,\
selectionString = selectionString, weightString = weightString,
isData = isData, color=color, texName = texName)
Expand Down Expand Up @@ -269,7 +280,6 @@ def nanoAODfromDAS(cls, name, DASname, instance = 'global', redirector='root://h
get nanoAOD from DAS and make a local copy on afs
if overwrite is true, old entries will be overwritten, no matter what the old entry contains. if overwrite=='update', file-list and normalization are checked, and only if they potentially changed the old entry is overwritten.
'''
from multiprocessing import Pool
from RootTools.fwlite.Database import Database
import json

Expand Down Expand Up @@ -390,13 +400,107 @@ def _dasPopen(dbs):
sample.nEvents = int(nEvents)
return sample

@classmethod
def nanoAODfromDPM(cls, name, directory, redirector='root://hephyse.oeaw.ac.at/', dbFile=None, overwrite=False, treeName = "Events", maxN = None, \
        selectionString = None, weightString = None, xSection=-1,
        isData = False, color = 0, texName = None, multithreading=True, genWeight='genWeight', json=None, localSite='T2_AT_Vienna'):
    '''
    Get nanoAOD from DPM, similar to nanoAODfromDAS but for files listed with 'xrdfs <redirector> ls <directory>'.
    The "DAS" column in the database is kept for compatibility and stores the directory.

    If overwrite is true, old entries will be overwritten, no matter what the old entry contains.
    If overwrite=='update', file-list and normalization are checked, and only if they potentially changed the old entry is overwritten.

    name, treeName, selectionString, weightString, xSection, isData, color, texName
                -- forwarded to the Sample constructor
    directory   -- directory that is listed with xrdfs
    redirector  -- xrootd redirector, used for the listing and prepended to every file
    dbFile      -- cache database file; the cache is only used if this is set and maxN is not
    maxN        -- maximum number of files; None or a non-positive value means no limit
    genWeight   -- weight string used for the normalization unless the directory looks like data
                   ('Run20' in the directory and not ending with 'SIM')
    json        -- stored unchanged as sample.json
    multithreading, localSite
                -- accepted for signature compatibility, not used in this method

    Returns the sample with the additional attributes DAS, json and nEvents.
    '''
    # NOTE: the json module must not be imported here, it would shadow the 'json' argument
    # and sample.json would end up being the module instead of the value passed by the caller.
    from RootTools.fwlite.Database import Database

    maxN  = maxN if maxN is not None and maxN>0 else None
    limit = maxN if maxN else 0

    n_cache_files = 0
    # Don't use the cache on partial queries. maxN is either None or positive at this point.
    if dbFile is not None and maxN is None:
        # the column DAS will still be called DAS (not dir or directory) otherwise we run into problems in having "fromDPM" and "fromDAS" samples in one cache file
        cache = Database(dbFile, "fileCache", ["name", "DAS", "normalization", "nEvents"])
        n_cache_files = cache.contains({'name':name, 'DAS':directory})
    else:
        cache = None

    # first check if there are already files in the cache
    filesFromCache         = []
    normalizationFromCache = 0.
    nEventsFromCache       = 0
    if n_cache_files:
        cachedEntries          = cache.getDicts({'name':name, 'DAS':directory})
        filesFromCache         = [ f["value"] for f in cachedEntries ]
        normalizationFromCache = cachedEntries[0]["normalization"]
        nEventsFromCache       = cachedEntries[0]["nEvents"]

    # if we don't want to overwrite, and there's a filelist in the cache we're already done
    if n_cache_files and not overwrite:
        files         = filesFromCache
        normalization = normalizationFromCache
        nEvents       = nEventsFromCache

        logger.info('Found sample %s in cache %s, return %i files.', name, dbFile, len(files))

    else:
        # only entered if overwrite is set or the sample is not in the cache yet

        files = []
        cmd   = [ "xrdfs", redirector, "ls", directory ]
        # universal_newlines makes check_output return text, so the split works in python 2 and 3
        fileList = subprocess.check_output( cmd, universal_newlines = True ).split("\n")[:-1]

        for filename in fileList:
            if filename.endswith(".root"):
                # os.path.join returns filename unchanged if it is already an absolute path
                files.append( os.path.join( directory, filename ) )
            if maxN is not None and len(files)>=maxN:
                break

        if (sorted(files) == sorted(filesFromCache)) and float(normalizationFromCache) > 0.0 and overwrite=='update':
            # if the files didn't change we don't need to read the normalization again (slowest part!). If the norm was 0 previously, also get it again.
            logger.info("File list for %s didn't change. Skipping.", name)
            normalization = normalizationFromCache
            nEvents       = nEventsFromCache
            logger.info('Sample %s from cache %s returned %i files.', name, dbFile, len(files))

        else:
            # there is no cache object without dbFile or for partial queries, so nothing to remove then
            if overwrite and cache is not None:
                # remove old entry
                cache.removeObjects({"name":name, 'DAS':directory})
                logger.info("Removed old DB entry.")

            # need to read the proper normalization for MC
            logger.info("Reading normalization. This is slow, so grab a coffee.")
            tmp_sample = cls(name=name, files=[ redirector + f for f in files], treeName = treeName, selectionString = selectionString, weightString = weightString,
                isData = isData, color=color, texName = texName, xSection = xSection, normalization=1)
            normalizationWeight = genWeight if directory.endswith('SIM') or not 'Run20' in directory else "1"
            normalization = tmp_sample.getYieldFromDraw('(1)', normalizationWeight)['val']
            logger.info("Got normalization %s", normalization)
            nEvents = int(tmp_sample.getEventList().GetN())
            logger.info("Got number of events %s", nEvents)

            if cache is not None:
                for f in files:
                    cache.add({"name":name, 'DAS':directory, 'normalization':str(normalization), 'nEvents':nEvents}, f, save=True)

            logger.info('Found sample %s in cache %s, return %i files.', name, dbFile, len(files))

    if limit>0: files=files[:limit]
    # NOTE(review): the temporary sample above uses redirector + f, here an extra '/' is inserted - confirm that both forms are wanted
    sample = cls(name=name, files=[ redirector+'/'+f for f in files], treeName = treeName, selectionString = selectionString, weightString = weightString,
        isData = isData, color=color, texName = texName, normalization=float(normalization), xSection = xSection)
    sample.DAS     = directory
    sample.json    = json
    sample.nEvents = int(nEvents)
    return sample

@classmethod
def fromCMGOutput(cls, name, baseDirectory, treeFilename = 'tree.root', chunkString = None, treeName = 'tree', maxN = None, \
selectionString = None, xSection = -1, weightString = None,
isData = False, color = 0, texName = None):
'''Load a CMG output directory from e.g. unzipped crab output in the 'Chunks' directory structure.
Expects the presence of the tree root file and the SkimReport.txt
'''
''' Load a CMG output directory from e.g. unzipped crab output in the 'Chunks' directory structure.
Expects the presence of the tree root file and the SkimReport.txt
'''
from cmg_helpers import read_cmg_normalization
maxN = maxN if maxN is not None and maxN>0 else None

Expand Down Expand Up @@ -612,7 +716,8 @@ def __loadChain(self):
except IOError as e:
logger.error( "Could not load file %s", f )
#raise e

if counter==0:
raise helpers.EmptySampleError( "No root files for sample %s." %self.name )
logger.debug( "Loaded %i files for sample '%s'.", counter, self.name )

# Add friends
Expand Down
5 changes: 4 additions & 1 deletion examples/example_treeMaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys
import logging
import ROOT
import os

#RootTools
from RootTools.core.standard import *
Expand All @@ -25,7 +26,7 @@


# from files
s0 = Sample.fromFiles("s0", files = ["example_data/file_0.root"], treeName = "Events")
s0 = Sample.fromFiles("s0", files = [os.path.expandvars("$CMSSW_BASE/src/RootTools/examples/example_data/file_0.root")], treeName = "Events")

read_variables = [ TreeVariable.fromString( "nJet/I"), TreeVariable.fromString('Jet[pt/F,eta/F,phi/F]' ) ] \
+ [ TreeVariable.fromString(x) for x in [ 'met_pt/F', 'met_phi/F' ] ]
Expand All @@ -51,3 +52,5 @@ def filler(event):
maker.start()
while reader.run():
maker.run()

logger.info("Success!")
2 changes: 1 addition & 1 deletion fwlite/python/Database.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def getObjects(self, key):
return objs

except sqlite3.DatabaseError as e:
logger.error( "There seems to be an issue with the database, trying to read again." )
logger.error( "There seems to be an issue with the database, trying to read again from %s.", self.database_file )
logger.info( "Attempt no %i", i )
self.close()
self.connect()
Expand Down
51 changes: 51 additions & 0 deletions fwlite/python/FWLiteSample.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,57 @@ def fromDirectory(cls, name, directory, color = 0, texName = None, maxN = None):

return cls(name = name, files = files, color=color, texName = texName)

@classmethod
def fromDPMDirectory(cls, name, directory, prefix='root://hephyse.oeaw.ac.at/', texName = None, maxN = None, dbFile=None, overwrite=False, skipCheck = False):
    ''' Make sample from the files that 'xrdfs <prefix> ls <directory>' lists.

    name        -- name of the sample, also the key in the file cache
    directory   -- directory that is listed with xrdfs
    prefix      -- xrootd prefix, used for the listing and prepended to every file
    texName     -- forwarded to the constructor
    maxN        -- maximum number of files; None or a non-positive value means no limit
    dbFile      -- cache database file; the cache is only used if this is set and maxN is not
    overwrite   -- if true, the cached file list is removed and the directory is listed again
    skipCheck   -- if true, files are accepted without calling helpers.checkRootFile

    Raises RuntimeError if the environment variable LSB_JOBID is defined.
    Returns the sample; its DASname attribute is the prefix plus the directory without trailing '/'.
    '''

    maxN  = maxN if maxN is not None and maxN>0 else None
    limit = maxN if maxN else 0

    n_cache_files = 0
    # Don't use the cache on partial queries. maxN is either None or positive at this point.
    if dbFile is not None and maxN is None:
        cache = Database(dbFile, "fileCache", ["name"])
        n_cache_files = cache.contains({'name':name})
    else:
        cache = None

    if n_cache_files and not overwrite:
        files = [ f["value"] for f in cache.getDicts({'name':name}) ]
        logger.info('Found sample %s in cache %s, return %i files.', name, dbFile, len(files))
    else:
        # there is no cache object without dbFile or for partial queries, so nothing to remove then
        if overwrite and cache is not None:
            cache.removeObjects({"name":name})

        def _dasPopen(dbs):
            # refuse to run the query inside a batch job
            if 'LSB_JOBID' in os.environ:
                raise RuntimeError("Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs)
            logger.info('DAS query\t: %s', dbs)
            return os.popen(dbs)

        files = []
        dbs   = 'xrdfs %s ls %s'%(prefix,directory)
        pipe  = _dasPopen(dbs)
        try:
            dbsOut = pipe.readlines()
        finally:
            # don't leave the pipe to the xrdfs process open
            pipe.close()

        for line in dbsOut:
            # NOTE(review): only lines starting with '/store/' are used - confirm that the listing of the DPM directory has this form
            if not line.startswith('/store/'):
                continue
            filename = line.rstrip()
            try:
                accepted = skipCheck or helpers.checkRootFile(prefix+filename)
            except IOError:
                logger.warning( "IOError for file %s. Skipping.", filename )
                accepted = False

            if accepted:
                files.append(filename)
                # only files that end up in the file list are written to the cache
                if cache is not None:
                    cache.add({"name":name}, filename, save=True)

    if limit>0: files=files[:limit]

    result = cls(name, files=[prefix+filename for filename in files], texName = texName)
    result.DASname = prefix + directory.rstrip("/")
    return result


@classmethod
def fromDAS(cls, name, dataset, instance = 'global', prefix='root://cms-xrd-global.cern.ch/', texName = None, maxN = None, dbFile=None, overwrite=False, skipCheck = False):
''' Make sample from DAS.
Expand Down
35 changes: 27 additions & 8 deletions plot/python/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def draw(plot, \

# Draw the histos
same = ""
stuff = []
for i, l in enumerate(histos):
for j, h in enumerate(l):
# Get draw option. Neither Clone nor copy preserves attributes of histo
Expand All @@ -438,11 +439,12 @@ def draw(plot, \
h.GetYaxis().SetTitleOffset( 1.6 )

for modification in histModifications: modification(h)
#if drawOption=="e1": dataHist = h
if drawOption=="e1" or drawOption=="e0": stuff.append(h)#dataHist = h
h.Draw(drawOption+same)
same = "same"

topPad.RedrawAxis()
if not drawOption == 'AH':
topPad.RedrawAxis()
# Make the legend
if legend is not None:
legend_ = ROOT.TLegend(*legendCoordinates)
Expand Down Expand Up @@ -473,12 +475,21 @@ def draw(plot, \

for o in drawObjects:
if o:
if type(o) in [ ROOT.TF1, ROOT.TGraph, ROOT.TEfficiency ]:
o.Draw('same')
if type(o) in [ ROOT.TF1, ROOT.TGraph, ROOT.TEfficiency, ROOT.TH1F, ROOT.TH1D ]:
if hasattr(o, 'drawOption'):
o.Draw('same '+o.drawOption)
else:
o.Draw('same')
else:
o.Draw()
else:
logger.debug( "drawObjects has something I can't Draw(): %r", o)

# re-draw the main objects (ratio histograms) after the objects, otherwise they might be hidden
for h_main in stuff:
drawOption = h_main.drawOption if hasattr(h_main, "drawOption") else "hist"
h_main.Draw(drawOption+same)

# Make a ratio plot
if ratio is not None:
bottomPad.cd()
Expand Down Expand Up @@ -526,19 +537,20 @@ def draw(plot, \
if ratio.has_key('histModifications'):
for modification in ratio['histModifications']: modification(h_ratio)
drawOption = h_ratio.drawOption if hasattr(h_ratio, "drawOption") else "hist"
if drawOption == "e1": # hacking to show error bars within panel when central value is off scale
if drawOption == "e1" or drawOption == "e0": # hacking to show error bars within panel when central value is off scale
graph = ROOT.TGraphAsymmErrors(h_ratio) # cloning in order to get layout
graph.Set(0)
for bin in range(1, h_ratio.GetNbinsX()+1): # do not show error bars on hist
h_ratio.SetBinError(bin, 0.0001)
center = h_ratio.GetBinCenter(bin)
val = h_ratio.GetBinContent(bin)
errUp = num.GetBinErrorUp(bin)/den.GetBinContent(bin) if val > 0 else 0
errDown = num.GetBinErrorLow(bin)/den.GetBinContent(bin) if val > 0 else 0
errUp = num.GetBinErrorUp(bin)/den.GetBinContent(bin) if den.GetBinContent(bin) > 0 else 0
errDown = num.GetBinErrorLow(bin)/den.GetBinContent(bin) if den.GetBinContent(bin) > 0 else 0
graph.SetPoint(bin, center, val)
graph.SetPointError(bin, 0, 0, errDown, errUp)
h_ratio.Draw("e0"+same)
graph.Draw("P0 same")
graph.drawOption = "P0"
stuff.append( graph )
else:
h_ratio.Draw(drawOption+same)
Expand All @@ -555,9 +567,16 @@ def draw(plot, \

for o in ratio['drawObjects']:
if o:
o.Draw()
if hasattr(o, 'drawOption'):
o.Draw(o.drawOption)
else:
o.Draw()
else:
logger.debug( "ratio['drawObjects'] has something I can't Draw(): %r", o)
# re-draw the main objects (ratio histograms) after the objects, otherwise they might be hidden
for h_ratio in stuff:
drawOption = h_ratio.drawOption if hasattr(h_ratio, "drawOption") else "hist"
h_ratio.Draw(drawOption+same)

if not os.path.exists(plot_directory):
try:
Expand Down
4 changes: 2 additions & 2 deletions plot/python/styles.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import ROOT

def errorStyle( color, markerStyle = 20, markerSize = 1, width = 1, drawOption='e1'):
    ''' Return a function that applies a marker/error-bar style to a histogram.

    color       -- used as line color and marker color
    markerStyle -- marker style
    markerSize  -- marker size
    width       -- line width
    drawOption  -- stored as attribute 'drawOption' on the histogram, defaults to "e1"

    The returned function takes the histogram, modifies it in place and returns None.
    '''
    def func( histo ):
        histo.SetLineColor( color )
        histo.SetMarkerSize( markerSize )
        histo.SetMarkerStyle( markerStyle )
        histo.SetMarkerColor( color )
        histo.SetLineWidth( width )
        # plain python attribute on the histogram object, it is not passed to any Set* method here
        histo.drawOption = drawOption
        return
    return func

Expand Down