diff --git a/core/python/Sample.py b/core/python/Sample.py
index c8fc40b..9a60d1f 100644
--- a/core/python/Sample.py
+++ b/core/python/Sample.py
@@ -9,6 +9,8 @@ import random
 from array import array
 from math import sqrt
 
+import subprocess
+
 # Logging
 import logging
 logger = logging.getLogger(__name__)
@@ -206,17 +208,26 @@ def fromDPMDirectory(cls, name, directory, redirector='root://hephyse.oeaw.ac.at
             logger.info("Not checking your proxy. Asuming you know it's still valid.")
             logger.info( "Using proxy %s"%proxy )
 
-        import subprocess
-
         files = []
         for d in directories:
             cmd = [ "xrdfs", redirector, "ls", d ]
-            fileList = subprocess.check_output( cmd ).split("\n")[:-1]
+            fileList = []
+            for i in range(10):  # the xrdfs listing can fail transiently, so retry a few times
+                try:
+                    fileList = subprocess.check_output( cmd ).split("\n")[:-1]
+                    break
+                except subprocess.CalledProcessError:
+                    if i==9: raise
+            counter = 0
             for filename in fileList:
                 if filename.endswith(".root"):
                     files.append( redirector + os.path.join( d, filename ) )
+                    counter += 1
                 if maxN is not None and maxN>0 and len(files)>=maxN:
                     break
+            if counter==0:
+                raise helpers.EmptySampleError( "No root files found in directory %s." %d )
+
         sample = cls(name = name, treeName = treeName, files = files, normalization = normalization, xSection = xSection,\
             selectionString = selectionString, weightString = weightString, isData = isData, color=color, texName = texName)
 
@@ -269,7 +280,6 @@ def nanoAODfromDAS(cls, name, DASname, instance = 'global', redirector='root://h
         get nanoAOD from DAS and make a local copy on afs
         if overwrite is true, old entries will be overwritten, no matter what the old entry contains. if overwrite=='update', file-list and normalization are checked, and only if they potentially changed the old entry is overwritten.
         '''
-        from multiprocessing import Pool
         from RootTools.fwlite.Database import Database
         import json
 
@@ -390,13 +400,107 @@ def _dasPopen(dbs):
         sample.nEvents = int(nEvents)
         return sample
 
+    @classmethod
+    def nanoAODfromDPM(cls, name, directory, redirector='root://hephyse.oeaw.ac.at/', dbFile=None, overwrite=False, treeName = "Events", maxN = None, \
+            selectionString = None, weightString = None, xSection=-1,
+            isData = False, color = 0, texName = None, multithreading=True, genWeight='genWeight', json=None, localSite='T2_AT_Vienna'):
+        '''
+        get nanoAOD from DPM, similar to nanoAODfromDAS but for local files; the "DAS" entry in the database is kept for compatibility
+        if overwrite is true, old entries will be overwritten, no matter what the old entry contains. if overwrite=='update', file-list and normalization are checked, and only if they potentially changed the old entry is overwritten.
+        '''
+        from RootTools.fwlite.Database import Database
+        import json
+
+        maxN = maxN if maxN is not None and maxN>0 else None
+        limit = maxN if maxN else 0
+
+        n_cache_files = 0
+        # Don't use the cache on partial queries
+        if dbFile is not None and ( maxN<0 or maxN is None ):
+            # the column DAS will still be called DAS (not dir or directory), otherwise we run into problems having "fromDPM" and "fromDAS" samples in one cache file
+            cache = Database(dbFile, "fileCache", ["name", "DAS", "normalization", "nEvents"])
+            n_cache_files = cache.contains({'name':name, 'DAS':directory})
+        else:
+            cache = None
+
+        # first check if there are already files in the cache
+        normalizationFromCache = 0.
+ if n_cache_files: + filesFromCache = [ f["value"] for f in cache.getDicts({'name':name, 'DAS':directory}) ] + normalizationFromCache = cache.getDicts({'name':name, 'DAS':directory})[0]["normalization"] + nEventsFromCache = cache.getDicts({'name':name, 'DAS':directory})[0]["nEvents"] + else: + filesFromCache = [] + + # if we don't want to overwrite, and there's a filelist in the cache we're already done + if n_cache_files and not overwrite: + files = filesFromCache + normalization = normalizationFromCache + nEvents = nEventsFromCache + + logger.info('Found sample %s in cache %s, return %i files.', name, dbFile, len(files)) + + else: + # only entered if overwrite is not set or sample not in the cache yet + + sampleName = directory.rstrip('/') + query, qwhat = sampleName, "dataset" + + files = [] + cmd = [ "xrdfs", redirector, "ls", directory ] + fileList = [ file for file in subprocess.check_output( cmd ).split("\n")[:-1] ] + + for filename in fileList: + if filename.endswith(".root"): +# files.append( redirector + os.path.join( directory, filename ) ) + files.append( os.path.join( directory, filename ) ) + if maxN is not None and maxN>0 and len(files)>=maxN: + break + + if (sorted(files) == sorted(filesFromCache)) and float(normalizationFromCache) > 0.0 and overwrite=='update': + # if the files didn't change we don't need to read the normalization again (slowest part!). If the norm was 0 previously, also get it again. + logger.info("File list for %s didn't change. Skipping.", name) + normalization = normalizationFromCache + nEvents = nEventsFromCache + logger.info('Sample %s from cache %s returned %i files.', name, dbFile, len(files)) + + else: + if overwrite: + # remove old entry + cache.removeObjects({"name":name, 'DAS':directory}) + logger.info("Removed old DB entry.") + + # need to read the proper normalization for MC + logger.info("Reading normalization. This is slow, so grab a coffee.") + tmp_sample = cls(name=name, files=[ redirector + f for f in files], treeName = treeName, selectionString = selectionString, weightString = weightString, + isData = isData, color=color, texName = texName, xSection = xSection, normalization=1) + normalization = tmp_sample.getYieldFromDraw('(1)', genWeight if directory.endswith('SIM') or not 'Run20' in directory else "1")['val'] + logger.info("Got normalization %s", normalization) + nEvents = int(tmp_sample.getEventList().GetN()) + logger.info("Got number of events %s", nEvents) + + for f in files: + if cache is not None: + cache.add({"name":name, 'DAS':directory, 'normalization':str(normalization), 'nEvents':nEvents}, f, save=True) + + logger.info('Found sample %s in cache %s, return %i files.', name, dbFile, len(files)) + + + if limit>0: files=files[:limit] + sample = cls(name=name, files=[ redirector+'/'+f for f in files], treeName = treeName, selectionString = selectionString, weightString = weightString, + isData = isData, color=color, texName = texName, normalization=float(normalization), xSection = xSection) + sample.DAS = directory + sample.json = json + sample.nEvents = int(nEvents) + return sample + @classmethod def fromCMGOutput(cls, name, baseDirectory, treeFilename = 'tree.root', chunkString = None, treeName = 'tree', maxN = None, \ selectionString = None, xSection = -1, weightString = None, isData = False, color = 0, texName = None): - '''Load a CMG output directory from e.g. unzipped crab output in the 'Chunks' directory structure. 
- Expects the presence of the tree root file and the SkimReport.txt - ''' + ''' Load a CMG output directory from e.g. unzipped crab output in the 'Chunks' directory structure. + Expects the presence of the tree root file and the SkimReport.txt + ''' from cmg_helpers import read_cmg_normalization maxN = maxN if maxN is not None and maxN>0 else None @@ -612,7 +716,8 @@ def __loadChain(self): except IOError as e: logger.error( "Could not load file %s", f ) #raise e - + if counter==0: + raise helpers.EmptySampleError( "No root files for sample %s." %self.name ) logger.debug( "Loaded %i files for sample '%s'.", counter, self.name ) # Add friends diff --git a/examples/example_treeMaker.py b/examples/example_treeMaker.py index e32200b..0978702 100644 --- a/examples/example_treeMaker.py +++ b/examples/example_treeMaker.py @@ -5,6 +5,7 @@ import sys import logging import ROOT +import os #RootTools from RootTools.core.standard import * @@ -25,7 +26,7 @@ # from files -s0 = Sample.fromFiles("s0", files = ["example_data/file_0.root"], treeName = "Events") +s0 = Sample.fromFiles("s0", files = [os.path.expandvars("$CMSSW_BASE/src/RootTools/examples/example_data/file_0.root")], treeName = "Events") read_variables = [ TreeVariable.fromString( "nJet/I"), TreeVariable.fromString('Jet[pt/F,eta/F,phi/F]' ) ] \ + [ TreeVariable.fromString(x) for x in [ 'met_pt/F', 'met_phi/F' ] ] @@ -51,3 +52,5 @@ def filler(event): maker.start() while reader.run(): maker.run() + +logger.info("Success!") diff --git a/fwlite/python/Database.py b/fwlite/python/Database.py index 3363d3a..7c2b50f 100644 --- a/fwlite/python/Database.py +++ b/fwlite/python/Database.py @@ -68,7 +68,7 @@ def getObjects(self, key): return objs except sqlite3.DatabaseError as e: - logger.error( "There seems to be an issue with the database, trying to read again." 
) + logger.error( "There seems to be an issue with the database, trying to read again from %s.", self.database_file ) logger.info( "Attempt no %i", i ) self.close() self.connect() diff --git a/fwlite/python/FWLiteSample.py b/fwlite/python/FWLiteSample.py index 8f6be01..f124a67 100644 --- a/fwlite/python/FWLiteSample.py +++ b/fwlite/python/FWLiteSample.py @@ -85,6 +85,57 @@ def fromDirectory(cls, name, directory, color = 0, texName = None, maxN = None): return cls(name = name, files = files, color=color, texName = texName) + @classmethod + def fromDPMDirectory(cls, name, directory, prefix='root://hephyse.oeaw.ac.at/', texName = None, maxN = None, dbFile=None, overwrite=False, skipCheck = False): + + maxN = maxN if maxN is not None and maxN>0 else None + limit = maxN if maxN else 0 + + n_cache_files = 0 + # Don't use the cache on partial queries + if dbFile is not None and ( maxN<0 or maxN is None ): + cache = Database(dbFile, "fileCache", ["name"]) + n_cache_files = cache.contains({'name':name}) + else: + cache = None + + if n_cache_files and not overwrite: + files = [ f["value"] for f in cache.getDicts({'name':name}) ] + logger.info('Found sample %s in cache %s, return %i files.', name, dbFile, len(files)) + else: + if overwrite: + cache.removeObjects({"name":name}) + + def _dasPopen(dbs): + if 'LSB_JOBID' in os.environ: + raise RuntimeError, "Trying to do a DAS query while in a LXBatch job (env variable LSB_JOBID defined)\nquery was: %s" % dbs + logger.info('DAS query\t: %s', dbs) + return os.popen(dbs) + + files = [] + dbs='xrdfs %s ls %s'%(prefix,directory) + dbsOut = _dasPopen(dbs).readlines() + + for line in dbsOut: + if line.startswith('/store/'): + line = line.rstrip() + filename = line + try: + if skipCheck or helpers.checkRootFile(prefix+filename): + files.append(filename) + except IOError: + logger.warning( "IOError for file %s. Skipping.", filename ) + + if cache is not None: + cache.add({"name":name}, filename, save=True) + + if limit>0: files=files[:limit] + + result = cls(name, files=[prefix+file for file in files], texName = texName) + result.DASname = prefix + directory.rstrip("/") + return result + + @classmethod def fromDAS(cls, name, dataset, instance = 'global', prefix='root://cms-xrd-global.cern.ch/', texName = None, maxN = None, dbFile=None, overwrite=False, skipCheck = False): ''' Make sample from DAS. diff --git a/plot/python/plotting.py b/plot/python/plotting.py index cae2653..a5538fd 100644 --- a/plot/python/plotting.py +++ b/plot/python/plotting.py @@ -413,6 +413,7 @@ def draw(plot, \ # Draw the histos same = "" + stuff = [] for i, l in enumerate(histos): for j, h in enumerate(l): # Get draw option. 
Neither Clone nor copy preserves attributes of histo @@ -438,11 +439,12 @@ def draw(plot, \ h.GetYaxis().SetTitleOffset( 1.6 ) for modification in histModifications: modification(h) - #if drawOption=="e1": dataHist = h + if drawOption=="e1" or drawOption=="e0": stuff.append(h)#dataHist = h h.Draw(drawOption+same) same = "same" - topPad.RedrawAxis() + if not drawOption == 'AH': + topPad.RedrawAxis() # Make the legend if legend is not None: legend_ = ROOT.TLegend(*legendCoordinates) @@ -473,12 +475,21 @@ def draw(plot, \ for o in drawObjects: if o: - if type(o) in [ ROOT.TF1, ROOT.TGraph, ROOT.TEfficiency ]: - o.Draw('same') + if type(o) in [ ROOT.TF1, ROOT.TGraph, ROOT.TEfficiency, ROOT.TH1F, ROOT.TH1D ]: + if hasattr(o, 'drawOption'): + o.Draw('same '+o.drawOption) + else: + o.Draw('same') else: o.Draw() else: logger.debug( "drawObjects has something I can't Draw(): %r", o) + + # re-draw the main objects (ratio histograms) after the objects, otherwise they might be hidden + for h_main in stuff: + drawOption = h_main.drawOption if hasattr(h_main, "drawOption") else "hist" + h_main.Draw(drawOption+same) + # Make a ratio plot if ratio is not None: bottomPad.cd() @@ -526,19 +537,20 @@ def draw(plot, \ if ratio.has_key('histModifications'): for modification in ratio['histModifications']: modification(h_ratio) drawOption = h_ratio.drawOption if hasattr(h_ratio, "drawOption") else "hist" - if drawOption == "e1": # hacking to show error bars within panel when central value is off scale + if drawOption == "e1" or drawOption == "e0": # hacking to show error bars within panel when central value is off scale graph = ROOT.TGraphAsymmErrors(h_ratio) # cloning in order to get layout graph.Set(0) for bin in range(1, h_ratio.GetNbinsX()+1): # do not show error bars on hist h_ratio.SetBinError(bin, 0.0001) center = h_ratio.GetBinCenter(bin) val = h_ratio.GetBinContent(bin) - errUp = num.GetBinErrorUp(bin)/den.GetBinContent(bin) if val > 0 else 0 - errDown = num.GetBinErrorLow(bin)/den.GetBinContent(bin) if val > 0 else 0 + errUp = num.GetBinErrorUp(bin)/den.GetBinContent(bin) if den.GetBinContent(bin) > 0 else 0 + errDown = num.GetBinErrorLow(bin)/den.GetBinContent(bin) if den.GetBinContent(bin) > 0 else 0 graph.SetPoint(bin, center, val) graph.SetPointError(bin, 0, 0, errDown, errUp) h_ratio.Draw("e0"+same) graph.Draw("P0 same") + graph.drawOption = "P0" stuff.append( graph ) else: h_ratio.Draw(drawOption+same) @@ -555,9 +567,16 @@ def draw(plot, \ for o in ratio['drawObjects']: if o: - o.Draw() + if hasattr(o, 'drawOption'): + o.Draw(o.drawOption) + else: + o.Draw() else: logger.debug( "ratio['drawObjects'] has something I can't Draw(): %r", o) + # re-draw the main objects (ratio histograms) after the objects, otherwise they might be hidden + for h_ratio in stuff: + drawOption = h_ratio.drawOption if hasattr(h_ratio, "drawOption") else "hist" + h_ratio.Draw(drawOption+same) if not os.path.exists(plot_directory): try: diff --git a/plot/python/styles.py b/plot/python/styles.py index 950885c..3aad714 100644 --- a/plot/python/styles.py +++ b/plot/python/styles.py @@ -1,6 +1,6 @@ import ROOT -def errorStyle( color, markerStyle = 20, markerSize = 1, width = 1): +def errorStyle( color, markerStyle = 20, markerSize = 1, width = 1, drawOption='e1'): def func( histo ): histo.SetLineColor( color ) histo.SetMarkerSize( markerSize ) @@ -8,7 +8,7 @@ def func( histo ): histo.SetMarkerColor( color ) # histo.SetFillColor( color ) histo.SetLineWidth( width ) - histo.drawOption = "e1" + histo.drawOption = drawOption return 
return func
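
Usage sketch (illustration only, not part of the patch above): how the two new DPM helpers, Sample.nanoAODfromDPM and FWLiteSample.fromDPMDirectory, might be called once this diff is applied. The sample names, the /dpm/... directories and the cache-file paths below are placeholders, and the FWLiteSample import path is an assumption based on the usual RootTools package layout.

    import logging
    logger = logging.getLogger(__name__)

    from RootTools.core.standard import *                      # provides Sample, as in the examples
    from RootTools.fwlite.FWLiteSample import FWLiteSample     # assumed import path

    # nanoAOD stored on the local DPM site; file list, normalization and event count
    # are cached in dbFile, so subsequent calls are fast
    sample = Sample.nanoAODfromDPM(
        name       = "TTLep_example",                                            # placeholder
        directory  = "/dpm/oeaw.ac.at/home/cms/store/user/<user>/TTLep_nanoAOD", # placeholder
        redirector = "root://hephyse.oeaw.ac.at/",
        dbFile     = "sample_cache.sql",                                         # placeholder
        overwrite  = "update",   # re-check the file list, keep the cached normalization if unchanged
    )
    logger.info( "Got %i files, normalization %f", len(sample.files), sample.normalization )

    # FWLite counterpart: plain file list from a DPM directory, optionally cached as well
    fwlite_sample = FWLiteSample.fromDPMDirectory(
        name      = "TTLep_example_fwlite",                                      # placeholder
        directory = "/dpm/oeaw.ac.at/home/cms/store/user/<user>/TTLep_MiniAOD",  # placeholder
        dbFile    = "fwlite_cache.sql",                                          # placeholder
        skipCheck = True,        # do not open every file to verify it is a readable root file
    )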
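
A second sketch for the plotting-side changes: errorStyle now takes a drawOption argument instead of hard-coding "e1", and drawObjects entries of histogram type (or anything carrying a drawOption attribute) are drawn with that option by plotting.draw. The styles import path and the histogram below are assumptions for illustration.

    import ROOT
    import RootTools.plot.styles as styles      # assumed import path

    # request 'e0' markers for a data-style histogram (previously always 'e1');
    # typically assigned to a sample, e.g. data_sample.style = data_style
    data_style = styles.errorStyle( ROOT.kBlack, drawOption = 'e0' )

    # an auxiliary histogram handed to plotting.draw via drawObjects; the added code
    # appends the optional drawOption attribute when drawing it with 'same'
    h_band = ROOT.TH1F( "h_band", "h_band", 10, 0., 100. )   # placeholder histogram
    h_band.drawOption = "E2"                                  # e.g. drawn as an error band
    # plotting.draw( plot, ..., drawObjects = [ h_band ] )    # plot construction omitted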