diff --git a/Background/test/BiasStudy.cpp b/Background/test/BiasStudy.cpp index 0a4c594b..0727ea9e 100644 --- a/Background/test/BiasStudy.cpp +++ b/Background/test/BiasStudy.cpp @@ -406,7 +406,7 @@ std::cout << "DEBUG LC 4 " <second,mass,80,Form("%s/plots/toys/job%d_toy%d_fit_paul",outDir.c_str(),jobn,toy)); //continue; // -------------------------------- - // cout << "Fitting toy for truth model " << distance(toys.begin(),it) << "/" << toys.size() << " (" << it->first << ") " << endl; + cout << "Fitting toy for truth model " << distance(toys.begin(),it) << "/" << toys.size() << " (" << it->first << ") " << endl; // get Fabian envelope pair > fabianMinNlls = fabianProfiler.profileLikelihood(it->second,mass,mu,mu_low,mu_high,mu_step); pair > fabianEnvelope = fabianProfiler.computeEnvelope(fabianMinNlls,Form("fabEnvelope_job%d_%s_cat%d_toy%d",jobn,it->first.c_str(),cat,toy),0.); diff --git a/Background/test/fTest.cpp b/Background/test/fTest.cpp index 41ebf84f..ad26e824 100644 --- a/Background/test/fTest.cpp +++ b/Background/test/fTest.cpp @@ -19,7 +19,7 @@ #include "RooAbsPdf.h" #include "RooArgSet.h" #include "RooFitResult.h" -// #include "RooMinuit.h" +#include "RooMinuit.h" #include "RooMinimizer.h" #include "RooMsgService.h" #include "RooDataHist.h" diff --git a/Background/test/fTest2D.cpp b/Background/test/fTest2D.cpp index 0ed96eb0..a40bc67d 100644 --- a/Background/test/fTest2D.cpp +++ b/Background/test/fTest2D.cpp @@ -6,7 +6,6 @@ #include "boost/program_options.hpp" #include "boost/lexical_cast.hpp" -#include "TObjString.h" #include "TFile.h" #include "TMath.h" @@ -20,7 +19,7 @@ #include "RooAbsPdf.h" #include "RooArgSet.h" #include "RooFitResult.h" -// #include "RooMinuit.h" +#include "RooMinuit.h" #include "RooMinimizer.h" #include "RooMsgService.h" #include "RooDataHist.h" @@ -55,17 +54,18 @@ using namespace boost; namespace po = program_options; -bool BLIND = false; +bool BLIND = true; bool runFtestCheckWithToys=false; int mgg_low =100; int mgg_high =180; -//int mjj_low =70; int mjj_low =70; int mjj_high =190; //int nBinsForMass = 4*(mgg_high-mgg_low); -int nBinsForMass = 2*(mgg_high-mgg_low); +int nBinsForMass = (mgg_high-mgg_low)/1.; //int nBinsForMass2 = (mjj_high-mjj_low); -int nBinsForMass2 = (mjj_high-mjj_low); +int nBinsForMass2 = (mjj_high-mjj_low)/5.; +//int mjj_blind_p1 = 105; +//int mjj_blind_p2 = 145; RooRealVar *intLumi_ = new RooRealVar("IntLumi","hacked int lumi", 1000.); @@ -312,7 +312,7 @@ double getGoodnessOfFit(RooRealVar *mass, RooAbsPdf *mpdf, RooDataSet *data, std } -void plot(RooRealVar *mass, RooAbsPdf *pdf, RooDataSet *data, string name,vector flashggCats_, int status, double *prob, int proj=1){ +void plot(RooRealVar *mass, RooRealVar *mgg, RooAbsPdf *pdf, RooDataSet *data, string name,vector flashggCats_, int status, double *prob, int proj=1){ // Chi2 taken from full range fit RooPlot *plot_chi2 = mass->frame(); @@ -335,14 +335,18 @@ void plot(RooRealVar *mass, RooAbsPdf *pdf, RooDataSet *data, string name,vector else data->plotOn(plot,Binning(nBinsForMass)); } if(proj==2){ - mass->setRange("unblindReg_3",mjj_low,105); - mass->setRange("unblindReg_4",145,mjj_high); + mgg->setRange("unblindReg_3",100,110); + mgg->setRange("unblindReg_4",170,180); if (BLIND) { + cout<<"blind jj"<plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_3")); data->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_4")); data->plotOn(plot,Binning(nBinsForMass2),Invisible()); + } - else data->plotOn(plot,Binning(nBinsForMass2)); + else + data->plotOn(plot,Binning(nBinsForMass2)); } TCanvas *canv = new TCanvas(); @@ -363,7 +367,7 @@ void plot(RooRealVar *mass, RooAbsPdf *pdf, RooDataSet *data, string name,vector delete canv; delete lat; } -void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1, int proj=1){ +void plot(RooRealVar *mass, RooRealVar *mgg, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1, int proj=1){ TCanvas *canv = new TCanvas(Form("canvas_%d",proj),Form("canvas_%d",proj)); TPad *pad1 = new TPad(Form("pad1_%d",proj),Form("pad1_%d",proj),0,0.25,1,1); @@ -394,15 +398,18 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet else data->plotOn(plot,Binning(nBinsForMass)); } if(proj==2){ - mass->setRange("unblindReg_3",mjj_low,105); - mass->setRange("unblindReg_4",145,mjj_high); + mgg->setRange("unblindReg_3",100,115); + mgg->setRange("unblindReg_4",135,180); if (BLIND) { - // data->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_3,unblindReg_4")); - data->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_3"),LineColor(kWhite),MarkerColor(kWhite)); - data->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_4"),LineColor(kWhite),MarkerColor(kWhite)); - data->plotOn(plot,Binning(nBinsForMass2),Invisible(),LineColor(kWhite),MarkerColor(kWhite)); + // cout<<"check bins"< " + std::to_string(mjj_low) + ")"; + RooDataSet* filteredData = (RooDataSet*)data->reduce("CMS_hgg_mass <= 115 | CMS_hgg_mass >= 135"); + // filteredData->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_3"),LineColor(kBlack),MarkerColor(kBlack),Name("Data1")); + // filteredData->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_4"),LineColor(kBlack),MarkerColor(kBlack),Name("Data2")); + filteredData->plotOn(plot,Binning(nBinsForMass2),LineColor(kBlack),MarkerColor(kBlack)); } - else data->plotOn(plot,Binning(nBinsForMass2)); + else + data->plotOn(plot,Binning(nBinsForMass2)); } vector pdfNameSplit; @@ -412,13 +419,14 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet //split in mgg and mjj func by splitting in _. string currentPdfName(pdfs->getCurrentPdf()->GetName()); split(pdfNameSplit,currentPdfName,boost::is_any_of("_")); - if (proj==1) bestFitFuncNameOtherProj = pdfNameSplit[2]; //first is Mgg, then is Dijet_mass + if (proj==1) bestFitFuncNameOtherProj = pdfNameSplit[2]; //first is Mgg, then is Mjj if (proj==2) bestFitFuncNameOtherProj = pdfNameSplit[1]; } } cout<<"Best fit name : "<getObject(plot->numItems()-1); int npoints = plotdata->GetN(); + cout<<"check Npoint"<getIndex(); @@ -445,8 +453,8 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet string currentPdfName(pdfs->getCurrentPdf()->GetName()); split(pdfNameSplit,currentPdfName,boost::is_any_of("_")); string currentFuncNameOtherProj; - if (proj==1) currentFuncNameOtherProj = pdfNameSplit[2]; //first is Mgg, then is Dijet_mass - if (proj==2) currentFuncNameOtherProj = pdfNameSplit[1]; //first is Mgg, then is Dijet_mass + if (proj==1) currentFuncNameOtherProj = pdfNameSplit[2]; //first is Mgg, then is Mjj + if (proj==2) currentFuncNameOtherProj = pdfNameSplit[1]; //first is Mgg, then is Mjj if (currentFuncNameOtherProj==bestFitFuncNameOtherProj) { pdfs->getCurrentPdf()->plotOn(plot,LineColor(col),LineStyle(style));//,RooFit::NormRange("fitdata_1,fitdata_2")); pdfToConsiderCounter+=1; @@ -480,9 +488,9 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet if (BLIND && proj==1) { if ((xtmp > 115 ) && ( xtmp < 135) ) continue; } - if (BLIND && proj==2) { - if ((xtmp > 105 ) && ( xtmp < 145) ) continue; - } + /* if (BLIND && proj==2) { + if ((xtmp > mjj_blind_p1 ) && ( xtmp < mjj_blind_p2) ) continue; + }*/ // std::cout << "[INFO] plotdata->Integral() " << plotdata->Integral() << " ( bins " << npoints << ") hbkgplots[i]->Integral() " << hbplottmp->Integral() << " (bins " << hbplottmp->GetNbinsX() << std::endl; double errhi = plotdata->GetErrorYhigh(ipoint); @@ -511,9 +519,9 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet hdummy->GetYaxis()->SetLabelSize(0.07); hdummy->GetXaxis()->SetLabelSize(0.1); if(proj==1){ - hdummy->GetXaxis()->SetTitle("m_{#gamma#gamma} (GeV)"); + hdummy->GetXaxis()->SetTitle("m_{#gamma#gamma} [GeV]"); } else { - hdummy->GetXaxis()->SetTitle("m_{jj} (GeV)"); + hdummy->GetXaxis()->SetTitle("m_{jj} [GeV]"); } hdummy->GetXaxis()->SetTitleSize(0.12); hdummy->Draw("HIST"); @@ -528,7 +536,7 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet line3->Draw(); hdatasub->Draw("PESAME"); pad1->cd(); - if (BLIND) hdata->Draw("PESAME"); + if (BLIND && proj==1) hdata->Draw("PESAME"); // enf extra bit for ratio plot/// canv->SaveAs(Form("%s.pdf",name.c_str())); canv->SaveAs(Form("%s.jpg",name.c_str())); @@ -536,7 +544,7 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet delete canv; } -void plot(RooRealVar *mass, map pdfs, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1, int proj = 1){ +void plot(RooRealVar *mass, RooRealVar *mgg , map pdfs, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1, int proj = 1){ int color[7] = {kBlue,kRed,kMagenta,kGreen+1,kOrange+7,kAzure+10,kBlack}; TCanvas *canv = new TCanvas(); @@ -554,15 +562,17 @@ void plot(RooRealVar *mass, map pdfs, RooDataSet *data, strin } else data->plotOn(plot,Binning(nBinsForMass)); } - if(proj==2){ - mass->setRange("unblindReg_1",mjj_low,105); - mass->setRange("unblindReg_2",145,mjj_high); + if(proj==2){ + mgg->setRange("unblindReg_3",100,110); + mgg->setRange("unblindReg_4",170,180); if (BLIND) { - data->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_1")); - data->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_2")); + cout<<"blind jj"<plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_3")); + data->plotOn(plot,Binning(nBinsForMass2),CutRange("unblindReg_4")); data->plotOn(plot,Binning(nBinsForMass2),Invisible()); } - else data->plotOn(plot,Binning(nBinsForMass2)); + else + data->plotOn(plot,Binning(nBinsForMass2)); } TObject *datLeg = plot->getObject(int(plot->numItems()-1)); if(flashggCats_.size() >0){ @@ -593,7 +603,7 @@ void plot(RooRealVar *mass, map pdfs, RooDataSet *data, strin // *prob = getGoodnessOfFit(mass,(it->second),data,name); // cout<< Form("reduced #chi^{2} = %.3f, #chi^{2} = %.3f, Prob = %.2f",chi2,chi2*(nBinsForMass-np),*prob)<first.c_str(),ext.c_str(),chi2*(numBins-np),chi2,prob)<first.c_str(),ext.c_str(),chi2)<var("CMS_hgg_mass"); - // if(_fitStrategy==2) mjj = _w->var("Dijet_mass"); - // else mjj = new RooRealVar("Dijet_mass","Dijet_mass",mjj_low,mjj_high); + // if(_fitStrategy==2) mjj = _w->var("CMS_hjj_mass"); + // else mjj = new RooRealVar("CMS_hjj_mass","CMS_hjj_mass",mjj_low,mjj_high); if(_fitStrategy==2) mjj = _w->var("Dijet_mass"); else mjj = new RooRealVar("Dijet_mass","Dijet_mass",mjj_low,mjj_high); } else if(isbbggLimits_){ - mgg = _w->var("mgg"); - mjj = _w->var("mjj"); + mgg = _w->var("CMS_hgg_mass"); + mjj = _w->var("Dijet_mass"); } @@ -813,7 +823,7 @@ void BkgMultiModelFitAllOrders(TString OutputFileName, std::string jsonForEnvelo } else if(isbbggLimits_){ catname = Form("cat%d",c); - data[c] = (RooDataSet*)_w->data(Form("data_obs_%s",catname.c_str())); + data[c] = (RooDataSet*)_w->data(Form("data_mass_%s",catname.c_str())); } else{ catname = Form("%s",flashggCats_[c].c_str()); @@ -878,9 +888,9 @@ void BkgMultiModelFitAllOrders(TString OutputFileName, std::string jsonForEnvelo mggBkgTmp = getPdf(pdfsModel,functionClasses[mggfunc],mggfunc_ord, TString::Format("bkg_mgg_for%s%d_%s",label[mjjfunc],mjjfunc_ord,flashggCats_[c].c_str())); mjjBkgTmp = getPdf(pdfsModel_1,functionClasses[mjjfunc],mjjfunc_ord, TString::Format("bkg_mjj_for%s%d_%s",label[mggfunc],mggfunc_ord,flashggCats_[c].c_str())); - double gofProbMgg =0; +/* double gofProbMgg =0; double gofProbMjj =0; -/* int fitStatus=0; + int fitStatus=0; int proj=1; plot(mgg,mggBkgTmp,data[c],Form("%s/%s%d_mgg_gof_%s.pdf",outDir.c_str(),label[mggfunc],mggfunc_ord,flashggCats_[c].c_str()),flashggCats_,fitStatus,&gofProbMgg,proj); if (! ((gofProbMgg > 0.05) || (mggfunc_ord == func_ordmgg[c][mggfunc]) )){ // Good looking fit or one of our regular truth functions @@ -891,7 +901,7 @@ void BkgMultiModelFitAllOrders(TString OutputFileName, std::string jsonForEnvelo if (! ((gofProbMjj > 0.05) || (mjjfunc_ord == func_ordmjj[c][mjjfunc]) )){ // Good looking fit or one of our regular truth functions gofPass_mjj=false; } - cout<<"INFO Dijet_mass : "<import(norm); wBias->import(multipdf); wBias->import(*data[c]); - - plot(mgg,&multipdf,&category,data[c],Form("%s/multipdf_mass_%s",outDir.c_str(),flashggCats_[c].c_str()),flashggCats_,c,bestFitPdfIndex,1); - plot(mjj,&multipdf,&category,data[c],Form("%s/multipdf_mass2_%s",outDir.c_str(),flashggCats_[c].c_str()),flashggCats_,c,bestFitPdfIndex,2); + // datasetfull = (RooDataSet*)_w->data(Form("Data_13TeV_%s",catname.c_str())); + plot(mgg,mgg,&multipdf,&category,data[c],Form("%s/multipdf_mass_%s",outDir.c_str(),flashggCats_[c].c_str()),flashggCats_,c,bestFitPdfIndex,1); + plot(mjj,mgg,&multipdf,&category,data[c],Form("%s/multipdf_mass2_%s",outDir.c_str(),flashggCats_[c].c_str()),flashggCats_,c,bestFitPdfIndex,2); if(isFlashgg_==1 && _fitStrategy == 1){ RooDataHist dataBinned(Form("roohist_data_mass_%s",flashggCats_[c].c_str()),"data",*mgg,*data[c]); @@ -1001,7 +1011,7 @@ int main(int argc, char* argv[]){ lumi_8TeV = "19.1 fb^{-1}"; // default is "19.7 fb^{-1}" lumi_7TeV = "4.9 fb^{-1}"; // default is "5.1 fb^{-1}" lumi_sqrtS = "13 TeV"; // used with iPeriod = 0, e.g. for simulation-only plots (default is an empty string) - int year_ = 2016; + string year_ = "2016"; //int year_ = 2017; string fileName; @@ -1018,7 +1028,7 @@ int main(int argc, char* argv[]){ vector flashggCats_; bool isData_ =0; bool isbbggLimits_ =0; std::string jsonForEnvelope_ = ""; - + int massY_; po::options_description desc("Allowed options"); desc.add_options() ("help,h", "Show help") @@ -1033,14 +1043,16 @@ int main(int argc, char* argv[]){ ("is2012", "Run 2012 config") ("unblind", "Dont blind plots") ("isFlashgg", po::value(&isFlashgg_)->default_value(1), "Use Flashgg output ") - ("FitStrategy", po::value(&FitStrategy_)->default_value(2), "1D or 2D ") + ("FitStrategy", po::value(&FitStrategy_)->default_value(2), "1D or 2D ") ("isbbggLimits", po::value(&isbbggLimits_)->default_value(0), "Use bbggLimit output ") ("isData", po::value(&isData_)->default_value(0), "Use Data not MC ") ("flashggCats,f", po::value(&flashggCatsStr_)->default_value("UntaggedTag_0,UntaggedTag_1,UntaggedTag_2,UntaggedTag_3,UntaggedTag_4,VBFTag_0,VBFTag_1,VBFTag_2,TTHHadronicTag,TTHLeptonicTag,VHHadronicTag,VHTightTag,VHLooseTag,VHEtTag"), "Flashgg category names to consider") - ("jsonForEnvelope", po::value(&jsonForEnvelope_)->default_value(""), "Json file for envelope") - ("year", po::value(&year_)->default_value(2016), "Dataset year") + ("jsonForEnvelope", po::value(&jsonForEnvelope_)->default_value(""), "Json file for envelope") + ("massY,mY", po::value(&massY_)->default_value(700), "Y mass point") + ("year", po::value(&year_)->default_value("2016"), "Dataset year") ("verbose,v", "Run with more output") ; + po::variables_map vm; po::store(po::parse_command_line(argc,argv,desc),vm); po::notify(vm); @@ -1054,6 +1066,22 @@ int main(int argc, char* argv[]){ if (vm.count("verbose")) verbose=true; if (vm.count("runFtestCheckWithToys")) runFtestCheckWithToys=true; + std::cout << "modeling bkg for Y ===== " << massY_ << endl; + + /* + if(massY_ == 90){::mjj_blind_p1 = 80; ::mjj_blind_p2 = 100;} + else if(massY_ == 100){::mjj_blind_p1 = 85; ::mjj_blind_p2 = 120;} + else if(massY_ == 125){::mjj_blind_p1 = 105; ::mjj_blind_p2 = 145;} + else if(massY_ == 150){::mjj_blind_p1 = 125; ::mjj_blind_p2 = 165;} + else if(massY_ == 200){::mjj_blind_p1 = 160; ::mjj_blind_p2 = 210;} + else if(massY_ == 250){::mjj_blind_p1 = 210; ::mjj_blind_p2 = 260;} + else if(massY_ == 300){::mjj_blind_p1 = 240; ::mjj_blind_p2 = 310;} + else if(massY_ == 400){::mjj_blind_p1 = 330; ::mjj_blind_p2 = 410;} + else if(massY_ == 500){::mjj_blind_p1 = 420; ::mjj_blind_p2 = 520;} + else if(massY_ == 600){::mjj_blind_p1 = 500; ::mjj_blind_p2 = 620;} + else if(massY_ == 700){::mjj_blind_p1 = 600; ::mjj_blind_p2 = 720;} + else if(massY_ == 800){::mjj_blind_p1 = 680; ::mjj_blind_p2 = 820;} + */ if (!verbose) { RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); RooMsgService::instance().setSilentMode(true); @@ -1125,23 +1153,38 @@ int main(int argc, char* argv[]){ // vector > pdfs_vec; PdfModelBuilder pdfsModel,pdfsModel2; - RooRealVar *mass2,*mass; + RooRealVar *mass,*mass2=nullptr; if(isbbggLimits_){ - mass = (RooRealVar*)inWS->var("mgg"); - if(FitStrategy_==2) mass2 = (RooRealVar*)inWS->var("mjj"); + mass = (RooRealVar*)inWS->var("CMS_hgg_mass"); + if(FitStrategy_==2) mass2 = (RooRealVar*)inWS->var("Dijet_mass"); } - else{ - RooRealVar *mass = (RooRealVar*)inWS->var("CMS_hgg_mass"); //FIXME - //if(FitStrategy_==2) mass2 = (RooRealVar*)inWS->var("Dijet_mass"); //FIXME - if(FitStrategy_==2) {RooRealVar *mass2 = (RooRealVar*)inWS->var("Dijet_mass");} //FIXME + mass = (RooRealVar*)inWS->var("CMS_hgg_mass"); //FIXME + //if(FitStrategy_==2) mass2 = (RooRealVar*)inWS->var("CMS_hjj_mass"); //FIXME + if(FitStrategy_==2) mass2 = (RooRealVar*)inWS->var("Dijet_mass"); //FIXME } if(FitStrategy_!=2) mass2 = mass; - std:: cout << "[INFO] Got masses from ws " << mass; if(FitStrategy_==2) std::cout << " and "<< mass2; - std::cout<< std::endl; - + // std::cout<< std::endl; + // if (massY_ >= 200 && massY_ <= 250){ + // ::mjj_low = 70; + // ::mjj_high = 400; + // } + // if(massY_ >= 300 && massY_ <= 500){ + // ::mjj_low = 200; + // ::mjj_high = 560; + // } + // else if (massY_ > 500 ){ + // ::mjj_low =400; + // ::mjj_high = 1000; + // } + double min_value = mass2->getMin(); + double max_value = mass2->getMax(); + ::mjj_low = (int)min_value; + ::mjj_high = (int)max_value; + ::nBinsForMass2 = (mjj_high-mjj_low)/10; + std::cout << "modeling bkg for Y ===== " << massY_ << " with cuts = " << mjj_low << " " << mjj_high << " and " << nBinsForMass2 << endl; pdfsModel.setObsVar(mass); if(FitStrategy_==2) pdfsModel2.setObsVar(mass2); double upperEnvThreshold = 0.1; // upper threshold on delta(chi2) to include function in envelope (looser than truth function) @@ -1174,7 +1217,7 @@ int main(int argc, char* argv[]){ dataFull = (RooDataSet*)inWS->data(Form("Data_13TeV_%s",catname.c_str())); /*dataFull= (RooDataSet*) dataFull0->emptyClone(); * for (int i =0 ; i < dataFull0->numEntries() ; i++){ - * double m = dataFull0->get(i)->getRealValue("Dijet_mass"); + * double m = dataFull0->get(i)->getRealValue("CMS_hgg_mass"); * //if (m <(mgg_low+0.01) or m > (mgg_high-0.01)) * if (m==mgg_low){ * std::cout << "dataset mass m="<< m << std::endl; @@ -1185,15 +1228,15 @@ int main(int argc, char* argv[]){ if (verbose) std::cout << "[INFO] opened data for " << Form("Data_%s",catname.c_str()) <<" - " << dataFull <data(Form("data_obs_%s",catname.c_str())); - if (verbose) std::cout << "[INFO] opened data for " << Form("data_obs_%s",catname.c_str()) <<" - " << dataFull <data(Form("data_mass_%s",catname.c_str())); + if (verbose) std::cout << "[INFO] opened data for " << Form("data_mass_%s",catname.c_str()) <<" - " << dataFull <data(Form("data_mass_%s",catname.c_str())); if (verbose) std::cout << "[INFO] opened data for " << Form("data_mass_%s",catname.c_str()) <<" - " << dataFull <setBins(nBinsForMass); RooDataSet *data; //RooDataHist thisdataBinned(Form("roohist_data_mass_%s",flashggCats_[cat].c_str()),"data",*mass,*dataFull); @@ -1215,7 +1258,6 @@ int main(int argc, char* argv[]){ data = (RooDataSet*)&thisdataBinned; RooDataSet *data2 = nullptr; if(FitStrategy_==2) - std::cout << "binnn" << nBinsForMass2 << std::endl; mass2->setBins(nBinsForMass2); string thisdataBinned_name2; @@ -1272,7 +1314,7 @@ int main(int argc, char* argv[]){ } double gofProb=0; // otherwise we get it later ... - if (!saveMultiPdf) plot(mass,bkgPdf,data,Form("%s/%s%d_%s.pdf",outDir.c_str(),funcType->c_str(),order,flashggCats_[cat].c_str()),flashggCats_,fitStatus,&gofProb,proj); + if (!saveMultiPdf) plot(mass,mass,bkgPdf,data,Form("%s/%s%d_%s.pdf",outDir.c_str(),funcType->c_str(),order,flashggCats_[cat].c_str()),flashggCats_,fitStatus,&gofProb,proj); cout << "[INFO]\t " << *funcType << " " << order << " " << prevNll << " " << thisNll << " " << chi2 << " " << prob << endl; //fprintf(resFile,"%15s && %d && %10.2f && %10.2f && %10.2f \\\\\n",funcType->c_str(),order,thisNll,chi2,prob); prevNll=thisNll; @@ -1293,7 +1335,7 @@ int main(int argc, char* argv[]){ } choices_vec.push_back(choices); - plot(mass,pdfs,data,Form("%s/truths_mass_%s",outDir.c_str(),flashggCats_[cat].c_str()),flashggCats_,cat,-1,proj); + plot(mass,mass,pdfs,data,Form("%s/truths_mass_%s",outDir.c_str(),flashggCats_[cat].c_str()),flashggCats_,cat,-1,proj); if(FitStrategy_==2) { proj=2; @@ -1336,7 +1378,7 @@ int main(int argc, char* argv[]){ } double gofProb=0; // otherwise we get it later ... - if (!saveMultiPdf) plot(mass2,bkgPdf,data2,Form("%s/%s%d_2_%s.pdf",outDir.c_str(),funcType->c_str(),order,flashggCats_[cat].c_str()),flashggCats_,fitStatus,&gofProb,proj); + if (!saveMultiPdf) plot(mass2,mass,bkgPdf,data2,Form("%s/%s%d_2_%s.pdf",outDir.c_str(),funcType->c_str(),order,flashggCats_[cat].c_str()),flashggCats_,fitStatus,&gofProb,proj); cout << "[INFO]\t " << *funcType << " " << order << " " << prevNll << " " << thisNll << " " << chi2 << " " << prob << endl; //fprintf(resFile,"%15s && %d && %10.2f && %10.2f && %10.2f \\\\\n",funcType->c_str(),order,thisNll,chi2,prob); prevNll=thisNll; @@ -1357,7 +1399,7 @@ int main(int argc, char* argv[]){ } choices_vec2.push_back(choices2); - plot(mass2,pdfs2,data2,Form("%s/truths_mass2_%s",outDir.c_str(),flashggCats_[cat].c_str()),flashggCats_,cat,-1,proj); + plot(mass2,mass,pdfs2,data2,Form("%s/truths_mass2_%s",outDir.c_str(),flashggCats_[cat].c_str()),flashggCats_,cat,-1,proj); } else{ for (vector::iterator funcType=functionClasses.begin(); diff --git a/Background/test/fTest_2.cpp b/Background/test/fTest_2.cpp index 95b2b58a..f1d4b727 100644 --- a/Background/test/fTest_2.cpp +++ b/Background/test/fTest_2.cpp @@ -19,7 +19,7 @@ #include "RooAbsPdf.h" #include "RooArgSet.h" #include "RooFitResult.h" -// #include "RooMinuit.h" +#include "RooMinuit.h" #include "RooMinimizer.h" #include "RooMsgService.h" #include "RooDataHist.h" @@ -55,9 +55,7 @@ namespace po = program_options; bool BLIND = false; bool runFtestCheckWithToys=false; -int mgg_low =300; -int mgg_high =1000; -int nBinsForMass = 0.2*(mgg_high-mgg_low); + RooRealVar *intLumi_ = new RooRealVar("IntLumi","hacked int lumi", 1000.); @@ -99,13 +97,13 @@ void runFit(RooAbsPdf *pdf, RooDataSet *data, double *NLL, int *stat_t, int MaxT -double getProbabilityFtest(double chi2, int ndof,RooAbsPdf *pdfNull, RooAbsPdf *pdfTest, RooRealVar *mass, RooDataSet *data, std::string name){ +double getProbabilityFtest(double chi2, int mjj_low, int mjj_high, int ndof,RooAbsPdf *pdfNull, RooAbsPdf *pdfTest, RooRealVar *mass, RooDataSet *data, std::string name){ double prob_asym = TMath::Prob(chi2,ndof); if (!runFtestCheckWithToys) return prob_asym; int ndata = data->sumEntries(); - + int nBinsForMass = (mjj_high-mjj_low)/10; // fit the pdfs to the data and keep this fit Result (for randomizing) RooFitResult *fitNullData = pdfNull->fitTo(*data,RooFit::Save(1),RooFit::Strategy(1) ,RooFit::Minimizer("Minuit2","minimize"),RooFit::SumW2Error(kTRUE),RooFit::PrintLevel(-1)); //FIXME @@ -227,10 +225,11 @@ double getProbabilityFtest(double chi2, int ndof,RooAbsPdf *pdfNull, RooAbsPdf * } -double getGoodnessOfFit(RooRealVar *mass, RooAbsPdf *mpdf, RooDataSet *data, std::string name){ +double getGoodnessOfFit(RooRealVar *mass, int mjj_low, int mjj_high,RooAbsPdf *mpdf, RooDataSet *data, std::string name){ double prob; int ntoys = 500; + int nBinsForMass = (mjj_high-mjj_low)/10; // Routine to calculate the goodness of fit. name+="_gofTest.pdf"; RooRealVar norm("norm","norm",data->sumEntries(),0,10E6); @@ -308,9 +307,10 @@ double getGoodnessOfFit(RooRealVar *mass, RooAbsPdf *mpdf, RooDataSet *data, std } -void plot(RooRealVar *mass, RooAbsPdf *pdf, RooDataSet *data, string name,vector flashggCats_, int status, double *prob){ +void plot(RooRealVar *mass,RooRealVar *mgg, int mjj_low, int mjj_high,RooAbsPdf *pdf, RooDataSet *data, string name,vector flashggCats_, int status, double *prob){ // Chi2 taken from full range fit + int nBinsForMass=(mjj_high-mjj_low)/10; RooPlot *plot_chi2 = mass->frame(); data->plotOn(plot_chi2,Binning(nBinsForMass)); pdf->plotOn(plot_chi2); @@ -318,19 +318,19 @@ void plot(RooRealVar *mass, RooAbsPdf *pdf, RooDataSet *data, string name,vector int np = pdf->getParameters(*data)->getSize()+1; //Because this pdf has no extend double chi2 = plot_chi2->chiSquare(np); - *prob = getGoodnessOfFit(mass,pdf,data,name); + *prob = getGoodnessOfFit(mass,mjj_low,mjj_high,pdf,data,name); RooPlot *plot = mass->frame(); - mass->setRange("unblindReg_1",mgg_low,115); - mass->setRange("unblindReg_2",135,mgg_high); + mgg->setRange("unblindReg_1",100,115); + mgg->setRange("unblindReg_2",135,180); if (BLIND) { - data->plotOn(plot,Binning(mgg_high-mgg_low),CutRange("unblindReg_1")); - data->plotOn(plot,Binning(mgg_high-mgg_low),CutRange("unblindReg_2")); - data->plotOn(plot,Binning(mgg_high-mgg_low),Invisible()); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),CutRange("unblindReg_1")); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),CutRange("unblindReg_2")); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),Invisible()); } - else data->plotOn(plot,Binning(0.2*(mgg_high-mgg_low))); + else data->plotOn(plot,Binning((mjj_high-mjj_low)/10)); - // data->plotOn(plot,Binning(mgg_high-mgg_low)); + // data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10)); TCanvas *canv = new TCanvas(); pdf->plotOn(plot);//,RooFit::NormRange("fitdata_1,fitdata_2")); pdf->paramOn(plot,RooFit::Layout(0.34,0.96,0.89),RooFit::Format("NEA",AutoPrecision(1))); @@ -349,7 +349,7 @@ void plot(RooRealVar *mass, RooAbsPdf *pdf, RooDataSet *data, string name,vector delete canv; delete lat; } -void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1){ +void plot(RooRealVar *mass, RooRealVar *mgg, int mjj_low, int mjj_high, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1){ int color[7] = {kBlue,kRed,kMagenta,kGreen+1,kOrange+7,kAzure+10,kBlack}; TLegend *leg = new TLegend(0.5,0.55,0.92,0.88); @@ -357,14 +357,14 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet leg->SetLineColor(1); RooPlot *plot = mass->frame(); - mass->setRange("unblindReg_1",mgg_low,115); - mass->setRange("unblindReg_2",135,mgg_high); + mgg->setRange("unblindReg_1",100,115); + mgg->setRange("unblindReg_2",135,180); if (BLIND) { - data->plotOn(plot,Binning(mgg_high-mgg_low),CutRange("unblindReg_1")); - data->plotOn(plot,Binning(mgg_high-mgg_low),CutRange("unblindReg_2")); - data->plotOn(plot,Binning(mgg_high-mgg_low),Invisible()); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),CutRange("unblindReg_1")); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),CutRange("unblindReg_2")); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),Invisible()); } - else data->plotOn(plot,Binning(0.2*(mgg_high-mgg_low))); + else data->plotOn(plot,Binning((mjj_high-mjj_low)/10)); TCanvas *canv = new TCanvas(); ///start extra bit for ratio plot/// RooHist *plotdata = (RooHist*)plot->getObject(plot->numItems()-1); @@ -409,7 +409,7 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet leg->Draw("same"); CMS_lumi( canv, 0, 0); ///start extra bit for ratio plot/// - TH1D *hbplottmp = (TH1D*) pdf->createHistogram("hbplottmp",*mass,Binning(0.2*(mgg_high-mgg_low),0.2*(mgg_low,mgg_high))); + TH1D *hbplottmp = (TH1D*) pdf->createHistogram("hbplottmp",*mass,Binning((mjj_high-mjj_low)/10,mjj_low,mjj_high)); hbplottmp->Scale(plotdata->Integral()); hbplottmp->Draw("same"); int npoints = plotdata->GetN(); @@ -421,9 +421,9 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet //double bkgval = hbplottmp->GetBinContent(ipoint+1); plotdata->GetPoint(ipoint, xtmp,ytmp); double bkgval = nomBkgCurve->interpolate(xtmp); - if (BLIND) { - if ((xtmp > 115 ) && ( xtmp < 135) ) continue; - } + // if (BLIND) { + // if ((xtmp > 115 ) && ( xtmp < 135) ) continue; + // } std::cout << "[INFO] plotdata->Integral() " << plotdata->Integral() << " ( bins " << npoints << ") hbkgplots[i]->Integral() " << hbplottmp->Integral() << " (bins " << hbplottmp->GetNbinsX() << std::endl; double errhi = plotdata->GetErrorYhigh(ipoint); double errlow = plotdata->GetErrorYlow(ipoint); @@ -437,18 +437,18 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet point++; } pad2->cd(); - TH1 *hdummy = new TH1D("hdummyweight","",mgg_high-mgg_low,mgg_low,mgg_high); + TH1 *hdummy = new TH1D("hdummyweight","",(mjj_high-mjj_low)/10,mjj_low,mjj_high); hdummy->SetMaximum(hdatasub->GetHistogram()->GetMaximum()+1); hdummy->SetMinimum(hdatasub->GetHistogram()->GetMinimum()-1); hdummy->GetYaxis()->SetTitle("data - best fit PDF"); hdummy->GetYaxis()->SetTitleSize(0.12); // hdummy->GetXaxis()->SetTitle("m_{#gamma#gamma} (GeV)"); hdummy->GetXaxis()->SetTitle("m_{jj} (GeV)"); - hdummy->GetXaxis()->SetTitleSize(0.12); + hdummy->GetXaxis()->SetTitleSize(0.10); hdummy->Draw("HIST"); hdummy->GetYaxis()->SetNdivisions(808); - TLine *line3 = new TLine(mgg_low,0.,mgg_high,0.); + TLine *line3 = new TLine(mjj_low,0.,mjj_high,0.); line3->SetLineColor(bestcol); //line3->SetLineStyle(kDashed); line3->SetLineWidth(5.0); @@ -461,7 +461,7 @@ void plot(RooRealVar *mass, RooMultiPdf *pdfs, RooCategory *catIndex, RooDataSet delete canv; } -void plot(RooRealVar *mass, map pdfs, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1){ +void plot(RooRealVar *mass, RooRealVar *mgg, int mjj_low, int mjj_high, map pdfs, RooDataSet *data, string name, vector flashggCats_, int cat, int bestFitPdf=-1){ int color[7] = {kBlue,kRed,kMagenta,kGreen+1,kOrange+7,kAzure+10,kBlack}; TCanvas *canv = new TCanvas(); @@ -470,14 +470,14 @@ void plot(RooRealVar *mass, map pdfs, RooDataSet *data, strin leg->SetLineColor(0); RooPlot *plot = mass->frame(); - mass->setRange("unblindReg_1",mgg_low,115); - mass->setRange("unblindReg_2",135,mgg_high); + mgg->setRange("unblindReg_1",100,115); + mgg->setRange("unblindReg_2",135,180); if (BLIND) { - data->plotOn(plot,Binning(mgg_high-mgg_low),CutRange("unblindReg_1")); - data->plotOn(plot,Binning(mgg_high-mgg_low),CutRange("unblindReg_2")); - data->plotOn(plot,Binning(mgg_high-mgg_low),Invisible()); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),CutRange("unblindReg_1")); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),CutRange("unblindReg_2")); + data->plotOn(plot,Binning(0.2*(mjj_high-mjj_low)/10),Invisible()); } - else data->plotOn(plot,Binning(mgg_high-mgg_low)); + else data->plotOn(plot,Binning((mjj_high-mjj_low)/10)); TObject *datLeg = plot->getObject(int(plot->numItems()-1)); if(flashggCats_.size() >0){ @@ -612,6 +612,7 @@ int main(int argc, char* argv[]){ int ncats; int singleCategory; int catOffset; + string datfile; string outDir; string outfilename; @@ -641,11 +642,15 @@ int main(int argc, char* argv[]){ ("flashggCats,f", po::value(&flashggCatsStr_)->default_value("UntaggedTag_0,UntaggedTag_1,UntaggedTag_2,UntaggedTag_3,UntaggedTag_4,VBFTag_0,VBFTag_1,VBFTag_2,TTHHadronicTag,TTHLeptonicTag,VHHadronicTag,VHTightTag,VHLooseTag,VHEtTag"), "Flashgg category names to consider") ("year", po::value(&year_)->default_value("2016"), "Dataset year") ("catOffset", po::value(&catOffset)->default_value(0), "Category numbering scheme offset") + // ("ML", po::value(&mjj_low)->default_value(20), "Category numbering scheme offset") + // ("MU", po::value(&mjj_high)->default_value(220), "Category numbering scheme offset") ("verbose,v", "Run with more output") ; po::variables_map vm; po::store(po::parse_command_line(argc,argv,desc),vm); po::notify(vm); + + if (vm.count("help")) { cout << desc << endl; exit(1); } if (vm.count("is2011")) is2011=true; if (vm.count("unblind")) BLIND=false; @@ -740,7 +745,15 @@ int main(int argc, char* argv[]){ PdfModelBuilder pdfsModel; RooRealVar *mass = (RooRealVar*)inWS->var("Dijet_mass"); - std:: cout << "[INFO] Got mass from ws " << mass << std::endl; + RooRealVar *mgg = (RooRealVar*)inWS->var("CMS_hgg_mass"); + double min_value = mass->getMin(); + double max_value = mass->getMax(); + int mjj_low = (int)min_value; + int mjj_high = (int)max_value; + // if (mjj_low<0) {mjj_low = 0 ;} + // int mjj_high = ((int)max_value/10)*10+20; + int nBinsForMass = (mjj_high-mjj_low)/10; + std:: cout << "[INFO] Got mass from ws " << (int)max_value <<" "<< mjj_high<<" " << mjj_low << std::endl; pdfsModel.setObsVar(mass); double upperEnvThreshold = 0.1; // upper threshold on delta(chi2) to include function in envelope (looser than truth function) @@ -775,9 +788,9 @@ int main(int argc, char* argv[]){ /*dataFull= (RooDataSet*) dataFull0->emptyClone(); for (int i =0 ; i < dataFull0->numEntries() ; i++){ double m = dataFull0->get(i)->getRealValue("Dijet_mass"); - //if (m <(mgg_low+0.01) or m > (mgg_high-0.01)) + //if (m <(mjj_low+0.01) or m > (mjj_high-0.01)) - if (m==mgg_low){ + if (m==mjj_low){ std::cout << "dataset mass m="<< m << std::endl; continue; } @@ -789,7 +802,8 @@ int main(int argc, char* argv[]){ {dataFull = (RooDataSet*)inWS->data(Form("data_mass_%s",catname.c_str())); if (verbose) std::cout << "[INFO] opened data for " << Form("data_mass_%s",catname.c_str()) <<" - " << dataFull < " + std::to_string(mjj_low) + ")"; + // RooDataSet *datablind = (RooDataSet*)dataFull->reduce(cut.c_str()); mass->setBins(nBinsForMass); RooDataSet *data; @@ -808,8 +822,10 @@ int main(int argc, char* argv[]){ thisdataBinned_name= Form("roohist_data_mass_cat%d",cat); //RooDataSet *data = (RooDataSet*)dataFull; } - RooDataHist thisdataBinned(thisdataBinned_name.c_str(),"data",*mass,*dataFull); + RooDataHist thisdataBinned(thisdataBinned_name.c_str(),"data",*mass,*dataFull); data = (RooDataSet*)&thisdataBinned; + + RooArgList storedPdfs("store"); @@ -850,7 +866,7 @@ int main(int argc, char* argv[]){ chi2 = 2.*(prevNll-thisNll); if (chi2<0. && order>1) chi2=0.; if (prev_pdf!=NULL){ - prob = getProbabilityFtest(chi2,order-prev_order,prev_pdf,bkgPdf,mass,data + prob = getProbabilityFtest(chi2,mjj_low,mjj_high,order-prev_order,prev_pdf,bkgPdf,mass,data ,Form("%s/Ftest_from_%s%d_cat%d.pdf",outDir.c_str(),funcType->c_str(),order,(cat+catOffset))); std::cout << "[INFO] F-test Prob(chi2>chi2(data)) == " << prob << std::endl; } else { @@ -858,7 +874,7 @@ int main(int argc, char* argv[]){ } double gofProb=0; // otherwise we get it later ... - if (!saveMultiPdf) plot(mass,bkgPdf,data,Form("%s/%s%d_cat%d.pdf",outDir.c_str(),funcType->c_str(),order,(cat+catOffset)),flashggCats_,fitStatus,&gofProb); + if (!saveMultiPdf) plot(mass,mgg,mjj_low,mjj_high,bkgPdf,data,Form("%s/%s%d_cat%d.pdf",outDir.c_str(),funcType->c_str(),order,(cat+catOffset)),flashggCats_,fitStatus,&gofProb); cout << "[INFO]\t " << *funcType << " " << order << " " << prevNll << " " << thisNll << " " << chi2 << " " << prob << endl; //fprintf(resFile,"%15s && %d && %10.2f && %10.2f && %10.2f \\\\\n",funcType->c_str(),order,thisNll,chi2,prob); prevNll=thisNll; @@ -914,7 +930,7 @@ int main(int argc, char* argv[]){ // Calculate goodness of fit for the thing to be included (will use toys for lowstats)! double gofProb =0; - plot(mass,bkgPdf,data,Form("%s/%s%d_cat%d.pdf",outDir.c_str(),funcType->c_str(),order,(cat+catOffset)),flashggCats_,fitStatus,&gofProb); + plot(mass,mgg,mjj_low,mjj_high,bkgPdf,data,Form("%s/%s%d_cat%d.pdf",outDir.c_str(),funcType->c_str(),order,(cat+catOffset)),flashggCats_,fitStatus,&gofProb); if ((prob < upperEnvThreshold) ) { // Looser requirements for the envelope @@ -950,7 +966,7 @@ int main(int argc, char* argv[]){ choices_envelope_vec.push_back(choices_envelope); pdfs_vec.push_back(pdfs); - plot(mass,pdfs,data,Form("%s/truths_cat%d",outDir.c_str(),(cat+catOffset)),flashggCats_,cat); + plot(mass,mgg,mjj_low,mjj_high,pdfs,data,Form("%s/truths_cat%d",outDir.c_str(),(cat+catOffset)),flashggCats_,cat); if (saveMultiPdf){ @@ -966,9 +982,9 @@ int main(int argc, char* argv[]){ catname = Form("cat%d",(cat+catOffset)); } RooCategory catIndex(catindexname.c_str(),"c"); - RooMultiPdf *pdf = new RooMultiPdf(Form("CMS_hgg_%s_%s_bkgshape",catname.c_str(),ext.c_str()),"all pdfs",catIndex,storedPdfs); + RooMultiPdf *pdf = new RooMultiPdf(Form("CMS_hjj_%s_%s_bkgshape",catname.c_str(),ext.c_str()),"all pdfs",catIndex,storedPdfs); //RooRealVar nBackground(Form("CMS_hgg_%s_%s_bkgshape_norm",catname.c_str(),ext.c_str()),"nbkg",data->sumEntries(),0,10E8); - RooRealVar nBackground(Form("CMS_hgg_%s_%s_bkgshape_norm",catname.c_str(),ext.c_str()),"nbkg",data->sumEntries(),0,3*data->sumEntries()); + RooRealVar nBackground(Form("CMS_hjj_%s_%s_bkgshape_norm",catname.c_str(),ext.c_str()),"nbkg",data->sumEntries(),0,3*data->sumEntries()); //nBackground.removeRange(); // bug in roofit will break combine until dev branch brought in //double check the best pdf! int bestFitPdfIndex = getBestFitFunction(pdf,data,&catIndex,!verbose); @@ -979,7 +995,7 @@ int main(int argc, char* argv[]){ std::cout << "[INFO] Best Fit Pdf = " << bestFitPdfIndex << ", " << storedPdfs.at(bestFitPdfIndex)->GetName() << std::endl; std::cout << "// ------------------------------------------------------------------------- //" <setBins(nBinsForMass); RooDataHist dataBinned(Form("roohist_data_mass_%s",catname.c_str()),"data",*mass,*dataFull); @@ -989,7 +1005,7 @@ int main(int argc, char* argv[]){ outputws->import(catIndex); outputws->import(dataBinned); outputws->import(*data); - plot(mass,pdf,&catIndex,data,Form("%s/multipdf_%s",outDir.c_str(),catname.c_str()),flashggCats_,cat,bestFitPdfIndex); + plot(mass,mgg,mjj_low,mjj_high,pdf,&catIndex,data,Form("%s/multipdf_%s",outDir.c_str(),catname.c_str()),flashggCats_,cat,bestFitPdfIndex); } diff --git a/Background/test/pseudodataMaker.cpp b/Background/test/pseudodataMaker.cpp index 404d9ae1..5a26b9fa 100644 --- a/Background/test/pseudodataMaker.cpp +++ b/Background/test/pseudodataMaker.cpp @@ -21,8 +21,7 @@ #include "RooFitResult.h" #include "RooPlot.h" #include "RooMsgService.h" -// #include "RooMinuit.h" -#include "RooMinimizer.h" +#include "RooMinuit.h" #include "RooPolynomial.h" #include "RooRandom.h" diff --git a/Background/test/workspaceTool.cpp b/Background/test/workspaceTool.cpp index 02f996a3..760f5f4e 100644 --- a/Background/test/workspaceTool.cpp +++ b/Background/test/workspaceTool.cpp @@ -21,8 +21,7 @@ #include "RooFitResult.h" #include "RooPlot.h" #include "RooMsgService.h" -// #include "RooMinuit.h" -#include "RooMinimizer.h" +#include "RooMinuit.h" #include "boost/program_options.hpp" #include "boost/algorithm/string/split.hpp" diff --git a/Datacard/RunYields.py b/Datacard/RunYields.py index 4ec771f7..6f3ae0d3 100644 --- a/Datacard/RunYields.py +++ b/Datacard/RunYields.py @@ -35,13 +35,13 @@ def get_options(): parser.add_option('--batch', dest='batch', default='IC', help='Batch') parser.add_option('--queue', dest='queue', default='microcentury', help='Queue: should not take long (microcentury will do)') parser.add_option('--jobOpts', dest='jobOpts', default='', help="Additional options to add to job submission. For Condor separate individual options with a colon (specify all within quotes e.g. \"option_xyz = abc+option_123 = 456\")") - parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print(submission files only") + parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print submission files only") return parser.parse_args() (opt,args) = get_options() -print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING YIELDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING YIELDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" def leave(): - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING YIELDS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING YIELDS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" sys.exit(1) # Store all opts in orderedDict for submissionTools @@ -76,11 +76,11 @@ def leave(): if( opt.doNOTAG )&( 'NOTAG' not in options['cats'] ): if( containsNOTAG(WSFileNames) ): options['cats'] += ',NOTAG' else: - print(" --> [WARNING] NOTAG dataset not present in input workspace. Skipping NOTAG" ) + print " --> [WARNING] NOTAG dataset not present in input workspace. Skipping NOTAG" options['nCats'] = len(options['cats'].split(",")) -print(" --> Running yields for following cats: %s"%options['cats']) +print " --> Running yields for following cats: %s"%options['cats'] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Make directory to store job scripts and output @@ -88,13 +88,13 @@ def leave(): # Write submission files: style depends on batch system writeSubFiles(options) -print(" --> Finished writing submission scripts") +print " --> Finished writing submission scripts" # Submit scripts to batch system if not options['printOnly']: submitFiles(options) else: - print(" --> Running with printOnly option. Will not submit scripts") + print " --> Running with printOnly option. Will not submit scripts" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave() diff --git a/Datacard/addLineToDatacard.py b/Datacard/addLineToDatacard.py index d36038b9..510a3a28 100644 --- a/Datacard/addLineToDatacard.py +++ b/Datacard/addLineToDatacard.py @@ -13,7 +13,7 @@ def get_options(): (opt,args) = get_options() if not os.path.exists( opt.inputDatacard ): - print(" --> [ERROR] Input datacard %s does not exist."%opt.inputDatacard) + print " --> [ERROR] Input datacard %s does not exist."%opt.inputDatacard sys.exit(1) catLine = '' diff --git a/Datacard/cleanDatacard.py b/Datacard/cleanDatacard.py index e5e65952..f0edc18b 100644 --- a/Datacard/cleanDatacard.py +++ b/Datacard/cleanDatacard.py @@ -55,13 +55,14 @@ def isDiag( proc, cat): #if line.count('pdfindex') and not line.count('_13TeV_2016'): line = line.replace('_13TeV','_13TeV_2016') outFile.write('%s'%line) continue - print(print('Processing line %s'%vals[0])) + print + print 'Processing line %s'%vals[0] line = line.split('lnN')[0] + 'lnN ' for i,effect in enumerate(vals[2:]): proc = procs[i] cat = cats[i] - #print('proc = %s'%proc) - #print('cat = %s'%cat) + #print 'proc = %s'%proc + #print 'cat = %s'%cat if opts.removeNonDiagonal and not isDiag(proc,cat): line += '- ' continue @@ -74,7 +75,7 @@ def isDiag( proc, cat): if val < factorLo or val > factorHi: line += '- ' if opts.verbose: - print('Symmetric: replacing value of %1.3f with -'%val) + print 'Symmetric: replacing value of %1.3f with -'%val else: line += '%1.3f '%val elif len(vals) == 2: @@ -82,24 +83,24 @@ def isDiag( proc, cat): valHi = float(vals[1]) if valLo < factorLo or valLo > factorHi: if opts.verbose: - print('Asymmetric: replacing low value of %1.3f with 1'%valLo) + print 'Asymmetric: replacing low value of %1.3f with 1'%valLo valLo = 1 if valHi <= factorLo or valHi > factorHi: if opts.verbose: - print('Asymmetric: replacing high value of %1.3f with 1'%valHi) + print 'Asymmetric: replacing high value of %1.3f with 1'%valHi valHi = 1 if opts.removeDoubleSided and valHi > 1.000001 and valLo > 1.000001: #line += '%1.3f '%(0.5*(valHi+valLo)) line += '%1.3f '%(max(valHi,valLo)) if opts.verbose: - #print('DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, 0.5*(valHi+valLo))) - print('DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, max(valHi,valLo))) + #print 'DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, 0.5*(valHi+valLo)) + print 'DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, max(valHi,valLo)) elif opts.removeDoubleSided and valHi < 0.999999 and valLo < 0.999999: #line += '%1.3f '%(0.5*(valHi+valLo)) line += '%1.3f '%(min(valHi,valLo)) if opts.verbose: - #print('DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, 0.5*(valHi+valLo))) - print('DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, min(valHi,valLo))) + #print 'DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, 0.5*(valHi+valLo)) + print 'DoubleSided: replacing %1.3f/%1.3f with %1.3f'%(valLo, valHi, min(valHi,valLo)) else: line += '%1.3f/%1.3f '%(valLo,valHi) else: diff --git a/Datacard/combineAll.py b/Datacard/combineAll.py new file mode 100644 index 00000000..a6e00b1c --- /dev/null +++ b/Datacard/combineAll.py @@ -0,0 +1,49 @@ +import os.path +import shutil,subprocess + +# MX +# 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2200 2400 2600 2800 3000 3500 4000 bbbb boosted +# 300 400 500 600 700 800 900 1000 1100 1200 1400 1600 1800 2000 bbbb +# 300 350 400 450 500 550 600 650 700 750 800 850 900 950 1000 1100 1200 1400 1600 1800 2000 bbgg +# 240 280 320 360 450 400 500 550 600 700 800 900 1000 1200 1400 1600 1800 2000 2500 3000 bbtt + +# MX_to_combine = [240, 280, 300, 400, 500, 600, 700, 800, 900, 1000 ] +MX_to_combine = [1200] + + +# MY +# 60 70 80 90 100 125 150 200 250 300 350 400 450 500 600 bbbb boosted +# 60 70 80 90 100 125 150 200 250 300 400 500 600 700 800 900 1000 1200 1400 1600 1800 bbbb +# 60 70 80 90 100 150 250 300 400 500 600 650 700 800 900 1000 1100 1200 1300 1400 1600 1800 2000 2200 2400 2600 2800 bbtt +# 90 100 150 200 250 300 400 500 600 700 800 900 1000 1200 1400 1600 1800 bbgg + +MY_to_combine = [ 80,100,300,400,500,600,700,900,1000 ] +# MY_to_combine = [100 ] + + +folders_to_combine = { + "bbbb_hy" : "/eos/user/z/zhjie/CMSSW_10_2_13/src/flashggFinalFit/Datacard/4b_res/datacard_mass_Xxx_Yyy.txt", # scalling all signals to 1 fb X BR (H) + "bbgg_hy" : "/eos/user/z/zhjie/CMSSW_10_2_13/src/flashggFinalFit/Datacard/bbgg/datacard_mass_Xxx_Yyy.txt", # scalling all signals to 1 fb X BR (H) + # "bbtt_hy" : "bbtt/HY/v3/datacard_mass_Xxx_Yyy.txt", # scalling all signals to 1 fb X BR (H) + "bbbb_boosted_hy" : "/eos/user/z/zhjie/CMSSW_10_2_13/src/flashggFinalFit/Datacard/4b_boost/datacard_mass_Xxx_Yyy.txt", # scalling all signals to 1 fb X BR (H) + "bbtt":"/eos/user/z/zhjie/CMSSW_10_2_13/src/flashggFinalFit/Datacard/bbtt/datacard_mass_Xxx_Yyy.txt", +} + +for MX in MX_to_combine : + for MY in MY_to_combine : + command = "combineCards.py " + counter = 0 + for channel in folders_to_combine.keys() : + file_to_test = "%s" % folders_to_combine[channel].replace("xx", str(MX)).replace("yy", str(MY)) + print(file_to_test) + if os.path.isfile(file_to_test) : + command += "%s=%s " % (channel, file_to_test) + counter += 1 + command += " > datacard_mass_X%s_Y%s.txt" % (str(MX), str(MY)) + + if counter > 0 : + print(MX, MY, command) + proc=subprocess.Popen([command],shell=True,stdout=subprocess.PIPE) + out = proc.stdout.read() + + diff --git a/Datacard/makeDatacard.py b/Datacard/makeDatacard.py index 3f824e15..eb0ca75e 100644 --- a/Datacard/makeDatacard.py +++ b/Datacard/makeDatacard.py @@ -1,6 +1,6 @@ # Datacard making script: uses output pkl file of makeYields.py script -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " import os, sys import re from optparse import OptionParser @@ -9,7 +9,7 @@ import glob import pickle from collections import OrderedDict as od -from systematics import theory_systematics, experimental_systematics, signal_shape_systematics +from systematics import theory_systematics, experimental_systematics,experimental_systematics_boost, signal_shape_systematics def get_options(): parser = OptionParser() @@ -34,7 +34,7 @@ def get_options(): (opt,args) = get_options() def leave(): - print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " exit(1) STXSMergingScheme, STXSScaleCorrelationScheme = None, None @@ -43,7 +43,7 @@ def leave(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Concatenate dataframes -print(" --> Loading per category dataframes into single dataframe") +print " --> Loading per category dataframes into single dataframe" print('opt.ext',opt.ext) extStr = "_%s"%opt.ext if opt.ext != '' else '' pkl_files = glob.glob("./yields%s/*.pkl"%extStr) @@ -61,32 +61,45 @@ def leave(): if opt.doSystematics: from tools.calcSystematics import factoryType, addConstantSyst, experimentalSystFactory, theorySystFactory, groupSystematics, envelopeSystematics, renameSyst - print(" ..........................................................................................") + print " .........................................................................................." # Extract factory types of systematics - print(" --> Extracting factory types for systematics") + print " --> Extracting factory types for systematics" experimentalFactoryType = {} theoryFactoryType = {} mask = (~data['cat'].str.contains("NOTAG"))&(data['type']=='sig') - for s in experimental_systematics: - if s['type'] == 'factory': - # Fix for HEM as only in 2018 workspaces - if s['name'] == 'JetHEM': experimentalFactoryType[s['name']] = "a_h" - else: - experimentalFactoryType[s['name']] = factoryType(data[mask],s) + if "boost" in opt.ext: + for s in experimental_systematics_boost: + if s['type'] == 'factory': + # Fix for HEM as only in 2018 workspaces + if s['name'] == 'JetHEM' or s['name'] == 'FJHEM': experimentalFactoryType[s['name']] = "a_h" + else: + experimentalFactoryType[s['name']] = factoryType(data[mask],s) + else: + for s in experimental_systematics: + if s['type'] == 'factory': + # Fix for HEM as only in 2018 workspaces + if s['name'] == 'JetHEM' or s['name'] == 'FJHEM': experimentalFactoryType[s['name']] = "a_h" + else: + experimentalFactoryType[s['name']] = factoryType(data[mask],s) for s in theory_systematics: if s['type'] == 'factory': theoryFactoryType[s['name']] = factoryType(data[mask],s) # Experimental: - print(" --> Adding experimental systematics variations to dataFrame") + print " --> Adding experimental systematics variations to dataFrame" # Add constant systematics to dataFrame - for s in experimental_systematics: - if s['type'] == 'constant': data = addConstantSyst(data,s,opt) - data = experimentalSystFactory(data, experimental_systematics, experimentalFactoryType, opt ) + if "boost" in opt.ext: + for s in experimental_systematics_boost: + if s['type'] == 'constant': data = addConstantSyst(data,s,opt) + data = experimentalSystFactory(data, experimental_systematics_boost, experimentalFactoryType, opt ) + else: + for s in experimental_systematics: + if s['type'] == 'constant': data = addConstantSyst(data,s,opt) + data = experimentalSystFactory(data, experimental_systematics, experimentalFactoryType, opt ) # Theory: - print(" --> Adding theory systematics variations to dataFrame") + print " --> Adding theory systematics variations to dataFrame" # Add constant systematics to dataFrame for s in theory_systematics: if s['type'] == 'constant': data = addConstantSyst(data,s,opt) @@ -102,11 +115,11 @@ def leave(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Pruning: if process contributes less than 0.1% of yield in analysis category then ignore if opt.prune: - print(" ..........................................................................................") - print(" --> Pruning processes which contribute < %.2f%% of RECO category yield"%(100*opt.pruneThreshold)) + print " .........................................................................................." + print " --> Pruning processes which contribute < %.2f%% of RECO category yield"%(100*opt.pruneThreshold) data['prune'] = 0 if opt.doTrueYield: - print(" --> Using the true yield of process for pruning: N = Product(XS,BR,eff*acc,lumi)") + print " --> Using the true yield of process for pruning: N = Product(XS,BR,eff*acc,lumi)" mask = (data['type']=='sig') # Extract XS*BR using tools.XSBR @@ -134,8 +147,8 @@ def leave(): data.loc[mask,'prune'] = 1 else: - print(" --> Using nominal yield of process (sumEntries) for pruning") - print(data['cat']) + print " --> Using nominal yield of process (sumEntries) for pruning" + print(data) mask = (data['type']=='sig') # Extract per category yields @@ -143,12 +156,8 @@ def leave(): for cat in data.cat.unique(): catYields[cat] = data[(data['cat']==cat)&(data['type']=='sig')].nominal_yield.sum() # Set prune = 1 if < threshold of total cat yield - # mask = (data['nominal_yield'] Saving dataFrame: %s.pkl"%opt.output) + print " .........................................................................................." + print " --> Saving dataFrame: %s.pkl"%opt.output with open("%s.pkl"%opt.output,"wb") as fD: pickle.dump(data,fD) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # WRITE TO .TXT FILE -print(" ..........................................................................................") +print " .........................................................................................." fdataName = "%s.txt"%opt.output -print(" --> Writing to datacard file: %s"%fdataName) +print " --> Writing to datacard file: %s"%fdataName from tools.writeToDatacard import writePreamble, writeProcesses, writeSystematic, writeMCStatUncertainty, writePdfIndex, writeBreak fdata = open(fdataName,"w") if not writePreamble(fdata,opt): - print(" --> [ERROR] in writing preamble. Leaving...") + print " --> [ERROR] in writing preamble. Leaving..." leave() if not writeProcesses(fdata,data,opt): - print(" --> [ERROR] in writing processes. Leaving...") + print " --> [ERROR] in writing processes. Leaving..." leave() if opt.doSystematics: - for syst in experimental_systematics: - if not writeSystematic(fdata,data,syst,opt): - print(" --> [ERROR] in writing systematic %s (experiment). Leaving"%syst['name']) - leave() - writeBreak(fdata) - for syst in theory_systematics: - if not writeSystematic(fdata,data,syst,opt,stxsMergeScheme=STXSMergingScheme,scaleCorrScheme=STXSScaleCorrelationScheme): - print(" --> [ERROR] in writing systematic %s (theory). Leaving"%syst['name']) - leave() - writeBreak(fdata) - for syst in signal_shape_systematics: - if not writeSystematic(fdata,data,syst,opt): - print(" --> [ERROR] in writing systematic %s (signal shape). Leaving"%syst['name']) - leave() + if "boost" in opt.ext: + for syst in experimental_systematics_boost: + if not writeSystematic(fdata,data,syst,opt): + print " --> [ERROR] in writing systematic %s (experiment). Leaving"%syst['name'] + leave() + writeBreak(fdata) + for syst in theory_systematics: + if not writeSystematic(fdata,data,syst,opt,stxsMergeScheme=STXSMergingScheme,scaleCorrScheme=STXSScaleCorrelationScheme): + print " --> [ERROR] in writing systematic %s (theory). Leaving"%syst['name'] + leave() + writeBreak(fdata) + for syst in signal_shape_systematics: + if not writeSystematic(fdata,data,syst,opt): + print " --> [ERROR] in writing systematic %s (signal shape). Leaving"%syst['name'] + leave() + else: + for syst in experimental_systematics: + if not writeSystematic(fdata,data,syst,opt): + print " --> [ERROR] in writing systematic %s (experiment). Leaving"%syst['name'] + leave() + writeBreak(fdata) + for syst in theory_systematics: + if not writeSystematic(fdata,data,syst,opt,stxsMergeScheme=STXSMergingScheme,scaleCorrScheme=STXSScaleCorrelationScheme): + print " --> [ERROR] in writing systematic %s (theory). Leaving"%syst['name'] + leave() + writeBreak(fdata) + for syst in signal_shape_systematics: + if not writeSystematic(fdata,data,syst,opt): + print " --> [ERROR] in writing systematic %s (signal shape). Leaving"%syst['name'] + leave() if opt.doMCStatUncertainty: writeBreak(fdata) if not writeMCStatUncertainty(fdata,data,opt): - print(" --> [ERROR] in writing MC stat uncertainty systematic. Leaving") + print " --> [ERROR] in writing MC stat uncertainty systematic. Leaving" leave() writeBreak(fdata) if not writePdfIndex(fdata,data,opt): - print(" --> [ERROR] in writing pdf indices. Leaving...") + print " --> [ERROR] in writing pdf indices. Leaving..." leave() fdata.close() diff --git a/Datacard/makeYields.py b/Datacard/makeYields.py index 4c66db7b..9b96a286 100644 --- a/Datacard/makeYields.py +++ b/Datacard/makeYields.py @@ -12,14 +12,14 @@ import pickle import math from collections import OrderedDict -from systematics import theory_systematics, experimental_systematics, signal_shape_systematics +from systematics import theory_systematics, experimental_systematics, experimental_systematics_boost,signal_shape_systematics from commonObjects import * from commonTools import * -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " def leave(): - print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " exit(1) def get_options(): @@ -48,25 +48,25 @@ def get_options(): # Extract years and inputWSDir inputWSDirMap = od() for i in opt.inputWSDirMap.split(","): - print(" --> Taking %s input workspaces from: %s"%(i.split("=")[0],i.split("=")[1]) ) + print " --> Taking %s input workspaces from: %s"%(i.split("=")[0],i.split("=")[1]) if not os.path.isdir( i.split("=")[1] ): - print(" --> [ERROR] Directory %s does not exist. Leaving..."%i.split("=")[1]) + print " --> [ERROR] Directory %s does not exist. Leaving..."%i.split("=")[1] leave() inputWSDirMap[i.split("=")[0]] = i.split("=")[1] years = inputWSDirMap.keys() procsMap = od() if opt.procs == 'auto': - for y,iWSDir in inputWSDirMap.items(): + for y,iWSDir in inputWSDirMap.iteritems(): WSFileNames = extractWSFileNames(iWSDir) procsMap[y] = extractListOfProcs(WSFileNames) # Require common procs for each year for i,iy in enumerate(years): for j,jy in enumerate(years): if j > i: - if set(procsMap[iy].split(",")) != set(procsMap[jy].split(",")): - print(" --> [ERROR] Mis-match in list of process for %s and %s. Intersection = %s"%(iy,jy,(set(procsMap[jy]).symmetric_difference(set(procsMap[iy]))))) - leave() + if set(procsMap[iy].split(",")) != set(procsMap[jy].split(",")): + print " --> [ERROR] Mis-match in list of process for %s and %s. Intersection = %s"%(iy,jy,(set(procsMap[jy]).symmetric_difference(set(procsMap[iy])))) + leave() # Define list of procs (alphabetically ordered) procs = procsMap[years[0]].split(",") else: procs = opt.procs.split(",") @@ -78,7 +78,7 @@ def get_options(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # FILL DATAFRAME: all processes -print(" ..........................................................................................") +print " .........................................................................................." # Signal processes for year in years: @@ -97,6 +97,7 @@ def get_options(): else: _cat = "%s_%s"%(opt.cat,year) # Input flashgg ws + print("ccc",inputWSDirMap[year],opt.mass,proc) _inputWSFile = glob.glob("%s/*M%s*_%s.root"%(inputWSDirMap[year],opt.mass,proc))[0] _nominalDataName = "%s_%s_%s_%s"%(_proc_s0,opt.mass,sqrts__,opt.cat) @@ -114,14 +115,16 @@ def get_options(): # Input model ws if opt.cat == "NOTAG": _modelWSFile, _model = '-', '-' else: - _modelWSFile = "%s/CMS-HGG_sigfit_%s_%s.root"%(opt.sigModelWSDir,opt.sigModelExt,_cat) + _modelWSFile = "%s/CMS-HGG_sigfit_%s_%s_%s.root"%(opt.sigModelWSDir,opt.sigModelExt,_cat,proc) _model = "%s_%s:%s_%s"%(outputWSName__,sqrts__,outputWSObjectTitle__,_id) + # _model = "%s_%s:%s_%s"%(outputWSName__,sqrts__,outputWSObjectTitle2d__,_id) ##2D fit + # Extract rate from lumi _rate = float(lumiMap[year])*1000 # Add signal process to dataFrame: - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc,_cat)) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc,_cat) data.loc[len(data)] = [year,'sig',_procOriginal,_proc,_proc_s0,_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model,_rate] # Background and data processes @@ -131,15 +134,21 @@ def get_options(): if opt.mergeYears: _cat = opt.cat _modelWSFile = "%s/CMS-HGG_%s_%s.root"%(opt.bkgModelWSDir,opt.bkgModelExt,_cat) + # _model_bkg = "%s:CMS_%s_%s_%s_bkgshape"%(bkgWSName__,decayMode_2d,_cat,sqrts__) ##2D fit _model_bkg = "%s:CMS_%s_%s_%s_bkgshape"%(bkgWSName__,decayMode,_cat,sqrts__) _model_data = "%s:roohist_data_mass_%s"%(bkgWSName__,_cat) + # _model_data = "%s:Data_13TeV_%s"%(bkgWSName__,_cat)##2D fit _proc_s0 = '-' #not needed for data/bkg _inputWSFile = '-' #not needed for data/bkg _nominalDataName = '-' #not needed for data/bkg - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat)) - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat)) + + datafile=opt.bkgModelWSDir+"/allData.root" + modelname="tagsDumper/cms_hgg_13TeV:Data_13TeV_%s"%(_cat) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat) data.loc[len(data)] = ["merged",'bkg',_proc_bkg,_proc_bkg,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_bkg,opt.bkgScaler] data.loc[len(data)] = ["merged",'data',_proc_data,_proc_data,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_data,-1] + # data.loc[len(data)] = ["merged",'data',_proc_data,_proc_data,'-',_cat,_inputWSFile,_nominalDataName,datafile,modelname,-1] ##2D fit # Category separate per year else: @@ -152,15 +161,15 @@ def get_options(): _proc_s0 = '-' #not needed for data/bkg _inputWSFile = '-' #not needed for data/bkg _nominalDataName = '-' #not needed for data/bkg - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat)) - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat)) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat) data.loc[len(data)] = ["year",'bkg',_proc_bkg,_proc_bkg,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_bkg,opt.bkgScaler] data.loc[len(data)] = ["year",'data',_proc_data,_proc_data,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_data,-1] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Yields: for each signal row in dataFrame extract the yield -print(" ..........................................................................................") +print " .........................................................................................." # * if systematics=True: also extract reweighted yields for each uncertainty source from tools.calcSystematics import factoryType, calcSystYields @@ -175,38 +184,66 @@ def get_options(): # * a_h: anti-symmetric RooDataHist (2 columns in dataframe) # * a_w: anti-symmetric weight in nominal RooDataSet (2 columns in dataframe) # * s_w: symmetric (single) weight in nominal RooDataSet (1 column in dataframe) - experimentalFactoryType = {} - theoryFactoryType = {} - # No experimental systematics for NOTAG - if opt.cat != "NOTAG": - for s in experimental_systematics: - print(s) + if "boost" in opt.ext: + experimentalFactoryType = {} + theoryFactoryType = {} + # No experimental systematics for NOTAG + if opt.cat != "NOTAG": + for s in experimental_systematics_boost: + print(s) + if s['type'] == 'factory': + # Fix for HEM as only in 2018 workspaces + if s['name'] == 'JetHEM': experimentalFactoryType[s['name']] = "a_h" + else: experimentalFactoryType[s['name']] = factoryType(data,s) + if experimentalFactoryType[s['name']] in ["a_w","a_h"]: + data['%s_up_yield'%s['name']] = '-' + data['%s_down_yield'%s['name']] = '-' + else: data['%s_yield'%s['name']] = '-' + for s in theory_systematics: if s['type'] == 'factory': - # Fix for HEM as only in 2018 workspaces - if s['name'] == 'JetHEM': experimentalFactoryType[s['name']] = "a_h" - else: experimentalFactoryType[s['name']] = factoryType(data,s) - if experimentalFactoryType[s['name']] in ["a_w","a_h"]: - data['%s_up_yield'%s['name']] = '-' - data['%s_down_yield'%s['name']] = '-' - else: data['%s_yield'%s['name']] = '-' - for s in theory_systematics: - if s['type'] == 'factory': - theoryFactoryType[s['name']] = factoryType(data,s) - if theoryFactoryType[s['name']] in ["a_w","a_h"]: - data['%s_up_yield'%s['name']] = '-' - data['%s_down_yield'%s['name']] = '-' - if not opt.skipCOWCorr: - data['%s_up_yield_COWCorr'%s['name']] = '-' - data['%s_down_yield_COWCorr'%s['name']] = '-' - else: - data['%s_yield'%s['name']] = '-' - if not opt.skipCOWCorr: data['%s_yield_COWCorr'%s['name']] = '-' + theoryFactoryType[s['name']] = factoryType(data,s) + if theoryFactoryType[s['name']] in ["a_w","a_h"]: + data['%s_up_yield'%s['name']] = '-' + data['%s_down_yield'%s['name']] = '-' + if not opt.skipCOWCorr: + data['%s_up_yield_COWCorr'%s['name']] = '-' + data['%s_down_yield_COWCorr'%s['name']] = '-' + else: + data['%s_yield'%s['name']] = '-' + if not opt.skipCOWCorr: data['%s_yield_COWCorr'%s['name']] = '-' + else: + experimentalFactoryType = {} + theoryFactoryType = {} + # No experimental systematics for NOTAG + if opt.cat != "NOTAG": + for s in experimental_systematics: + print(s) + if s['type'] == 'factory': + # Fix for HEM as only in 2018 workspaces + if s['name'] == 'JetHEM': experimentalFactoryType[s['name']] = "a_h" + else: experimentalFactoryType[s['name']] = factoryType(data,s) + if experimentalFactoryType[s['name']] in ["a_w","a_h"]: + data['%s_up_yield'%s['name']] = '-' + data['%s_down_yield'%s['name']] = '-' + else: data['%s_yield'%s['name']] = '-' + for s in theory_systematics: + if s['type'] == 'factory': + theoryFactoryType[s['name']] = factoryType(data,s) + if theoryFactoryType[s['name']] in ["a_w","a_h"]: + data['%s_up_yield'%s['name']] = '-' + data['%s_down_yield'%s['name']] = '-' + if not opt.skipCOWCorr: + data['%s_up_yield_COWCorr'%s['name']] = '-' + data['%s_down_yield_COWCorr'%s['name']] = '-' + else: + data['%s_yield'%s['name']] = '-' + if not opt.skipCOWCorr: data['%s_yield_COWCorr'%s['name']] = '-' # Loop over signal rows in dataFrame: extract yields (nominal & systematic variations) totalSignalRows = float(data[data['type']=='sig'].shape[0]) for ir,r in data[data['type']=='sig'].iterrows(): - print(" --> Extracting yields: (%s,%s) [%.1f%%]"%(r['proc'],r['cat'],100*(float(ir)/totalSignalRows))) + print " --> Extracting yields: (%s,%s) [%.1f%%]"%(r['proc'],r['cat'],100*(float(ir)/totalSignalRows)) # Open input WS file and extract workspace f_in = ROOT.TFile(r.inputWSFile) @@ -243,23 +280,23 @@ def get_options(): # Skip centralObjectWeight correction as concerns events in acceptance print("contents",contents) experimentalSystYields = calcSystYields(r['nominalDataName'],contents,inputWS,experimentalFactoryType,skipCOWCorr=True,proc=r['proc'],year=r['year'],ignoreWarnings=opt.ignore_warnings) - for s,f in experimentalFactoryType.items(): - if f in ['a_w','a_h']: - for direction in ['up','down']: - data.at[ir,"%s_%s_yield"%(s,direction)] = experimentalSystYields["%s_%s"%(s,direction)] - else: - data.at[ir,"%s_yield"%s] = experimentalSystYields[s] + for s,f in experimentalFactoryType.iteritems(): + if f in ['a_w','a_h']: + for direction in ['up','down']: + data.at[ir,"%s_%s_yield"%(s,direction)] = experimentalSystYields["%s_%s"%(s,direction)] + else: + data.at[ir,"%s_yield"%s] = experimentalSystYields[s] # For theoretical systematics: theorySystYields = calcSystYields(r['nominalDataName'],contents,inputWS,theoryFactoryType,skipCOWCorr=opt.skipCOWCorr,proc=r['proc'],year=r['year'],ignoreWarnings=opt.ignore_warnings) - for s,f in theoryFactoryType.items(): + for s,f in theoryFactoryType.iteritems(): if f in ['a_w','a_h']: - for direction in ['up','down']: - data.at[ir,"%s_%s_yield"%(s,direction)] = theorySystYields["%s_%s"%(s,direction)] - if not opt.skipCOWCorr: data.at[ir,"%s_%s_yield_COWCorr"%(s,direction)] = theorySystYields["%s_%s_COWCorr"%(s,direction)] + for direction in ['up','down']: + data.at[ir,"%s_%s_yield"%(s,direction)] = theorySystYields["%s_%s"%(s,direction)] + if not opt.skipCOWCorr: data.at[ir,"%s_%s_yield_COWCorr"%(s,direction)] = theorySystYields["%s_%s_COWCorr"%(s,direction)] else: - data.at[ir,"%s_yield"%s] = theorySystYields[s] - if not opt.skipCOWCorr: data.at[ir,"%s_yield_COWCorr"%s] = theorySystYields["%s_COWCorr"%s] + data.at[ir,"%s_yield"%s] = theorySystYields[s] + if not opt.skipCOWCorr: data.at[ir,"%s_yield_COWCorr"%s] = theorySystYields["%s_COWCorr"%s] # Remove the workspace and file from heap inputWS.Delete() @@ -267,8 +304,8 @@ def get_options(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # SAVE YIELDS DATAFRAME -print(" ..........................................................................................") +print " .........................................................................................." extStr = "_%s"%opt.ext if opt.ext != '' else '' -print(" --> Saving yields dataframe: ./yields%s/%s.pkl"%(extStr,opt.cat)) +print " --> Saving yields dataframe: ./yields%s/%s.pkl"%(extStr,opt.cat) if not os.path.isdir("./yields%s"%extStr): os.system("mkdir ./yields%s"%extStr) with open("./yields%s/%s.pkl"%(extStr,opt.cat),"wb") as fD: pickle.dump(data,fD) \ No newline at end of file diff --git a/Datacard/makeYields_old.py b/Datacard/makeYields_old.py index a2794d67..1c60753b 100644 --- a/Datacard/makeYields_old.py +++ b/Datacard/makeYields_old.py @@ -17,9 +17,9 @@ from commonObjects import * from commonTools import * -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " def leave(): - print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG DATACARD MAKER RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " exit(1) def get_options(): @@ -48,9 +48,9 @@ def get_options(): # Extract years and inputWSDir inputWSDirMap = od() for i in opt.inputWSDirMap.split(","): - print(" --> Taking %s input workspaces from: %s"%(i.split("=")[0],i.split("=")[1]) ) + print " --> Taking %s input workspaces from: %s"%(i.split("=")[0],i.split("=")[1]) if not os.path.isdir( i.split("=")[1] ): - print(" --> [ERROR] Directory %s does not exist. Leaving..."%i.split("=")[1]) + print " --> [ERROR] Directory %s does not exist. Leaving..."%i.split("=")[1] leave() inputWSDirMap[i.split("=")[0]] = i.split("=")[1] years = inputWSDirMap.keys() @@ -65,7 +65,7 @@ def get_options(): for j,jy in enumerate(years): if j > i: if set(procsMap[iy].split(",")) != set(procsMap[jy].split(",")): - print(" --> [ERROR] Mis-match in list of process for %s and %s. Intersection = %s"%(iy,jy,(set(procsMap[jy]).symmetric_difference(set(procsMap[iy]))))) + print " --> [ERROR] Mis-match in list of process for %s and %s. Intersection = %s"%(iy,jy,(set(procsMap[jy]).symmetric_difference(set(procsMap[iy])))) leave() # Define list of procs (alphabetically ordered) procs = procsMap[years[0]].split(",") @@ -78,7 +78,7 @@ def get_options(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # FILL DATAFRAME: all processes -print(" ..........................................................................................") +print " .........................................................................................." # Signal processes for year in years: @@ -123,7 +123,7 @@ def get_options(): _rate = float(lumiMap[year])*1000 # Add signal process to dataFrame: - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc,_cat)) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc,_cat) data.loc[len(data)] = [year,'sig',_procOriginal,_proc,_proc_s0,_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model,_rate] # Background and data processes @@ -138,8 +138,8 @@ def get_options(): _proc_s0 = '-' #not needed for data/bkg _inputWSFile = '-' #not needed for data/bkg _nominalDataName = '-' #not needed for data/bkg - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat)) - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat)) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat) data.loc[len(data)] = ["merged",'bkg',_proc_bkg,_proc_bkg,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_bkg,opt.bkgScaler] data.loc[len(data)] = ["merged",'data',_proc_data,_proc_data,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_data,-1] @@ -154,15 +154,15 @@ def get_options(): _proc_s0 = '-' #not needed for data/bkg _inputWSFile = '-' #not needed for data/bkg _nominalDataName = '-' #not needed for data/bkg - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat)) - print(" --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat)) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_bkg,_cat) + print " --> Adding to dataFrame: (proc,cat) = (%s,%s)"%(_proc_data,_cat) data.loc[len(data)] = ["year",'bkg',_proc_bkg,_proc_bkg,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_bkg,opt.bkgScaler] data.loc[len(data)] = ["year",'data',_proc_data,_proc_data,'-',_cat,_inputWSFile,_nominalDataName,_modelWSFile,_model_data,-1] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Yields: for each signal row in dataFrame extract the yield -print(" ..........................................................................................") +print " .........................................................................................." # * if systematics=True: also extract reweighted yields for each uncertainty source from tools.calcSystematics import factoryType, calcSystYields @@ -207,7 +207,7 @@ def get_options(): totalSignalRows = float(data[data['type']=='sig'].shape[0]) for ir,r in data[data['type']=='sig'].iterrows(): - print(" --> Extracting yields: (%s,%s) [%.1f%%]"%(r['proc'],r['cat'],100*(float(ir)/totalSignalRows))) + print " --> Extracting yields: (%s,%s) [%.1f%%]"%(r['proc'],r['cat'],100*(float(ir)/totalSignalRows)) # Open input WS file and extract workspace f_in = ROOT.TFile(r.inputWSFile) @@ -266,8 +266,8 @@ def get_options(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # SAVE YIELDS DATAFRAME -print(" ..........................................................................................") +print " .........................................................................................." extStr = "_%s"%opt.ext if opt.ext != '' else '' -print(" --> Saving yields dataframe: ./yields%s/%s.pkl"%(extStr,opt.cat)) +print " --> Saving yields dataframe: ./yields%s/%s.pkl"%(extStr,opt.cat) if not os.path.isdir("./yields%s"%extStr): os.system("mkdir ./yields%s"%extStr) with open("./yields%s/%s.pkl"%(extStr,opt.cat),"wb") as fD: pickle.dump(data,fD) diff --git a/Datacard/systematics.py b/Datacard/systematics.py index eaf69a1c..a2173589 100644 --- a/Datacard/systematics.py +++ b/Datacard/systematics.py @@ -23,13 +23,11 @@ # Normalisation uncertainties: enter interpretations {'name':'BR_hgg','title':'BR_hgg','type':'constant','prior':'lnN','correlateAcrossYears':1,'value':"0.98/1.021"}, # New scheme for ggH stage 1.2 - - - {'name':'QCDscale_ggHH','title':'QCDscale','type':'constant','prior':'lnN','correlateAcrossYears':1,'value':"0.950/1.022"}, - {'name':'pdf_Higgs_ggHH','title':'pdf_Higgs','type':'constant','prior':'lnN','correlateAcrossYears':1,'value':"1.030"}, - {'name':'alphaS_ggH','title':'alphaS','type':'constant','prior':'lnN','correlateAcrossYears':1,'value':"1.026"} - - + + # New scheme for ggH stage 1.2 + {'name':'QCDscale_ttH','title':'QCDscale_ttH','type':'constant','prior':'lnN','correlateAcrossYears':1,'value':"0.908/1.058"}, + {'name':'pdf_Higgs_ttH','title':'pdf_Higgs_ttH','type':'constant','prior':'lnN','correlateAcrossYears':1,'value':"1.030"}, + {'name':'alphaS_ttH','title':'alphaS_ttH','type':'constant','prior':'lnN','correlateAcrossYears':1,'value':"1.020"} ] # PDF weight # for i in range(1,60): theory_systematics.append( {'name':'pdfWeight_%g'%i, 'title':'CMS_hgg_pdfWeight_%g'%i, 'type':'factory','prior':'lnN','correlateAcrossYears':1,'tiers':['shape']} ) @@ -50,11 +48,9 @@ {'name':'photon_presel_sf_Diphoton_Photon_','title':'CMS_hgg_PreselSF','type':'factory','prior':'lnN','correlateAcrossYears':0}, {'name':'electron_veto_sf_Diphoton_Photon_','title':'CMS_hgg_electronVetoSF','type':'factory','prior':'lnN','correlateAcrossYears':0}, {'name':'trigger_sf_','title':'CMS_hgg_TriggerWeight','type':'factory','prior':'lnN','correlateAcrossYears':0}, - {'name':'L1_prefiring_sf_','title':'CMS_hgg_prefire','type':'factory','prior':'lnN','correlateAcrossYears':0}, - + {'name':'puid_','title':'PuJetID','type':'factory','prior':'lnN','correlateAcrossYears':0}, {'name':'puWeight_','title':'CMS_hgg_puWeight','type':'factory','prior':'lnN','correlateAcrossYears':0}, - {'name':'btag_deepjet_sf_SelectedbJet_jes_','title':'CMS_hgg_btag_jes','type':'factory','prior':'lnN','correlateAcrossYears':1}, {'name':'btag_deepjet_sf_SelectedbJet_lf_','title':'CMS_hgg_btag_lf','type':'factory','prior':'lnN','correlateAcrossYears':1}, {'name':'btag_deepjet_sf_SelectedbJet_hfstats1_','title':'CMS_hgg_btag_hfstats1','type':'factory','prior':'lnN','correlateAcrossYears':0}, @@ -64,18 +60,82 @@ {'name':'btag_deepjet_sf_SelectedbJet_hf_','title':'CMS_hgg_btag_hf','type':'factory','prior':'lnN','correlateAcrossYears':1}, {'name':'btag_deepjet_sf_SelectedbJet_lfstats1_','title':'CMS_hgg_btag_lfstats1','type':'factory','prior':'lnN','correlateAcrossYears':0}, {'name':'btag_deepjet_sf_SelectedbJet_lfstats2_','title':'CMS_hgg_btag_lfstats2','type':'factory','prior':'lnN','correlateAcrossYears':0}, - - # {'name':'PNet_','title':'CMS_hgg_ParticalNet','type':'factory','prior':'lnN','correlateAcrossYears':0}, - - {'name':'JES','title':'CMS_scale_j','type':'factory','prior':'lnN','correlateAcrossYears':0}, - {'name':'JER','title':'CMS_res_j','type':'factory','prior':'lnN','correlateAcrossYears':0}, - {'name':'FJER','title':'CMS_res_fj','type':'factory','prior':'lnN','correlateAcrossYears':0}, - {'name':'FJES','title':'CMS_scale_fj','type':'factory','prior':'lnN','correlateAcrossYears':0}, - {'name':'FJHEM','title':'CMS_FatjetHEM','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JER','title':'CMS_JER','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JES','title':'CMS_JES','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESAbsoluteMPFBias','title':'CMS_JESAbsoluteMPFBias','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESAbsoluteScale','title':'CMS_JESAbsoluteScale','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESAbsoluteStat','title':'CMS_JESAbsoluteStat','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESFlavorQCD','title':'CMS_JESFlavorQCD','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESFragmentation','title':'CMS_JESFragmentation','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESPileUpDataMC','title':'CMS_JESPileUpDataMC','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESPileUpPtBB','title':'CMS_JESPileUpPtBB','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESPileUpPtEC1','title':'CMS_JESPileUpPtEC1','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESPileUpPtEC2','title':'CMS_JESPileUpPtEC2','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESPileUpPtHF','title':'CMS_JESPileUpPtHF','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESPileUpPtRef','title':'CMS_JESPileUpPtRef','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativeBal','title':'CMS_JESRelativeBal','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativeFSR','title':'CMS_JESRelativeFSR','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativeJEREC1','title':'CMS_JESRelativeJEREC1','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativeJEREC2','title':'CMS_JESRelativeJEREC2','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativeJERHF','title':'CMS_JESRelativeJERHF','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativePtBB','title':'CMS_JESRelativePtBB','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativePtEC1','title':'CMS_JESRelativePtEC1','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESRelativePtEC2','title':'CMS_JESRelativePtEC2','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESRelativePtHF','title':'CMS_JESRelativePtHF','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESRelativeSample','title':'CMS_JESRelativeSample','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESRelativeStatEC','title':'CMS_JESRelativeStatEC','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESRelativeStatFSR','title':'CMS_JESRelativeStatFSR','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESRelativeStatHF','title':'CMS_JESRelativeStatHF','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'JESSinglePionECAL','title':'CMS_JESSinglePionECAL','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESSinglePionHCAL','title':'CMS_JESSinglePionHCAL','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'JESTimePtEta','title':'CMS_JESTimePtEta','type':'factory','prior':'lnN','correlateAcrossYears':0}, {'name':'JetHEM','title':'CMS_JetHEM','type':'factory','prior':'lnN','correlateAcrossYears':0} ] - +experimental_systematics_boost=[ + # Updated luminosity partial-correlation scheme: 13/5/21 (recommended simplified nuisances) + {'name':'lumi_13TeV_Uncorrelated','title':'lumi_13TeV_Uncorrelated','type':'constant','prior':'lnN','correlateAcrossYears':0,'value':{'2016pre':'1.010','2016post':'1.010','2017':'1.020','2018':'1.015'}}, + {'name':'lumi_13TeV_Correlated','title':'lumi_13TeV_Correlated','type':'constant','prior':'lnN','correlateAcrossYears':-1,'value':{'2016pre':'1.006','2016post':'1.006','2017':'1.009','2018':'1.020'}}, + {'name':'lumi_13TeV_Correlated_1718','title':'lumi_13TeV_Correlated_1718','type':'constant','prior':'lnN','correlateAcrossYears':-1,'value':{'2016pre':'-','2016post':'-','2017':'1.006','2018':'1.002'}}, + {'name':'photon_id_sf_Diphoton_Photon_','title':'CMS_hgg_MVASF','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'photon_presel_sf_Diphoton_Photon_','title':'CMS_hgg_PreselSF','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'electron_veto_sf_Diphoton_Photon_','title':'CMS_hgg_electronVetoSF','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'trigger_sf_','title':'CMS_hgg_TriggerWeight','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'L1_prefiring_sf_','title':'CMS_hgg_prefire','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'puWeight_','title':'CMS_hgg_puWeight','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'PNet_','title':'CMS_hgg_ParticalNet','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'FJER','title':'CMS_FJER','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'FJES','title':'CMS_FJES','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESAbsoluteMPFBias','title':'CMS_FJESAbsoluteMPFBias','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESAbsoluteScale','title':'CMS_FJESAbsoluteScale','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESAbsoluteStat','title':'CMS_FJESAbsoluteStat','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESFlavorQCD','title':'CMS_FJESFlavorQCD','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESFragmentation','title':'CMS_FJESFragmentation','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESPileUpDataMC','title':'CMS_FJESPileUpDataMC','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESPileUpPtBB','title':'CMS_FJESPileUpPtBB','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESPileUpPtEC1','title':'CMS_FJESPileUpPtEC1','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESPileUpPtEC2','title':'CMS_FJESPileUpPtEC2','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESPileUpPtHF','title':'CMS_FJESPileUpPtHF','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESPileUpPtRef','title':'CMS_FJESPileUpPtRef','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativeBal','title':'CMS_FJESRelativeBal','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativeFSR','title':'CMS_FJESRelativeFSR','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativeJEREC1','title':'CMS_FJESRelativeJEREC1','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativeJEREC2','title':'CMS_FJESRelativeJEREC2','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativeJERHF','title':'CMS_FJESRelativeJERHF','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativePtBB','title':'CMS_FJESRelativePtBB','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativePtEC1','title':'CMS_FJESRelativePtEC1','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESRelativePtEC2','title':'CMS_FJESRelativePtEC2','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESRelativePtHF','title':'CMS_FJESRelativePtHF','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESRelativeSample','title':'CMS_FJESRelativeSample','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESRelativeStatEC','title':'CMS_FJESRelativeStatEC','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESRelativeStatFSR','title':'CMS_FJESRelativeStatFSR','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESRelativeStatHF','title':'CMS_FJESRelativeStatHF','type':'factory','prior':'lnN','correlateAcrossYears':0}, + # {'name':'FJESSinglePionECAL','title':'CMS_FJESSinglePionECAL','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESSinglePionHCAL','title':'CMS_FJESSinglePionHCAL','type':'factory','prior':'lnN','correlateAcrossYears':1}, + # {'name':'FJESTimePtEta','title':'CMS_FJESTimePtEta','type':'factory','prior':'lnN','correlateAcrossYears':0}, + {'name':'FJHEM','title':'CMS_FatjetHEM','type':'factory','prior':'lnN','correlateAcrossYears':0}, + + ] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Shape nuisances: effect encoded in signal model diff --git a/Datacard/tools/STXS_tools.py b/Datacard/tools/STXS_tools.py index 26289661..fb5643de 100644 --- a/Datacard/tools/STXS_tools.py +++ b/Datacard/tools/STXS_tools.py @@ -104,3 +104,4 @@ STXSScaleCorrelationScheme['ttH_scale_highpt'] = ['ttH_PTH_GT300'] STXSScaleCorrelationScheme['tH_scale'] = ['tHq','tHW'] STXSScaleCorrelationScheme['bbH_scale'] = ['bbH'] +STXSScaleCorrelationScheme['ttH'] = ['ttH'] diff --git a/Datacard/tools/calcSystematics.py b/Datacard/tools/calcSystematics.py index 6f157dc7..e5aeb06a 100644 --- a/Datacard/tools/calcSystematics.py +++ b/Datacard/tools/calcSystematics.py @@ -14,7 +14,7 @@ def addConstantSyst(sd,_syst,options): if "json" in _syst['value']: fromJson = True with open( _syst['value'], "r" ) as jsonfile: uval = json.load(jsonfile) - + # Add column to dataFrame with default value if _syst['correlateAcrossYears'] == 1: sd[_syst['name']] = '-' @@ -22,21 +22,34 @@ def addConstantSyst(sd,_syst,options): sd.loc[(sd['type']=='sig'),_syst['name']] = sd[(sd['type']=='sig')].apply(lambda x: getValueFromJson(x,uval,_syst['name']), axis=1) else: # If signal and not NOTAG then set value - sd.loc[(sd['type']=='sig')&(~sd['cat'].str.contains("NOTAG")), _syst['name']] = _syst['value'] - + if "ttH" in _syst['name']: + + mask = (sd['type']=='sig')&(~sd['cat'].str.contains("NOTAG"))&~(sd['proc'].str.contains('gghh')) + else: + + mask = (sd['type']=='sig')&(~sd['cat'].str.contains("NOTAG")) + sd.loc[mask, _syst['name']] = _syst['value'] +# sd.loc[(sd['type']=='sig')&(~sd['cat'].str.contains("NOTAG")), _syst['name']] = _syst['value'] # Partial correlation elif _syst['correlateAcrossYears'] == -1: sd[_syst['name']] = '-' # Loop over years and set value for each year for year in options.years.split(","): - mask = (sd['type']=='sig')&(~sd['cat'].str.contains("NOTAG"))&(sd['year']==year) + if "ttH" in _syst['name']: + mask = (sd['type']=='sig')&(~sd['cat'].str.contains("NOTAG"))&(sd['year']==year)&~(sd['proc'].str.contains('gghh')) + else: + mask = (sd['type']=='sig')&(~sd['cat'].str.contains("NOTAG"))&(sd['year']==year) sd.loc[mask,_syst['name']] = _syst['value'][year] # If not correlate across years then create separate columns for each year and fill separately else: for year in options.years.split(","): sd["%s_%s"%(_syst['name'],year)] = '-' - sd.loc[(sd['type']=='sig')&(sd['year']==year)&(~sd['cat'].str.contains("NOTAG")), "%s_%s"%(_syst['name'],year)] = _syst['value'][year] + if "ttH" in _syst['name']: + mask = (sd['type']=='sig')&(sd['year']==year)&(~sd['cat'].str.contains("NOTAG"))&(~sd['proc'].str.contains('gghh')) + else: + mask = (sd['type']=='sig')&(sd['year']==year)&(~sd['cat'].str.contains("NOTAG")) + sd.loc[mask, "%s_%s"%(_syst['name'],year)] = _syst['value'][year] return sd @@ -76,7 +89,7 @@ def factoryType(d,s): if nWeights == 2: return "a_w" elif nWeights == 1: return "s_w" else: - print(" --> [ERROR] systematic %s: > 2 weights in workspace. Leaving..."%s['name']) + print " --> [ERROR] systematic %s: > 2 weights in workspace. Leaving..."%s['name'] sys.exit(1) # Check if RooDataHist exists for syst @@ -91,7 +104,7 @@ def factoryType(d,s): f.Close() # If never found: - print(" --> [ERROR] systematic %s: cannot extract type in factoryType function. Doesn't match requirement for (anti)-symmetric weights or anti-symmetric histograms. Leaving...") + print " --> [ERROR] systematic %s: cannot extract type in factoryType function. Doesn't match requirement for (anti)-symmetric weights or anti-symmetric histograms. Leaving..." sys.exit(1) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -104,7 +117,7 @@ def calcSystYields(_nominalDataName,_nominalDataContents,_inputWS,_systFactoryTy # Define dictionary to store systematic yield counters systYields = {} # Loop over systematics and create counter in dict - for s, f in _systFactoryTypes.items(): + for s, f in _systFactoryTypes.iteritems(): if f in ["a_h","a_w"]: for direction in ['up','down']: systYields["%s_%s"%(s,direction)] = 0 @@ -117,21 +130,21 @@ def calcSystYields(_nominalDataName,_nominalDataContents,_inputWS,_systFactoryTy # For systematics stored as weights (a_w,s_w) in nominal RooDataSets # Extract nominal dataset data_nominal = _inputWS.data(_nominalDataName) - # CHECK: is weight in contents: if not then add syst to systToSkip container + print(warning) + # CHECK: is weight in contents: if not then add syst to systToSkip container + print warning systToSkip = [] - for s,f in _systFactoryTypes.items(): + for s,f in _systFactoryTypes.iteritems(): print("SSSSSS",s) if f == "a_h": continue elif f == "a_w": if( "%sUp01sigma"%s not in _nominalDataContents )|( "%sDown01sigma"%s not in _nominalDataContents ): - systToSkip.append(s) - print(" --> [%s] Weight in nominal RooDataSet for systematic (%s) does not exist for (%s,%s). %s"%(errMessage,s,proc,year,errString)) - if not ignoreWarnings: sys.exit(1) + systToSkip.append(s) + print " --> [%s] Weight in nominal RooDataSet for systematic (%s) does not exist for (%s,%s). %s"%(errMessage,s,proc,year,errString) + if not ignoreWarnings: sys.exit(1) else: if s not in _nominalDataContents: - systToSkip.append(s) - print(" --> [%s] Weight in nominal RooDataSet for systematic (%s) does not exist for (%s,%s). %s"%(errMessage,s,proc,year,errString)) - if not ignoreWarnings: sys.exit(1) + systToSkip.append(s) + print " --> [%s] Weight in nominal RooDataSet for systematic (%s) does not exist for (%s,%s). %s"%(errMessage,s,proc,year,errString) + if not ignoreWarnings: sys.exit(1) # Loop over events and extract reweighted yields for i in range(0,data_nominal.numEntries()): @@ -140,7 +153,7 @@ def calcSystYields(_nominalDataName,_nominalDataContents,_inputWS,_systFactoryTy f_COWCorr = p.getRealValue("centralObjectWeight") if "centralObjectWeight" in _nominalDataContents else 1. f_NNLOPS = abs(p.getRealValue("NNLOPSweight")) if "NNLOPSweight" in _nominalDataContents else 1. # Loop over systematics: - for s, f in _systFactoryTypes.items(): + for s, f in _systFactoryTypes.iteritems(): if f == "a_h": continue @@ -171,8 +184,8 @@ def calcSystYields(_nominalDataName,_nominalDataContents,_inputWS,_systFactoryTy systYields["%s_down"%s] += w_down if not skipCOWCorr: if f_COWCorr != 0: - systYields["%s_up_COWCorr"%s] += w_up*(f_NNLOPS/f_COWCorr) - systYields["%s_down_COWCorr"%s] += w_down*(f_NNLOPS/f_COWCorr) + systYields["%s_up_COWCorr"%s] += w_up*(f_NNLOPS/f_COWCorr) + systYields["%s_down_COWCorr"%s] += w_down*(f_NNLOPS/f_COWCorr) # If symmetric weights else: @@ -188,32 +201,32 @@ def calcSystYields(_nominalDataName,_nominalDataContents,_inputWS,_systFactoryTy elif "alphaSWeight" in s: centralWeightStr = "scaleWeight_0" elif "pdfWeight" in s: centralWeightStr = "pdfWeight_0" else: centralWeightStr = "centralObjectWeight" - f_central = p.getRealValue(centralWeightStr) if centralWeightStr in _nominalDataContents else 1. - f = p.getRealValue(s) + f_central = p.getRealValue(centralWeightStr) if centralWeightStr in _nominalDataContents else 1. + f = p.getRealValue(s) # Checks: # 1) if both central weight and shifted weight are 0 then add nominal weight if( f_central == 0 )&( f == 0 ): systYields[s] += w if not skipCOWCorr: - if f_COWCorr != 0: - systYields["%s_COWCorr"%s] += w*(f_NNLOPS/f_COWCorr) - # 2) only central weight is zero then skip event - elif f_central == 0: continue - else: - # Add weights to counter - systYields[s] += w*(f/f_central) - if not skipCOWCorr: + if f_COWCorr != 0: + systYields["%s_COWCorr"%s] += w*(f_NNLOPS/f_COWCorr) + # 2) only central weight is zero then skip event + elif f_central == 0: continue + else: + # Add weights to counter + systYields[s] += w*(f/f_central) + if not skipCOWCorr: if f_COWCorr != 0: systYields["%s_COWCorr"%s] += w*(f_NNLOPS/f_COWCorr)*(f/f_central) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # For systematics stored as separate RooDataHists - for s, f in _systFactoryTypes.items(): + for s, f in _systFactoryTypes.iteritems(): if f == "a_h": data_hist_up, data_hist_down = _inputWS.data("%s_%sUp01sigma"%(_nominalDataName,s)), _inputWS.data("%s_%sDown01sigma"%(_nominalDataName,s)) - # Check if datasets exist: if not print(warning message and set to nominal weight) + # Check if datasets exist: if not print warning message and set to nominal weight if( data_hist_up == None )|( data_hist_down == None ): - print(" --> [%s] RooDataHist for systematic (%s) does not exist for (%s,%s). %s"%(errMessage,s,proc,year,errString)) + print " --> [%s] RooDataHist for systematic (%s) does not exist for (%s,%s). %s"%(errMessage,s,proc,year,errString) if not ignoreWarnings: sys.exit(1) systYields["%s_up"%s] = data_nominal.sumEntries() systYields["%s_down"%s] = data_nominal.sumEntries() @@ -254,7 +267,7 @@ def experimentalSystFactory(d,systs,ftype,options,_removal=False): # Remove yield columns from dataFrame if _removal: if f in ['a_h','a_w']: - for direction in ['up','down']: d.drop(['%s_%s_yield'%(s['name'],direction)], axis=1, inplace=True) + for direction in ['up','down']: d.drop(['%s_%s_yield'%(s['name'],direction)], axis=1, inplace=True) else: d.drop(['%s_yield'%s['name']], axis=1, inplace=True) return d @@ -272,12 +285,12 @@ def theorySystFactory(d,systs,ftype,options,stxsMergeScheme=None,_removal=False) mask = (d['proc_s0']==proc_s0)&(d['year']==year) d.loc[mask,'proc_s0_nominal_yield'] = d[mask]['nominal_yield%s'%corrExt].sum() for s in systs: - if s['type'] == 'constant': continue - f = ftype[s['name']] - if f in ['a_w','a_h']: - for direction in ['up','down']: + if s['type'] == 'constant': continue + f = ftype[s['name']] + if f in ['a_w','a_h']: + for direction in ['up','down']: d.loc[mask,'proc_s0_%s_%s_yield'%(s['name'],direction)] = d[mask]['%s_%s_yield%s'%(s['name'],direction,corrExt)].sum() - else: + else: d.loc[mask,'proc_s0_%s_yield'%s['name']] = d[mask]['%s_yield%s'%(s['name'],corrExt)].sum() # Calculate the per-STXS bin (per-year already in proc name) yield variations: add as column in dataFrame for proc in d[d['type']=='sig'].proc.unique(): @@ -291,15 +304,15 @@ def theorySystFactory(d,systs,ftype,options,stxsMergeScheme=None,_removal=False) for direction in ['up','down']: d.loc[mask,'proc_%s_%s_yield'%(s['name'],direction)] = d[mask]['%s_%s_yield%s'%(s['name'],direction,corrExt)].sum() else: - d.loc[mask,'proc_%s_yield'%s['name']] = d[mask]['%s_yield%s'%(s['name'],corrExt)].sum() + d.loc[mask,'proc_%s_yield'%s['name']] = d[mask]['%s_yield%s'%(s['name'],corrExt)].sum() # For merging STXS bins in parameter scheme: if options.doSTXSMerging: - for mergeName, mergeBins in stxsMergeScheme.items(): + for mergeName, mergeBins in stxsMergeScheme.iteritems(): for year in options.years.split(","): mBins = [] # add full name (inc year and and decay) for mb in mergeBins: mBins.append("%s_%s_hgg"%(mb,year)) - mask = (d['type']=='sig')&(d.apply(lambda x: x['proc'] in mBins, axis=1)) + mask = (d['type']=='sig')&(d.apply(lambda x: x['proc'] in mBins, axis=1)) d.loc[mask,'merge_%s_nominal_yield'%mergeName] = d[mask]['nominal_yield%s'%corrExt].sum() # Loop over systematics for s in systs: @@ -339,12 +352,12 @@ def theorySystFactory(d,systs,ftype,options,stxsMergeScheme=None,_removal=False) if options.doSTXSMerging: for mergeName in stxsMergeScheme: for s in systs: - if s['type'] == 'constant': continue + if s['type'] == 'constant': continue elif 'mnorm' not in s['tiers']: continue - for year in options.years.split(","): - # Remove NaN entries and require specific year - mask = (d['merge_%s_nominal_yield'%mergeName]==d['merge_%s_nominal_yield'%mergeName])&(d['year']==year)&(d['nominal_yield']!=0) - d.loc[mask,"%s_%s_mnorm"%(s['name'],mergeName)] = d[mask].apply(lambda x: compareYield(x,f,s['name'],mode='mnorm',mname=mergeName), axis=1) + for year in options.years.split(","): + # Remove NaN entries and require specific year + mask = (d['merge_%s_nominal_yield'%mergeName]==d['merge_%s_nominal_yield'%mergeName])&(d['year']==year)&(d['nominal_yield']!=0) + d.loc[mask,"%s_%s_mnorm"%(s['name'],mergeName)] = d[mask].apply(lambda x: compareYield(x,f,s['name'],mode='mnorm',mname=mergeName), axis=1) # Removal: remove yield columns from dataFrame if _removal: @@ -357,12 +370,12 @@ def theorySystFactory(d,systs,ftype,options,stxsMergeScheme=None,_removal=False) # Extract factory type f = ftype[s['name']] if f in ['a_h','a_w']: - for direction in ['up','down']: - for id_ in ids_: - d.drop(['%s%s_%s_yield'%(id_,s['name'],direction)], axis=1, inplace=True) + for direction in ['up','down']: + for id_ in ids_: + d.drop(['%s%s_%s_yield'%(id_,s['name'],direction)], axis=1, inplace=True) else: - for id_ in ids_: - d.drop(['%s%s_yield'%(id_,s['name'])], axis=1, inplace=True) + for id_ in ids_: + d.drop(['%s%s_yield'%(id_,s['name'])], axis=1, inplace=True) # Remove also nominal yields for all combinations ids_.remove('') @@ -385,7 +398,7 @@ def compareYield(row,factoryType,sname,mode='default',mname=None): else: return [1.] if factoryType in ["a_w","a_h"]: for direction in ['up','down']: - if row["proc_%s_%s_yield"%(sname,direction)] == 0: return [1.,1.] + if row["proc_%s_%s_yield"%(sname,direction)] == 0: return [1.,1.] else: if row["proc_%s_yield"%sname] == 0: return [1.] @@ -444,7 +457,7 @@ def compareYield(row,factoryType,sname,mode='default',mname=None): return [inc] else: - print(" --> [ERROR] theory systematic tier %s is not supported. Leaving"%mode) + print " --> [ERROR] theory systematic tier %s is not supported. Leaving"%mode sys.exit(1) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -463,7 +476,7 @@ def groupSystematics(d,systs,options,prefix="scaleWeight",groupings=[],stxsMerge skipGroup = False if( s0 == None )|( s1 == None ): - print(" --> [WARNING] No systematic exists for prefix %s and group %s. Skipping"%(prefix,gr)) + print " --> [WARNING] No systematic exists for prefix %s and group %s. Skipping"%(prefix,gr) skipGroup = True if skipGroup: continue @@ -511,7 +524,7 @@ def groupSystematics(d,systs,options,prefix="scaleWeight",groupings=[],stxsMerge def envelopeSystematics(d,systs,options,regexp=None,stxsMergeScheme=None,_removal=False): if regexp is None: - print(" --> [WARNING] No systematics with regexp (None). Cannot form envelope") + print " --> [WARNING] No systematics with regexp (None). Cannot form envelope" return d, systs # Extract systematics with regexp @@ -519,7 +532,7 @@ def envelopeSystematics(d,systs,options,regexp=None,stxsMergeScheme=None,_remova for s in systs: if regexp in s['name']: s_regexp.append(s) if len(s_regexp) == 0: - print(" --> [WARNING] No systematics with regexp (%s). Cannot form envelope"%regexp) + print " --> [WARNING] No systematics with regexp (%s). Cannot form envelope"%regexp return d, systs # Determine properties of envelope from first entry: remove "group" tag if in name @@ -534,7 +547,7 @@ def envelopeSystematics(d,systs,options,regexp=None,stxsMergeScheme=None,_remova # Loop over systematic tiers: all enveloped systematics must have the same tiers for s in s_regexp: if s['tiers'] != env['tiers']: - print(" --> [WARNING] Systematics in envelope have different tiers. Cannot form envelope") + print " --> [WARNING] Systematics in envelope have different tiers. Cannot form envelope" for tier in env['tiers']: if tier == 'mnorm': if options.doSTXSMerging: diff --git a/Datacard/tools/commonObjects.py b/Datacard/tools/commonObjects.py index 54f0e5ba..6ca6fd81 100644 --- a/Datacard/tools/commonObjects.py +++ b/Datacard/tools/commonObjects.py @@ -29,6 +29,7 @@ # Production modes and decay channel: for extract XS from combine productionModes = ['ggH','qqH','ttH','tHq','tHW','WH','ZH','bbH'] decayMode = 'hgg' +decayMode_2d = 'hjjgg' # flashgg input WS objects inputWSName__ = "tagsDumper/cms_hgg_13TeV" @@ -36,6 +37,7 @@ # Signal output WS objects outputWSName__ = "wsig" outputWSObjectTitle__ = "hggpdfsmrel" +outputWSObjectTitle2d__ = "hhbbggpdfsmrel" outputWSNuisanceTitle__ = "CMS_hgg_nuisance" outputNuisanceExtMap = {'scales':'%sscale'%sqrts__,'scalesCorr':'%sscaleCorr'%sqrts__,'smears':'%ssmear'%sqrts__,'scalesGlobal':'%sscale'%sqrts__} # Bkg output WS objects diff --git a/Datacard/tools/commonTools.py b/Datacard/tools/commonTools.py index ce8c6662..a7f10ed8 100644 --- a/Datacard/tools/commonTools.py +++ b/Datacard/tools/commonTools.py @@ -16,7 +16,7 @@ def rooiter(x): def extractWSFileNames( _inputWSDir ): if not os.path.isdir(_inputWSDir): - print(" --> [ERROR] No such directory (%s)") + print " --> [ERROR] No such directory (%s)" return False return glob.glob("%s/output_*.root"%_inputWSDir) @@ -81,7 +81,7 @@ def signalFromFileName(_fileName): elif "THW" in _fileName: p = "thw" elif "bbH" in _fileName: p = "bbh" else: - print(" --> [ERROR]: cannot extract production mode from input file name. Please update tools.commonTools.signalFromFileName") + print " --> [ERROR]: cannot extract production mode from input file name. Please update tools.commonTools.signalFromFileName" exit(1) return p,d diff --git a/Datacard/tools/submissionTools.py b/Datacard/tools/submissionTools.py index 1933d27c..35eaf07c 100644 --- a/Datacard/tools/submissionTools.py +++ b/Datacard/tools/submissionTools.py @@ -4,7 +4,7 @@ from commonObjects import * def run(cmd): - print("%s\n\n"%cmd) + print "%s\n\n"%cmd os.system(cmd) def writePreamble(_file): @@ -56,7 +56,7 @@ def writeSubFiles(_opts): for cidx in range(_opts['nCats']): c = _opts['cats'].split(",")[cidx] _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _f.write(" python3 %s/makeYields.py --cat %s --procs %s --ext %s --mass %s --inputWSDirMap %s %s\n"%(dwd__,c,_opts['procs'],_opts['ext'],_opts['mass'],_opts['inputWSDirMap'],_opts['modeOpts'])) + _f.write(" python %s/makeYields.py --cat %s --procs %s --ext %s --mass %s --inputWSDirMap %s %s\n"%(dwd__,c,_opts['procs'],_opts['ext'],_opts['mass'],_opts['inputWSDirMap'],_opts['modeOpts'])) _f.write("fi\n") # Close .sh file @@ -76,7 +76,7 @@ def writeSubFiles(_opts): c = _opts['cats'].split(",")[cidx] _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") writePreamble(_f) - _f.write("python3 %s/makeYields.py --cat %s --procs %s --ext %s --mass %s --inputWSDirMap %s --sigModelWSDir %s --sigModelExt %s --bkgModelWSDir %s --bkgModelExt %s %s\n"%(dwd__,c,_opts['procs'],_opts['ext'],_opts['mass'],_opts['inputWSDirMap'],_opts['sigModelWSDir'],_opts['sigModelExt'],_opts['bkgModelWSDir'],_opts['bkgModelExt'],_opts['modeOpts'])) + _f.write("python %s/makeYields.py --cat %s --procs %s --ext %s --mass %s --inputWSDirMap %s --sigModelWSDir %s --sigModelExt %s --bkgModelWSDir %s --bkgModelExt %s %s\n"%(dwd__,c,_opts['procs'],_opts['ext'],_opts['mass'],_opts['inputWSDirMap'],_opts['sigModelWSDir'],_opts['sigModelExt'],_opts['bkgModelWSDir'],_opts['bkgModelExt'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) @@ -89,7 +89,7 @@ def submitFiles(_opts): _executable = "condor_yields_%s"%_opts['ext'] cmdLine = "cd %s; condor_submit %s.sub; cd %s"%(_jobdir,_executable,dwd__) run(cmdLine) - print(" --> Finished submitting files") + print " --> Finished submitting files" # SGE elif _opts['batch'] in ['IC','SGE']: @@ -103,7 +103,7 @@ def submitFiles(_opts): _subfile = "%s/%s_%s"%(_jobdir,_executable,c) cmdLine = "qsub -q hep.q %s -o %s.log -e %s.err %s.sh"%(jobOptsStr,_subfile,_subfile,_subfile) run(cmdLine) - print(" --> Finished submitting files") + print " --> Finished submitting files" # Running locally elif _opts['batch'] == 'local': @@ -113,6 +113,6 @@ def submitFiles(_opts): _subfile = "%s/%s_%s"%(_jobdir,_executable,c) cmdLine = "bash %s.sh"%_subfile run(cmdLine) - print(" --> Finished running files") + print " --> Finished running files" diff --git a/Datacard/tools/writeToDatacard.py b/Datacard/tools/writeToDatacard.py index 01419376..e72288e0 100644 --- a/Datacard/tools/writeToDatacard.py +++ b/Datacard/tools/writeToDatacard.py @@ -40,12 +40,16 @@ def writeProcesses(f,d,options): lbin_cat += "%-55s "%cat lobs_cat += "%-55s "%"-1" sigID = 0 + singleID = 2 # Loop over rows for respective category for ir,r in d[d['cat']==cat].iterrows(): if r['proc'] == "data_obs": continue lbin_procXcat += "%-55s "%cat lproc += "%-55s "%r['proc'] if r['proc'] == "bkg_mass": lprocid += "%-55s "%"1" + elif any(keyword in r['proc'] for keyword in ["VBF", "GGH", "VH", "ttH"]): + lprocid += "%-55s "%singleID + singleID += 1 else: lprocid += "%-55s "%sigID sigID -= 1 @@ -66,9 +70,7 @@ def writeSystematic(f,d,s,options,stxsMergeScheme=None,scaleCorrScheme=None): # For signal shape systematics add simple line if s['type'] == 'signal_shape': stitle = "%s_%s"%(outputWSNuisanceTitle__,s['title']) - if s['mode'] != 'other': - if outputNuisanceExtMap[s['mode']] != '': - stitle += "_%s"%outputNuisanceExtMap[s['mode']] + if s['mode'] != 'other': stitle += "_%s"%outputNuisanceExtMap[s['mode']] # If not correlated: separate nuisance per year if s['mode'] in ['scales','smears']: for year in options.years.split(","): @@ -107,54 +109,54 @@ def writeSystematic(f,d,s,options,stxsMergeScheme=None,scaleCorrScheme=None): # Construct syst line/lines if separate by year if(s['correlateAcrossYears'] == 1)|(s['correlateAcrossYears'] == -1): - stitle = "%s%s%s"%(s['title'],mergeStr,tierStr) - lsyst = '%-50s %-10s '%(stitle,s['prior']) - # Loop over categories and then iterate over rows in category - for cat in d.cat.unique(): - for ir,r in d[d['cat']==cat].iterrows(): - if r['proc'] == "data_obs": continue - # Extract value and add to line (with checks) - sval = r["%s%s%s"%(s['name'],mergeStr,tierStr)] - lsyst = addSyst(lsyst,sval,stitle,r['proc'],cat) - # Remove final space from line and add to file - f.write("%s\n"%lsyst[:-1]) + stitle = "%s%s%s"%(s['title'],mergeStr,tierStr) + lsyst = '%-50s %-10s '%(stitle,s['prior']) + # Loop over categories and then iterate over rows in category + for cat in d.cat.unique(): + for ir,r in d[d['cat']==cat].iterrows(): + if r['proc'] == "data_obs": continue + # Extract value and add to line (with checks) + sval = r["%s%s%s"%(s['name'],mergeStr,tierStr)] + lsyst = addSyst(lsyst,sval,stitle,r['proc'],cat) + # Remove final space from line and add to file + f.write("%s\n"%lsyst[:-1]) # For uncorrelated scale weights: not for merged bins if options.doSTXSScaleCorrelationScheme: if(tier!='mnorm')&("scaleWeight" in s['name']): - for ps,psProcs in scaleCorrScheme.items(): - psStr = "_%s"%ps - stitle = "%s%s%s"%(s['title'],psStr,tierStr) - lsyst = '%-50s %-10s '%(stitle,s['prior']) - # Loop over categories and then iterate over rows in category - for cat in d.cat.unique(): - for ir,r in d[d['cat']==cat].iterrows(): - if r['proc'] == "data_obs": continue - # Remove year+hgg tags from proc - p = re.sub("_2016_hgg","",r['proc']) - p = re.sub("_2017_hgg","",p) - p = re.sub("_2018_hgg","",p) - p = re.sub("_2022preEE_hgg","",p) - p = re.sub("_2022postEE_hgg","",p) - # Add value if in proc in phase space else - - if p in psProcs: sval = r["%s%s"%(s['name'],tierStr)] - else: sval = '-' - lsyst = addSyst(lsyst,sval,stitle,r['proc'],cat) + for ps,psProcs in scaleCorrScheme.iteritems(): + psStr = "_%s"%ps + stitle = "%s%s%s"%(s['title'],psStr,tierStr) + lsyst = '%-50s %-10s '%(stitle,s['prior']) + # Loop over categories and then iterate over rows in category + for cat in d.cat.unique(): + for ir,r in d[d['cat']==cat].iterrows(): + if r['proc'] == "data_obs": continue + # Remove year+hgg tags from proc + p = re.sub("_2016post_hgg","",r['proc']) + # p = re.sub("_2016pre_hgg","",r['proc']) + p = re.sub("_2017_hgg","",p) + p = re.sub("_2018_hgg","",p) + # p = re.sub("_2016pre_hgg","",p) + # Add value if in proc in phase space else - + if p in psProcs: sval = r["%s%s"%(s['name'],tierStr)] + else: sval = '-' + lsyst = addSyst(lsyst,sval,stitle,r['proc'],cat) # Remove final space from line and add to file f.write("%s\n"%lsyst[:-1]) else: - for year in options.years.split(","): - stitle = "%s%s%s_%s"%(s['title'],mergeStr,tierStr,year) - sname = "%s%s%s_%s"%(s['name'],mergeStr,tierStr,year) - lsyst = '%-50s %-10s '%(stitle,s['prior']) - # Loop over categories and then iterate over rows in category - for cat in d.cat.unique(): - for ir,r in d[d['cat']==cat].iterrows(): - if r['proc'] == "data_obs": continue - # Extract value and add to line (with checks) - sval = r[sname] - lsyst = addSyst(lsyst,sval,stitle,r['proc'],cat) - # Remove final space from line and add to file - f.write("%s\n"%lsyst[:-1]) + for year in options.years.split(","): + stitle = "%s%s%s_%s"%(s['title'],mergeStr,tierStr,year) + sname = "%s%s%s_%s"%(s['name'],mergeStr,tierStr,year) + lsyst = '%-50s %-10s '%(stitle,s['prior']) + # Loop over categories and then iterate over rows in category + for cat in d.cat.unique(): + for ir,r in d[d['cat']==cat].iterrows(): + if r['proc'] == "data_obs": continue + # Extract value and add to line (with checks) + sval = r[sname] + lsyst = addSyst(lsyst,sval,stitle,r['proc'],cat) + # Remove final space from line and add to file + f.write("%s\n"%lsyst[:-1]) return True @@ -170,7 +172,7 @@ def addSyst(l,v,s,p,c): if abs(v[0]-1)<0.0005: l += "%-15s "%"-" # Check 2: variation is not negative. Print message and add - to datacard (cleaned later) elif v[0] < 0.: - print(" --> [WARNING] systematic %s: negative variation for (%s,%s)"%(s,p,c)) + print " --> [WARNING] systematic %s: negative variation for (%s,%s)"%(s,p,c) #vstr = "%s"%v[0] vstr = "-" l += "%-15s "%v[0] @@ -182,7 +184,7 @@ def addSyst(l,v,s,p,c): if(abs(v[0]-1)<0.0005)&(abs(v[1]-1)<0.0005): l += "%-15s "%"-" # Check 2: neither variation is negative. Print message and add - to datacard (cleaned later) elif(v[0]<0.)|(v[1]<0.): - print(" --> [WARNING] systematic %s: negative variation for (%s,%s)"%(s,p,c)) + print " --> [WARNING] systematic %s: negative variation for (%s,%s)"%(s,p,c) #vstr = "%.3f/%.3f"%(v[0],v[1]) vstr = "-" l += "%-15s "%vstr @@ -193,7 +195,7 @@ def addSyst(l,v,s,p,c): l += "%-15s "%vstr return l else: - print(" --> [ERROR] systematic %s: value does not have type string or list for (%s,%s). Leaving..."%(s['title'],p,c)) + print " --> [ERROR] systematic %s: value does not have type string or list for (%s,%s). Leaving..."%(s['title'],p,c) sys.exit(1) def writeMCStatUncertainty(f,d,options): @@ -219,10 +221,7 @@ def writeMCStatUncertainty(f,d,options): for cat in d.cat.unique(): for ir,r in d[d['cat']==cat].iterrows(): if r['proc'] == "data_obs": continue - elif r['type'] == "sig": - sval = scval if cat == scat else '-' - else: - sval = '-' + sval = scval if cat == scat else '-' # Extract value and add to line (with checks) lsyst = addSyst(lsyst,sval,stitle,r['proc'],cat) # Remove final space from line and add to file diff --git a/Signal/RunPackager.py b/Signal/RunPackager.py index f6eba17a..aad35b33 100644 --- a/Signal/RunPackager.py +++ b/Signal/RunPackager.py @@ -5,8 +5,8 @@ from collections import OrderedDict as od # Import tools from ./tools -from commonTools import * -from commonObjects import * +from tools.commonTools import * +from tools.commonObjects import * from tools.submissionTools import * def get_options(): @@ -21,7 +21,7 @@ def get_options(): parser.add_option('--batch', dest='batch', default='condor', help='Batch') parser.add_option('--queue', dest='queue', default='espresso', help='Queue: should not take long (microcentury will do)') parser.add_option('--jobOpts', dest='jobOpts', default='', help="Additional options to add to job submission. For Condor separate individual options with a colon (specify all within quotes e.g. \"option_xyz = abc+option_123 = 456\")") - parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print(submission files only") + parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print submission files only") return parser.parse_args() (opt,args) = get_options() diff --git a/Signal/RunPlotter.py b/Signal/RunPlotter.py index 176f3d06..358577e9 100644 --- a/Signal/RunPlotter.py +++ b/Signal/RunPlotter.py @@ -5,8 +5,8 @@ import json from optparse import OptionParser -from commonTools import * -from commonObjects import * +from tools.commonTools import * +from tools.commonObjects import * from tools.plottingTools import * def get_options(): @@ -100,6 +100,7 @@ def get_options(): # Iterate over norms: extract total category norm catNorm = 0 for k, norm in norms.items(): + proc, year = k.split("__") w.var("IntLumi").setVal(lumiScaleFactor*lumiMap[year]) catNorm += norm.getVal() diff --git a/Signal/RunSignalScripts.py b/Signal/RunSignalScripts.py index 05be91b9..190d7452 100644 --- a/Signal/RunSignalScripts.py +++ b/Signal/RunSignalScripts.py @@ -6,8 +6,8 @@ from collections import OrderedDict as od import importlib # Import tools from ./tools -from commonTools import * -from commonObjects import * +from tools.commonTools import * +from tools.commonObjects import * from tools.submissionTools import * def get_options(): @@ -18,7 +18,7 @@ def get_options(): parser.add_option('--modeOpts', dest='modeOpts', default='', help="Additional options to add to command line when running scripts (specify all within quotes e.g. \"--XYZ ABC\")") parser.add_option('--jobOpts', dest='jobOpts', default='', help="Additional options to add to job submission. For Condor separate individual options with a colon (specify all within quotes e.g. \"option_xyz = abc+option_123 = 456\")") parser.add_option('--groupSignalFitJobsByCat', dest='groupSignalFitJobsByCat', default=False, action="store_true", help="Option to group signalFit jobs by category") - parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print(submission files only") + parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print submission files only") return parser.parse_args() (opt,args) = get_options() @@ -37,7 +37,10 @@ def leave(): # os.system("cp %s config.py"%opt.inputConfig) config=opt.inputConfig config = os.path.splitext(config)[0] + # config = config.split("") config=config.replace("/",".") + print("fff",config) + module = importlib.import_module(config) # from config import signalScriptCfg _cfg = module.signalScriptCfg @@ -85,7 +88,6 @@ def leave(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Extract list of filenames WSFileNames = extractWSFileNames(options['inputWSDir']) -print("chuw",WSFileNames) if not WSFileNames: leave() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Signal/config_2000_2017_250_cat0.py b/Signal/config_2000_2017_250_cat0.py new file mode 100644 index 00000000..85199313 --- /dev/null +++ b/Signal/config_2000_2017_250_cat0.py @@ -0,0 +1,28 @@ +# Config file: options for signal fitting + +_year = '2017' + +signalScriptCfg = { + + # Setup + 'inputWSDir':'/eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000_boost/opt/2017/root/addsys/ws_gghh_cat0_250', + 'procs':'auto', # if auto: inferred automatically from filenames + 'cats':'auto', # if auto: inferred automatically from (0) workspace + 'ext':'dcb_%s_boost_masscut_M2000_M250_cat0'%_year, + 'analysis':'STXS', # To specify which replacement dataset mapping (defined in ./python/replacementMap.py) + 'year':'%s'%_year, # Use 'combined' if merging all years: not recommended + 'massPoints':'125', + + #Photon shape systematics + 'scales':'scale', # separate nuisance per year + 'scalesCorr':'material,fnuf', # correlated across years + 'scalesGlobal':'', # affect all processes equally, correlated across years + 'smears':'smear', # separate nuisance per year + + # Job submission options + 'batch':'local', # ['condor','SGE','IC','local'] + 'queue':'hep.q' + #'batch':'condor', # ['condor','SGE','IC','local'] + #'queue':'espresso', + +} diff --git a/Signal/config_3000_2018_250_cat0.py b/Signal/config_3000_2018_250_cat0.py new file mode 100644 index 00000000..bfcfce98 --- /dev/null +++ b/Signal/config_3000_2018_250_cat0.py @@ -0,0 +1,28 @@ +# Config file: options for signal fitting + +_year = '2018' + +signalScriptCfg = { + + # Setup + 'inputWSDir':'/eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_3000/opt/2018/root/ws_ttH_cat0_250', + 'procs':'auto', # if auto: inferred automatically from filenames + 'cats':'auto', # if auto: inferred automatically from (0) workspace + 'ext':'dcb_%s_res_M3000_M250_cat0'%_year, + 'analysis':'STXS', # To specify which replacement dataset mapping (defined in ./python/replacementMap.py) + 'year':'%s'%_year, # Use 'combined' if merging all years: not recommended + 'massPoints':'125', + + #Photon shape systematics + 'scales':'', # separate nuisance per year + 'scalesCorr':'', # correlated across years + 'scalesGlobal':'', # affect all processes equally, correlated across years + 'smears':'', # separate nuisance per year + + # Job submission options + 'batch':'local', # ['condor','SGE','IC','local'] + 'queue':'hep.q' + #'batch':'condor', # ['condor','SGE','IC','local'] + #'queue':'espresso', + +} diff --git a/Signal/createMjjMggModel.py b/Signal/createMjjMggModel.py index b01abad0..59de54a9 100644 --- a/Signal/createMjjMggModel.py +++ b/Signal/createMjjMggModel.py @@ -21,7 +21,7 @@ def get_options(): #parser.add_option("--inp-dir-mjj",type='string',dest="inp_dir_mjj",default='/work/nchernya/DiHiggs/CMSSW_7_4_7/src/flashggFinalFit/Signal/output/mjj/04_02_2020_v3/') #parser.add_option("--inp-file",type='string',dest="inp_file",default='CMS-HGG_sigfit_2016_2017_2018_04_02_2020.root') #parser.add_option("--inp-file-mjj",type='string',dest="inp_file_mjj",default='workspace_out_mjj_12_02_2020.root') - parser.add_option("--out-dir",type='string',dest="out_dir",default='./Mjjgg/') + parser.add_option("--out-dir",type='string',dest="out_dir",default='../res_1000/Doublefit/') #parser.add_option("--cats",type='string',dest="cats",default='DoubleHTag_0,DoubleHTag_1,DoubleHTag_2,DoubleHTag_3,DoubleHTag_4,DoubleHTag_5,DoubleHTag_6,DoubleHTag_7,DoubleHTag_8,DoubleHTag_9,DoubleHTag_10,DoubleHTag_11') # parser.add_option("--cats",type='string',dest="cats",default='DoubleHTag_0,DoubleHTag_1,DoubleHTag_2,DoubleHTag_3,DoubleHTag_4,DoubleHTag_5,DoubleHTag_6,DoubleHTag_7,DoubleHTag_8,DoubleHTag_9') parser.add_option("--cats",type='string',dest="cats",default='resolved_cat0') @@ -32,38 +32,41 @@ def get_options(): cats = opt.cats.split(',') input_procs = opt.inp_procs.split(',') -print(opt.inp_dir + opt.inp_file) +print("check",input_procs) tfile = ROOT.TFile(opt.inp_file) tfile_mjj = ROOT.TFile(opt.inp_file_mjj) -ws_mgg = tfile.Get("multipdf") -ws_mjj = tfile_mjj.Get("multipdf") +ws_mgg = tfile.Get("wsig_13TeV") +ws_mjj = tfile_mjj.Get("wsig_13TeV") for num,f in enumerate(input_procs): - for cat_num,cat in enumerate(cats) : - #for cat_num,cat in enumerate([cats[0]]) : - pdf_mjj = "CMS_hgg_%s_13TeV_bkgshape"%(cat) - pdf_mgg = "CMS_hgg_%s_13TeV_bkgshape"%(cat) - #ws_mjj.pdf(pdf_mjj).Print("v") - print("pdf",pdf_mjj,pdf_mgg) - pdfjj=ws_mjj.pdf(pdf_mjj) - pdfjj.SetName("CMS_hjj_resolved_cat0_13TeV_bkgshape") - getattr(ws_mgg, 'import')(pdfjj,ROOT.RooCmdArg()) - getattr(ws_mgg, 'import')(ws_mjj.var("Dijet_mass"),ROOT.RooCmdArg()) - prod_pdf = "CMS_hjjgg_%s_13TeV_bkgshape"%(cat) - #print(ws_mgg.pdf(pdf_mgg)) - #print(ws_mjj.pdf(pdf_mjj)) - sig_prod_pdf = ROOT.RooProdPdf(prod_pdf,"",ws_mgg.pdf(pdf_mgg),pdfjj) - #sig_prod_pdf.Print("v") - getattr(ws_mgg, 'import')(sig_prod_pdf,ROOT.RooFit.RecycleConflictNodes()) - #Save normalization for combine - #sig_prod_pdf_norm = (ws_mgg.function(pdf_mgg+"_norm")).clone(prod_pdf+"_norm") - sig_prod_pdf_norm = (ws_mjj.var(pdf_mjj+"_norm")).clone(prod_pdf+"_norm") #take norm from mjj? - getattr(ws_mgg, 'import')(sig_prod_pdf_norm,ROOT.RooFit.RecycleConflictNodes()) - ##Printing normalization - # ws_mgg.var("MH").setVal(125.) #just to check that the normalization is the same for Mgg and Mjj, it is of course. - # print('mgg : ',ws_mgg.function(pdf_mgg+"_norm").getVal(),', mjj : ',ws_mjj.function(pdf_mjj+"_norm").getVal(), ", imported product : ",ws_mgg.function(prod_pdf+"_norm").getVal() #just to check that the normalization is the same for Mgg and Mjj, it is of course.) - + # for year in '2017,2018,2016pre,2016post'.split(','): + for year in '2017,2018,2016pre,2016post'.split(','): + for cat_num,cat in enumerate(cats) : + #for cat_num,cat in enumerate([cats[0]]) : + pdf_mjj_old = "hggpdfsmrel_%s_%s_%s_13TeV"%(input_procs[num],year,cat) + pdf_mjj = "hjjpdfsmrel_%s_%s_%s_13TeV"%(input_procs[num],year,cat) + old_pdf = ws_mjj.pdf(pdf_mjj_old) + new_pdf = old_pdf.Clone(pdf_mjj) + new_pdf.SetName(pdf_mjj) + + pdf_mgg = "hggpdfsmrel_%s_%s_%s_13TeV"%(input_procs[num],year,cat) + + getattr(ws_mgg, 'import')(new_pdf,ROOT.RooCmdArg()) + # getattr(ws_mgg, 'import')(ws_mjj.var("Dijet_mass"),ROOT.RooCmdArg()) + prod_pdf = "hhbbggpdfsmrel_%s_%s_%s_13TeV"%(input_procs[num],year,cat) + + sig_prod_pdf = ROOT.RooProdPdf(prod_pdf,"",ws_mgg.pdf(pdf_mgg),new_pdf) + #sig_prod_pdf.Print("v") + getattr(ws_mgg, 'import')(sig_prod_pdf,ROOT.RooFit.RecycleConflictNodes()) + #Save normalization for combine + #sig_prod_pdf_norm = (ws_mgg.function(pdf_mgg+"_norm")).clone(prod_pdf+"_norm") + sig_prod_pdf_norm = (ws_mjj.function(pdf_mjj_old+"_norm")).clone(prod_pdf+"_norm") #take norm from mjj? + getattr(ws_mgg, 'import')(sig_prod_pdf_norm,ROOT.RooFit.RecycleConflictNodes()) + ##Printing normalization + # ws_mgg.var("MH").setVal(125.) #just to check that the normalization is the same for Mgg and Mjj, it is of course. + # print 'mgg : ',ws_mgg.function(pdf_mgg+"_norm").getVal(),', mjj : ',ws_mjj.function(pdf_mjj+"_norm").getVal(), ", imported product : ",ws_mgg.function(prod_pdf+"_norm").getVal() #just to check that the normalization is the same for Mgg and Mjj, it is of course. + -f_out = ROOT.TFile.Open(opt.out_dir+"CMS-HGG_multipdf_resolved_cat0_2016_2017_2018_%s_%s.root"%(opt.mass,opt.cats),"RECREATE") +f_out = ROOT.TFile.Open(opt.out_dir+"CMS-HGG_sigfit_packaged_%s_%s.root"%(opt.cats,input_procs[num]),"RECREATE") ws_mgg.Write() f_out.Close() diff --git a/Signal/scripts/calcPhotonSyst.py b/Signal/scripts/calcPhotonSyst.py index d60dca62..d04a2c5b 100644 --- a/Signal/scripts/calcPhotonSyst.py +++ b/Signal/scripts/calcPhotonSyst.py @@ -2,7 +2,7 @@ # * Run script once per category, loops over signal processes # * Output is pandas dataframe -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG PHOTON SYST CALCULATOR ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG PHOTON SYST CALCULATOR ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " import ROOT import pandas as pd import pickle @@ -17,13 +17,14 @@ from commonObjects import * def leave(): - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG PHOTON SYST CALCULATOR (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG PHOTON SYST CALCULATOR (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " sys.exit(1) def get_options(): parser = OptionParser() parser.add_option("--xvar", dest='xvar', default='CMS_hgg_mass', help="Observable") parser.add_option("--cat", dest='cat', default='', help="RECO category") + parser.add_option("--mass", dest='mass', default=125, help="mass") parser.add_option("--procs", dest='procs', default='', help="Signal processes") parser.add_option("--ext", dest='ext', default='', help="Extension") parser.add_option("--inputWSDir", dest='inputWSDir', default='', help="Input flashgg WS directory") @@ -37,9 +38,9 @@ def get_options(): parser.add_option("--thresholdRate", dest='thresholdRate', default=0.05, type='float', help='Reject mean variations if larger than thresholdRate') return parser.parse_args() (opt,args) = get_options() - +print(opt.mass,"checkcccc") # RooRealVar to fill histograms -mgg = ROOT.RooRealVar(opt.xvar,opt.xvar,125) +mgg = ROOT.RooRealVar(opt.xvar,opt.xvar,opt.mass) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Function to extact histograms from WS @@ -58,15 +59,15 @@ def getHistograms( _ws, _nominalDataName, _sname ): # Check if not NONE type and fill histograms if rds_nominal: rds_nominal.fillHistogram(_hists['nominal'],ROOT.RooArgList(mgg)) else: - print(" --> [ERROR] Could not extract nominal RooDataSet: %s. Leaving"%_nominalDataName) + print " --> [ERROR] Could not extract nominal RooDataSet: %s. Leaving"%_nominalDataName sys,exit(1) if rdh_up: rdh_up.fillHistogram(_hists['up'],ROOT.RooArgList(mgg)) else: - print(" --> [ERROR] Could not extract RooDataHist (%s,up) for %s. Leaving"%(_sname,_nominalDataName)) + print " --> [ERROR] Could not extract RooDataHist (%s,up) for %s. Leaving"%(_sname,_nominalDataName) sys,exit(1) if rdh_down: rdh_down.fillHistogram(_hists['down'],ROOT.RooArgList(mgg)) else: - print(" --> [ERROR] Could not extract RooDataHist (%s,down) for %s. Leaving"%(_sname,_nominalDataName)) + print " --> [ERROR] Could not extract RooDataHist (%s,down) for %s. Leaving"%(_sname,_nominalDataName) sys,exit(1) return _hists @@ -74,7 +75,7 @@ def getHistograms( _ws, _nominalDataName, _sname ): # Functions to extract mean, sigma and rate variations def getMeanVar(_hists): mu, muVar = {}, {} - for htype,h in _hists.items(): mu[htype] = h.GetMean() + for htype,h in _hists.iteritems(): mu[htype] = h.GetMean() if mu['nominal']==0: return 0 for htype in ['up','down']: muVar[htype] = (mu[htype]-mu['nominal'])/mu['nominal'] x = (abs(muVar['up'])+abs(muVar['down']))/2 @@ -84,7 +85,7 @@ def getMeanVar(_hists): def getSigmaVar(_hists): sigma, sigmaVar = {}, {} - for htype,h in _hists.items(): sigma[htype] = getEffSigma(h) + for htype,h in _hists.iteritems(): sigma[htype] = getEffSigma(h) if sigma['nominal']==0: return 0 for htype in ['up','down']: sigmaVar[htype] = (sigma[htype]-sigma['nominal'])/sigma['nominal'] x = (abs(sigmaVar['up'])+abs(sigmaVar['down']))/2 @@ -93,7 +94,7 @@ def getSigmaVar(_hists): def getRateVar(_hists): rate, rateVar = {}, {} - for htype,h in _hists.items(): rate[htype] = h.Integral() + for htype,h in _hists.iteritems(): rate[htype] = h.Integral() # Shape variations can both be one sided therefore use midpoint as nominal rate['midpoint'] = 0.5*(rate['up']+rate['down']) if rate['midpoint']==0: return 0 @@ -117,13 +118,12 @@ def getRateVar(_hists): # Glob M125 filename _WSFileName = glob.glob("%s/output*M125*%s.root"%(opt.inputWSDir,_proc))[0] _nominalDataName = "%s_125_%s_%s"%(procToData(_proc.split("_")[0]),sqrts__,opt.cat) - data = pd.concat([data,pd.DataFrame([{'proc':_proc,'cat':opt.cat,'inputWSFile':_WSFileName,'nominalDataName':_nominalDataName}])], ignore_index=True, sort=False) - # data = data.append({'proc':_proc,'cat':opt.cat,'inputWSFile':_WSFileName,'nominalDataName':_nominalDataName}, ignore_index=True, sort=False) + data = data.append({'proc':_proc,'cat':opt.cat,'inputWSFile':_WSFileName,'nominalDataName':_nominalDataName}, ignore_index=True, sort=False) # Loop over rows in dataFrame and open ws for ir,r in data.iterrows(): - print(" --> Processing (%s,%s)"%(r['proc'],opt.cat)) + print " --> Processing (%s,%s)"%(r['proc'],opt.cat) # Open ROOT file and extract workspace f = ROOT.TFile(r['inputWSFile']) @@ -134,21 +134,21 @@ def getRateVar(_hists): for s in getattr(opt,stype).split(","): if s == '': continue sname = "%s%s"%(inputNuisanceExtMap[stype],s) - print(" * Systematic = %s (%s)"%(sname,stype)) + print " * Systematic = %s (%s)"%(sname,stype) hists = getHistograms(inputWS,r['nominalDataName'],sname) # If nominal yield = 0: if hists['nominal'].Integral() == 0: _meanVar, _sigmaVar, _rateVar = 0, 0, 0 else: - _meanVar = getMeanVar(hists) - _sigmaVar = getSigmaVar(hists) - _rateVar = getRateVar(hists) + _meanVar = getMeanVar(hists) + _sigmaVar = getSigmaVar(hists) + _rateVar = getRateVar(hists) # Add values to dataFrame data.at[ir,'%s_%s_mean'%(s,outputNuisanceExtMap[stype])] = _meanVar data.at[ir,'%s_%s_sigma'%(s,outputNuisanceExtMap[stype])] = _sigmaVar data.at[ir,'%s_%s_rate'%(s,outputNuisanceExtMap[stype])] = _rateVar # Delete histograms - for h in hists.values(): h.Delete() + for h in hists.itervalues(): h.Delete() # Delete ws and close file inputWS.Delete() @@ -160,4 +160,4 @@ def getRateVar(_hists): if not os.path.isdir("%s/outdir_%s/calcPhotonSyst"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/calcPhotonSyst"%(swd__,opt.ext)) if not os.path.isdir("%s/outdir_%s/calcPhotonSyst/pkl"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/calcPhotonSyst/pkl"%(swd__,opt.ext)) with open("%s/outdir_%s/calcPhotonSyst/pkl/%s.pkl"%(swd__,opt.ext,opt.cat),"wb") as f: pickle.dump(data,f) -print(" --> Successfully saved photon systematics as pkl file: %s/outdir_%s/calcPhotonSyst/pkl/%s.pkl"%(swd__,opt.ext,opt.cat)) +print " --> Successfully saved photon systematics as pkl file: %s/outdir_%s/calcPhotonSyst/pkl/%s.pkl"%(swd__,opt.ext,opt.cat) diff --git a/Signal/scripts/fTest.py b/Signal/scripts/fTest.py index 8f5a608c..2c8ac8d9 100644 --- a/Signal/scripts/fTest.py +++ b/Signal/scripts/fTest.py @@ -1,7 +1,7 @@ # Python script to perform signal modelling fTest: extract number of gaussians for final fit # * run per category over single mass point -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FTEST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FTEST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " import ROOT import pandas as pd import pickle @@ -22,7 +22,7 @@ MHLow, MHHigh = '120', '130' def leave(): - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FTEST (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FTEST (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " exit() def get_options(): @@ -58,6 +58,7 @@ def get_options(): xvarFit = xvar.Clone() dZ = inputWS0.var("dZ") +print("xvar",xvar,dZ) aset = ROOT.RooArgSet(xvar,dZ) f0.Close() @@ -74,6 +75,7 @@ def get_options(): f = ROOT.TFile(WSFileName,"read") inputWS = f.Get(inputWSName__) print('"%s_%s_%s_%s"%(procToData(proc.split("_")[0]),opt.mass,sqrts__,opt.cat)',"%s_%s_%s_%s"%(procToData(proc.split("_")[0]),opt.mass,sqrts__,opt.cat)) + print("aset",aset) print("inputWSfilename:",inputWSName__) print(procToData(proc.split("_")[0])," ",opt.mass," ",sqrts__," ",opt.cat) d = reduceDataset(inputWS.data("%s_%s_%s_%s"%(procToData(proc.split("_")[0]),opt.mass,sqrts__,opt.cat)),aset) @@ -86,11 +88,12 @@ def get_options(): else: procsToFTest = list(df.sort_values('sumEntries',ascending=False)[0:opt.nProcsToFTest].proc.values) for pidx, proc in enumerate(procsToFTest): - print("\n --> Process (%g): %s"%(pidx,proc)) + print "\n --> Process (%g): %s"%(pidx,proc) # Split dataset to RV/WV: ssf requires input as dict (with mass point as key) datasets_RV, datasets_WV = od(), od() WSFileName = glob.glob("%s/output*M%s*%s.root"%(opt.inputWSDir,opt.mass,proc))[0] + f = ROOT.TFile(WSFileName,"read") inputWS = f.Get(inputWSName__) d = reduceDataset(inputWS.data("%s_%s_%s_%s"%(procToData(proc.split("_")[0]),opt.mass,sqrts__,opt.cat)),aset) @@ -99,7 +102,9 @@ def get_options(): # Run fTest: RV # If numEntries below threshold then keep as n = 1 - if datasets_RV[opt.mass].numEntries() < opt.threshold: continue + if datasets_RV[opt.mass].numEntries() < opt.threshold: + + continue else: ssfs = od() min_reduced_chi2, nGauss_opt = 999, 1 @@ -110,11 +115,11 @@ def get_options(): ssf.runFit() ssf.buildSplines() if ssf.Ndof >= 1: - ssfs[k] = ssf - if ssfs[k].getReducedChi2() < min_reduced_chi2: - min_reduced_chi2 = ssfs[k].getReducedChi2() - nGauss_opt = nGauss - print(" * (%s,%s,RV): nGauss = %g, chi^2/n(dof) = %.4f"%(proc,opt.cat,nGauss,ssfs[k].getReducedChi2())) + ssfs[k] = ssf + if ssfs[k].getReducedChi2() < min_reduced_chi2: + min_reduced_chi2 = ssfs[k].getReducedChi2() + nGauss_opt = nGauss + print " * (%s,%s,RV): nGauss = %g, chi^2/n(dof) = %.4f"%(proc,opt.cat,nGauss,ssfs[k].getReducedChi2()) # Set optimum df.loc[df['proc']==proc,'nRV'] = nGauss_opt # Make plots @@ -135,11 +140,11 @@ def get_options(): ssf.runFit() ssf.buildSplines() if ssf.Ndof >= 1: - ssfs[k] = ssf - if ssfs[k].getReducedChi2() < min_reduced_chi2: - min_reduced_chi2 = ssfs[k].getReducedChi2() - nGauss_opt = nGauss - print(" * (%s,%s,WV): nGauss = %g, chi^2/n(dof) = %.4f"%(proc,opt.cat,nGauss,ssfs[k].getReducedChi2())) + ssfs[k] = ssf + if ssfs[k].getReducedChi2() < min_reduced_chi2: + min_reduced_chi2 = ssfs[k].getReducedChi2() + nGauss_opt = nGauss + print " * (%s,%s,WV): nGauss = %g, chi^2/n(dof) = %.4f"%(proc,opt.cat,nGauss,ssfs[k].getReducedChi2()) # Set optimum df.loc[df['proc']==proc,'nWV'] = nGauss_opt # Make plots diff --git a/Signal/scripts/getDiagProc.py b/Signal/scripts/getDiagProc.py index 74309724..faf5cbec 100644 --- a/Signal/scripts/getDiagProc.py +++ b/Signal/scripts/getDiagProc.py @@ -1,6 +1,6 @@ # Script to determine diagonal process for each category and write to json file -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET DIAG PROC RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET DIAG PROC RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " import os, sys import re from optparse import OptionParser @@ -15,7 +15,7 @@ from commonObjects import * def leave(): - print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET DIAG PROC RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET DIAG PROC RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " sys.exit(1) def get_options(): @@ -42,7 +42,7 @@ def get_options(): # Loop over procs for proc in allProcs.split(","): - print(" --> Processing: %s"%proc) + print " --> Processing: %s"%proc # Open workspace _WSFileName = glob.glob("%s/output*M%s*%s.root"%(opt.inputWSDir,opt.MH,proc))[0] f = ROOT.TFile(_WSFileName,'read') @@ -64,13 +64,13 @@ def get_options(): f.Close() # Save json file -print(" --> Writing diagonal processes to json file\n") +print " --> Writing diagonal processes to json file\n" if not os.path.isdir("%s/outdir_%s/getDiagProc/json"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/getDiagProc/json"%(swd__,opt.ext)) with open("%s/outdir_%s/getDiagProc/json/diagonal_process.json"%(swd__,opt.ext),"w") as jf: json.dump(dproc,jf) # One json file for each cat: diagonal proc first line in file if opt.makeSimpleFTest: - print(" --> Making simple fTest config json using diagonal procs (nRV,nWV) = (%s,%s)"%(opt.nRV,opt.nWV)) + print " --> Making simple fTest config json using diagonal procs (nRV,nWV) = (%s,%s)"%(opt.nRV,opt.nWV) if not os.path.isdir("%s/outdir_%s/fTest"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/fTest"%(swd__,opt.ext)) if not os.path.isdir("%s/outdir_%s/fTest/json"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/fTest/json"%(swd__,opt.ext)) for cidx, cat in enumerate(allCats.split(",")): diff --git a/Signal/scripts/getEffAcc.py b/Signal/scripts/getEffAcc.py index 40c35b85..9c9f8862 100644 --- a/Signal/scripts/getEffAcc.py +++ b/Signal/scripts/getEffAcc.py @@ -2,7 +2,7 @@ # * Relies on the presense of NOTAG dataset. If not there, script will give NAN as output # * Also needs to run on all processed categories: take from file -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET FRACTIONS MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET FRACTIONS MAKER RUN II ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " import os, sys import re from optparse import OptionParser @@ -17,7 +17,7 @@ from commonObjects import * def leave(): - print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET FRACTIONS RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG GET FRACTIONS RUN II (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " sys.exit(1) def get_options(): @@ -37,7 +37,7 @@ def get_options(): allCats = extractListOfCats(WSFileNames) if containsNOTAG(WSFileNames): allCats += ",NOTAG" else: - print(" --> [ERROR] getEffAcc.py requires NOTAG dataset. Must use standard weights method in signalFit.py") + print " --> [ERROR] getEffAcc.py requires NOTAG dataset. Must use standard weights method in signalFit.py" leave() # Define dataframe to store yields: cow = centralObjectWeight @@ -47,10 +47,10 @@ def get_options(): # Loop over mass points: write separate json file for each masspoint for _mp in opt.massPoints.split(","): - print(" --> Processing mass point: %s"%_mp) + print " --> Processing mass point: %s"%_mp # Loop over processes for _proc in opt.procs.split(","): - print(" * proc = %s"%_proc) + print " * proc = %s"%_proc # Find corresponding file _WSFileName = glob.glob("%s/output*M%s*%s.root"%(opt.inputWSDir,_mp,_proc))[0] f = ROOT.TFile(_WSFileName,'read') @@ -108,7 +108,7 @@ def get_options(): # Calculate fractional cross section of each STXS bin (in terms of stage0 bin) for normalisation: output in txt file if opt.doSTXSFractions: if opt.skipCOWCorr: - print(" --> [ERROR] Must include centralObjectWeight corrections for signal normalisation fractions") + print " --> [ERROR] Must include centralObjectWeight corrections for signal normalisation fractions" leave() if not os.path.isdir("%s/outdir_%s/getEffAcc/fractions"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/getEffAcc/fractions"%(swd__,opt.ext)) diff --git a/Signal/scripts/packageSignal.py b/Signal/scripts/packageSignal.py index b3dc6c26..50468b1e 100644 --- a/Signal/scripts/packageSignal.py +++ b/Signal/scripts/packageSignal.py @@ -47,7 +47,6 @@ def rooiter(x): print('fNames',fNames) print('opt.exts.split(",")[0]',opt.exts.split(",")[0]) print("debug: opt.exts", opt.exts) - print(opt.exts.split(",")[0]) print('fNames[opt.exts.split(",")[0]][0]',fNames[opt.exts.split(",")[0]][0]) print('fNames[0]') print(mp) @@ -56,7 +55,7 @@ def rooiter(x): data_merged["m%s"%mp] = ROOT.TFile(fNames[opt.exts.split(",")[0]][0]).Get("wsig_13TeV").data("sig_mass_m%s_%s"%(mp,opt.cat)).emptyClone("sig_mass_m%s_%s"%(mp,opt.cat)) data_merged_names.append( data_merged["m%s"%mp].GetName() ) -for ext, fNames_by_ext in fNames.items(): +for ext, fNames_by_ext in fNames.iteritems(): for fName in fNames_by_ext: for mp in opt.massPoints.split(","): d = ROOT.TFile(fName).Get("wsig_13TeV").data("sig_mass_m%s_%s"%(mp,opt.cat)) @@ -65,10 +64,10 @@ def rooiter(x): w = d.weight() data_merged["m%s"%mp].add(p,w) -for _data in data_merged.values(): packagedWS.imp(_data) +for _data in data_merged.itervalues(): packagedWS.imp(_data) # Loop over input signal fit workspaces -for ext, fNames_by_ext in fNames.items(): +for ext, fNames_by_ext in fNames.iteritems(): for fName in fNames_by_ext: fin = ROOT.TFile(fName) wsin = fin.Get("wsig_13TeV") @@ -80,9 +79,9 @@ def rooiter(x): allData = wsin.allData() # Import objects into output workspace - for _varName, _var in allVars.items(): packagedWS.imp(_var,ROOT.RooFit.RecycleConflictNodes(),ROOT.RooFit.Silence()) - for _funcName, _func in allFunctions.items(): packagedWS.imp(_func,ROOT.RooFit.RecycleConflictNodes(),ROOT.RooFit.Silence()) - for _pdfName, _pdf in allPdfs.items(): packagedWS.imp(_pdf,ROOT.RooFit.RecycleConflictNodes(),ROOT.RooFit.Silence()) + for _varName, _var in allVars.iteritems(): packagedWS.imp(_var,ROOT.RooFit.RecycleConflictNodes(),ROOT.RooFit.Silence()) + for _funcName, _func in allFunctions.iteritems(): packagedWS.imp(_func,ROOT.RooFit.RecycleConflictNodes(),ROOT.RooFit.Silence()) + for _pdfName, _pdf in allPdfs.iteritems(): packagedWS.imp(_pdf,ROOT.RooFit.RecycleConflictNodes(),ROOT.RooFit.Silence()) for _data in allData: # Skip merged datasets @@ -92,13 +91,13 @@ def rooiter(x): # Save to file if not os.path.isdir("outdir_%s"%opt.outputExt): os.system("mkdir outdir_%s"%opt.outputExt) if opt.mergeYears: - print((" --> Writing to: ./outdir_%s/CMS-HGG_sigfit_%s_%s.root")%(opt.outputExt,opt.outputExt,opt.cat)) + print (" --> Writing to: ./outdir_%s/CMS-HGG_sigfit_%s_%s.root")%(opt.outputExt,opt.outputExt,opt.cat) f = ROOT.TFile("./outdir_%s/CMS-HGG_sigfit_%s_%s.root"%(opt.outputExt,opt.outputExt,opt.cat),"RECREATE") else: print( " --> Writing to: ./outdir_%s/CMS-HGG_sigfit_%s_%s_%s.root")%(opt.outputExt,opt.outputExt,opt.cat,opt.year) f = ROOT.TFile("./outdir_%s/CMS-HGG_sigfit_%s_%s_%s.root"%(opt.outputExt,opt.outputExt,opt.cat,opt.year),"RECREATE") -print("chuw") + packagedWS.Write() -# packagedWS.Delete() +packagedWS.Delete() f.Delete() f.Close() diff --git a/Signal/scripts/signalFit.py b/Signal/scripts/signalFit.py index 525aaadd..237f9231 100644 --- a/Signal/scripts/signalFit.py +++ b/Signal/scripts/signalFit.py @@ -1,4 +1,4 @@ -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FITTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FITTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " import ROOT import pandas as pd import pickle @@ -24,7 +24,7 @@ MHNominal = '125' def leave(): - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FITTER (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") + print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG SIGNAL FITTER (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " exit() def get_options(): @@ -57,7 +57,7 @@ def get_options(): parser.add_option("--scalesGlobal", dest='scalesGlobal', default='', help='Photon shape systematics: scalesGlobal') parser.add_option("--smears", dest='smears', default='', help='Photon shape systematics: smears') # Parameter values - parser.add_option('--replacementThreshold', dest='replacementThreshold', default=50, type='int', help="Nevent threshold to trigger replacement dataset") + parser.add_option('--replacementThreshold', dest='replacementThreshold', default=15, type='int', help="Nevent threshold to trigger replacement dataset") parser.add_option('--beamspotWidthData', dest='beamspotWidthData', default=3.4, type='float', help="Width of beamspot in data [cm]") parser.add_option('--beamspotWidthMC', dest='beamspotWidthMC', default=5.14, type='float', help="Width of beamspot in MC [cm]") parser.add_option('--MHPolyOrder', dest='MHPolyOrder', default=1, type='int', help="Order of polynomial for MH dependence") @@ -73,9 +73,9 @@ def get_options(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # SETUP: signal fit -print(" --> Running fit for (proc,cat) = (%s,%s)"%(opt.proc,opt.cat)) +print " --> Running fit for (proc,cat) = (%s,%s)"%(opt.proc,opt.cat) if( len(opt.massPoints.split(",")) == 1 )&( opt.MHPolyOrder > 0 ): - print(" --> [WARNING] Attempting to fit polynomials of O(MH^%g) for single mass point. Setting order to 0"%opt.MHPolyOrder) + print " --> [WARNING] Attempting to fit polynomials of O(MH^%g) for single mass point. Setting order to 0"%opt.MHPolyOrder opt.MHPolyOrder=0 # Add stopwatch function @@ -86,13 +86,13 @@ def get_options(): MHHigh=opt.MHHigh # Load replacement map if opt.analysis not in globalReplacementMap: - print(" --> [ERROR] replacement map does not exist for analysis (%s). Please add to tools/replacementMap.py"%opt.analysis) + print " --> [ERROR] replacement map does not exist for analysis (%s). Please add to tools/replacementMap.py"%opt.analysis leave() else: rMap = globalReplacementMap[opt.analysis] # Load XSBR map if opt.analysis not in globalXSBRMap: - print(" --> [ERROR] XS * BR map does not exist for analysis (%s). Please add to tools/XSBRMap.py"%opt.analysis) + print " --> [ERROR] XS * BR map does not exist for analysis (%s). Please add to tools/XSBRMap.py"%opt.analysis leave() else: xsbrMap = globalXSBRMap[opt.analysis] @@ -123,7 +123,7 @@ def get_options(): inputWS = f.Get(inputWSName__) d = reduceDataset(inputWS.data("%s_%s_%s_%s"%(procToData(opt.proc.split("_")[0]),MHNominal,sqrts__,opt.cat)),aset) if( d.numEntries() == 0. )|( d.sumEntries <= 0. ): - print(" --> (%s,%s) has zero events. Will not construct signal model"%(opt.proc,opt.cat)) + print " --> (%s,%s) has zero events. Will not construct signal model"%(opt.proc,opt.cat) exit() inputWS.Delete() f.Close() @@ -131,29 +131,29 @@ def get_options(): # Define proc x cat with which to extract shape: if skipVertexScenarioSplit label all events as "RV" procRVFit, catRVFit = opt.proc, opt.cat if opt.skipVertexScenarioSplit: - print(" --> Skipping vertex scenario split") + print " --> Skipping vertex scenario split" else: procWVFit, catWVFit = opt.proc, opt.cat # Options for using diagonal process from getDiagProc output json if opt.useDiagonalProcForShape: if not os.path.exists("%s/outdir_%s/getDiagProc/json/diagonal_process.json"%(swd__,opt.ext)): - print(" --> [ERROR] Diagonal process json from getDiagProc does not exist. Using nominal proc x cat for shape") + print " --> [ERROR] Diagonal process json from getDiagProc does not exist. Using nominal proc x cat for shape" else: with open("%s/outdir_%s/getDiagProc/json/diagonal_process.json"%(swd__,opt.ext),"r") as jf: dproc = json.load(jf) procRVFit = dproc[opt.cat] - print(" --> Using diagonal proc (%s,%s) for shape"%(procRVFit,opt.cat)) + print " --> Using diagonal proc (%s,%s) for shape"%(procRVFit,opt.cat) if not opt.skipVertexScenarioSplit: procWVFit = dproc[opt.cat] # Process for syst procSyst = opt.proc if opt.useDiagonalProcForSyst: if not os.path.exists("%s/outdir_%s/getDiagProc/json/diagonal_process.json"%(swd__,opt.ext)): - print(" --> [ERROR] Diagonal process json from getDiagProc does not exist. Using nominal proc x cat for systematics") + print " --> [ERROR] Diagonal process json from getDiagProc does not exist. Using nominal proc x cat for systematics" else: with open("%s/outdir_%s/getDiagProc/json/diagonal_process.json"%(swd__,opt.ext),"r") as jf: dproc = json.load(jf) procSyst = dproc[opt.cat] - print(" --> Using diagonal proc (%s,%s) for systematics"%(procSyst,opt.cat)) + print " --> Using diagonal proc (%s,%s) for systematics"%(procSyst,opt.cat) # Define process with which to extract normalisation: nominal procNorm, catNorm = opt.proc, opt.cat @@ -176,6 +176,7 @@ def get_options(): f.Close() # Check if nominal yield > threshold (or if +ve sum of weights). If not then use replacement proc x cat +print(datasetRVForFit[MHNominal].numEntries(),"event",datasetRVForFit[MHNominal].sumEntries()) if( datasetRVForFit[MHNominal].numEntries() < opt.replacementThreshold )|( datasetRVForFit[MHNominal].sumEntries() < 0. ): nominal_numEntries = datasetRVForFit[MHNominal].numEntries() procReplacementFit, catReplacementFit = rMap['procRVMap'][opt.cat], rMap['catRVMap'][opt.cat] @@ -191,29 +192,29 @@ def get_options(): # Check if replacement dataset has too few entries: if so throw error if( datasetRVForFit[MHNominal].numEntries() < opt.replacementThreshold )|( datasetRVForFit[MHNominal].sumEntries() < 0. ): - print(" --> [ERROR] replacement dataset (%s,%s) has too few entries (%g < %g)"%(procReplacementFit,catReplacementFit,datasetRVForFit[MHNominal].numEntries(),opt.replacementThreshold)) + print " --> [ERROR] replacement dataset (%s,%s) has too few entries (%g < %g)"%(procReplacementFit,catReplacementFit,datasetRVForFit[MHNominal].numEntries(),opt.replacementThreshold) sys.exit(1) else: procRVFit, catRVFit = procReplacementFit, catReplacementFit if opt.skipVertexScenarioSplit: - print(" --> Too few entries in nominal dataset (%g < %g). Using replacement (proc,cat) = (%s,%s) for extracting shape"%(nominal_numEntries,opt.replacementThreshold,procRVFit,catRVFit)) + print " --> Too few entries in nominal dataset (%g < %g). Using replacement (proc,cat) = (%s,%s) for extracting shape"%(nominal_numEntries,opt.replacementThreshold,procRVFit,catRVFit) for mp in opt.massPoints.split(","): - print(" * MH = %s GeV: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries())) + print " * MH = %s GeV: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries()) else: - print(" --> RV: Too few entries in nominal dataset (%g < %g). Using replacement (proc,cat) = (%s,%s) for extracting shape"%(nominal_numEntries,opt.replacementThreshold,procRVFit,catRVFit)) + print " --> RV: Too few entries in nominal dataset (%g < %g). Using replacement (proc,cat) = (%s,%s) for extracting shape"%(nominal_numEntries,opt.replacementThreshold,procRVFit,catRVFit) for mp in opt.massPoints.split(","): - print(" * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries())) + print " * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries()) else: if opt.skipVertexScenarioSplit: - print(" --> Using (proc,cat) = (%s,%s) for extracting shape"%(procRVFit,catRVFit)) + print " --> Using (proc,cat) = (%s,%s) for extracting shape"%(procRVFit,catRVFit) for mp in opt.massPoints.split(","): - print(" * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries())) + print " * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries()) else: - print(" --> RV: Using (proc,cat) = (%s,%s) for extracting shape"%(procRVFit,catRVFit)) + print " --> RV: Using (proc,cat) = (%s,%s) for extracting shape"%(procRVFit,catRVFit) for mp in opt.massPoints.split(","): - print(" * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries())) + print " * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetRVForFit[mp].numEntries(),datasetRVForFit[mp].sumEntries()) # Repeat for WV scenario if not opt.skipVertexScenarioSplit: @@ -241,35 +242,35 @@ def get_options(): f.Close() # Check if replacement dataset has too few entries: if so throw error if( datasetWVForFit[MHNominal].numEntries() < opt.replacementThreshold )|( datasetWVForFit[MHNominal].sumEntries() < 0. ): - print(" --> [ERROR] replacement dataset (%s,%s) has too few entries (%g < %g)"%(procReplacementFit,catReplacementFit,datasetWVForFit[MHNominal].numEntries,opt.replacementThreshold)) + print " --> [ERROR] replacement dataset (%s,%s) has too few entries (%g < %g)"%(procReplacementFit,catReplacementFit,datasetWVForFit[MHNominal].numEntries,opt.replacementThreshold) sys.exit(1) else: procWVFit, catWVFit = procReplacementFit, catReplacementFit - print(" --> WV: Too few entries in nominal dataset (%g < %g). Using replacement (proc,cat) = (%s,%s) for extracting shape"%(nominal_numEntries,opt.replacementThreshold,procWVFit,catWVFit)) + print " --> WV: Too few entries in nominal dataset (%g < %g). Using replacement (proc,cat) = (%s,%s) for extracting shape"%(nominal_numEntries,opt.replacementThreshold,procWVFit,catWVFit) for mp in opt.massPoints.split(","): - print(" * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetWVForFit[mp].numEntries(),datasetWVForFit[mp].sumEntries())) + print " * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetWVForFit[mp].numEntries(),datasetWVForFit[mp].sumEntries()) else: - print(" --> WV: Using (proc,cat) = (%s,%s) for extracting shape"%(procWVFit,catRVFit)) + print " --> WV: Using (proc,cat) = (%s,%s) for extracting shape"%(procWVFit,catRVFit) for mp in opt.massPoints.split(","): - print(" * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetWVForFit[mp].numEntries(),datasetWVForFit[mp].sumEntries())) + print " * MH = %s: numEntries = %g, sumEntries = %.6f"%(mp,datasetWVForFit[mp].numEntries(),datasetWVForFit[mp].sumEntries()) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # BEAMSPOT REWEIGH if not opt.skipBeamspotReweigh: # Datasets for fit - for mp,d in datasetRVForFit.items(): + for mp,d in datasetRVForFit.iteritems(): drw = beamspotReweigh(datasetRVForFit[mp],opt.beamspotWidthData,opt.beamspotWidthMC,xvar,dZ,_x=opt.xvar) datasetRVForFit[mp] = drw if not opt.skipVertexScenarioSplit: - for mp,d in datasetWVForFit.items(): + for mp,d in datasetWVForFit.iteritems(): drw = beamspotReweigh(datasetWVForFit[mp],opt.beamspotWidthData,opt.beamspotWidthMC,xvar,dZ,_x=opt.xvar) datasetWVForFit[mp] = drw - print(" --> Beamspot reweigh: RV(sumEntries) = %.6f, WV(sumEntries) = %.6f"%(datasetRVForFit[mp].sumEntries(),datasetWVForFit[mp].sumEntries())) + print " --> Beamspot reweigh: RV(sumEntries) = %.6f, WV(sumEntries) = %.6f"%(datasetRVForFit[mp].sumEntries(),datasetWVForFit[mp].sumEntries()) else: - print(" --> Beamspot reweigh: sumEntries = %.6f"%datasetRVForFit[mp].sumEntries()) + print " --> Beamspot reweigh: sumEntries = %.6f"%datasetRVForFit[mp].sumEntries() # Nominal datasets for saving to output Workspace: preserve norm for eff * acc calculation - for mp,d in nominalDatasets.items(): + for mp,d in nominalDatasets.iteritems(): drw = beamspotReweigh(d,opt.beamspotWidthData,opt.beamspotWidthMC,xvar,dZ,_x=opt.xvar,preserveNorm=True) nominalDatasets[mp] = drw @@ -278,16 +279,16 @@ def get_options(): if not opt.useDCB: with open("%s/outdir_%s/fTest/json/nGauss_%s.json"%(swd__,opt.ext,catRVFit)) as jf: ngauss = json.load(jf) nRV = int(ngauss["%s__%s"%(procRVFit,catRVFit)]['nRV']) - if opt.skipVertexScenarioSplit: print(" --> Fitting function: convolution of nGaussians (%g)"%nRV) + if opt.skipVertexScenarioSplit: print " --> Fitting function: convolution of nGaussians (%g)"%nRV else: with open("%s/outdir_%s/fTest/json/nGauss_%s.json"%(swd__,opt.ext,catWVFit)) as jf: ngauss = json.load(jf) nWV = int(ngauss["%s__%s"%(procWVFit,catWVFit)]['nWV']) - print(" --> Fitting function: convolution of nGaussians (RV=%g,WV=%g)"%(nRV,nWV)) + print " --> Fitting function: convolution of nGaussians (RV=%g,WV=%g)"%(nRV,nWV) else: - print(" --> Fitting function: DCB + 1 Gaussian") + print " --> Fitting function: DCB + 1 Gaussian" if opt.doVoigtian: - print(" --> Will add natural Higgs width as parameter in Pdf (Gaussians -> Voigtians)") + print " --> Will add natural Higgs width as parameter in Pdf (Gaussians -> Voigtians)" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # FIT: simultaneous signal fit (ssf) @@ -312,14 +313,14 @@ def get_options(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # FINAL MODEL: construction -print("\n --> Constructing final model") +print "\n --> Constructing final model" fm = FinalModel(ssfMap,opt.proc,opt.cat,opt.ext,opt.year,sqrts__,nominalDatasets,xvar,MH,MHLow,MHHigh,opt.massPoints,xsbrMap,procSyst,opt.scales,opt.scalesCorr,opt.scalesGlobal,opt.smears,opt.doVoigtian,opt.useDCB,opt.skipVertexScenarioSplit,opt.skipSystematics,opt.doEffAccFromJson) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # SAVE: to output workspace foutDir = "%s/outdir_%s/signalFit/output"%(swd__,opt.ext) foutName = "%s/outdir_%s/signalFit/output/CMS-HGG_sigfit_%s_%s_%s_%s.root"%(swd__,opt.ext,opt.ext,opt.proc,opt.year,opt.cat) -print("\n --> Saving output workspace to file: %s"%foutName) +print "\n --> Saving output workspace to file: %s"%foutName if not os.path.isdir(foutDir): os.system("mkdir %s"%foutDir) fout = ROOT.TFile(foutName,"RECREATE") outWS = ROOT.RooWorkspace("%s_%s"%(outputWSName__,sqrts__),"%s_%s"%(outputWSName__,sqrts__)) @@ -330,7 +331,7 @@ def get_options(): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # PLOTTING if opt.doPlots: - print("\n --> Making plots...") + print "\n --> Making plots..." if not os.path.isdir("%s/outdir_%s/signalFit/Plots"%(swd__,opt.ext)): os.system("mkdir %s/outdir_%s/signalFit/Plots"%(swd__,opt.ext)) if opt.skipVertexScenarioSplit: plotPdfComponents(ssfRV,opt.xvar,_outdir="%s/outdir_%s/signalFit/Plots"%(swd__,opt.ext),_extension="total_",_proc=procRVFit,_cat=catRVFit) @@ -340,3 +341,4 @@ def get_options(): # Plot interpolation plotInterpolation(fm,_outdir="%s/outdir_%s/signalFit/Plots"%(swd__,opt.ext)) plotSplines(fm,_outdir="%s/outdir_%s/signalFit/Plots"%(swd__,opt.ext),_nominalMass=MHNominal) + print("plottt") diff --git a/Signal/tools/commonObjects.py b/Signal/tools/commonObjects.py new file mode 100644 index 00000000..7aee6588 --- /dev/null +++ b/Signal/tools/commonObjects.py @@ -0,0 +1,44 @@ +import os + +# Paths and directory +cmsswbase__ = os.environ['CMSSW_BASE'] +cwd__ = os.environ['CMSSW_BASE']+"/src/flashggFinalFit" +swd__ = "%s/Signal"%cwd__ +bwd__ = "%s/Background"%cwd__ +dwd__ = "%s/Datacard"%cwd__ +fwd__ = "%s/Combine"%cwd__ +pwd__ = "%s/Plots"%cwd__ +twd__ = "%s/Trees2WS"%cwd__ + +# Centre of mass energy string +sqrts__ = "13TeV" + +# Luminosity map in fb^-1: for using UL 2018w +lumiMap = {'2016':36.33, '2016pre':19.48, '2016post':16.76, '2017':41.48, '2018':59.83, 'combined':137.65, 'merged':137.65} +# If using ReReco samples then switch to lumiMap below (missing data in 2018 EGamma data set) +#lumiMap = {'2016':36.33, '2017':41.48, '2018':59.35, 'combined':137.17, 'merged':137.17} +lumiScaleFactor = 1000. # Converting from pb to fb + +# Constants +BR_W_lnu = 3.*10.86*0.01 +BR_Z_ll = 3*3.3658*0.01 +BR_Z_nunu = 20.00*0.01 +BR_Z_qq = 69.91*0.01 +BR_W_qq = 67.41*0.01 + +# Production modes and decay channel: for extract XS from combine +productionModes = ['ggH','qqH','ttH','tHq','tHW','WH','ZH','bbH'] +decayMode = 'hgg' + +# flashgg input WS objects +inputWSName__ = "tagsDumper/cms_hgg_13TeV" +inputNuisanceExtMap = {'scales':'','scalesCorr':'','smears':''} +# Signal output WS objects +outputWSName__ = "wsig" +outputWSObjectTitle__ = "hggpdfsmrel" +# outputWSObjectTitle__ = "hjjpdfsmrel" +outputWSNuisanceTitle__ = "CMS_hgg_nuisance" +# outputWSNuisanceTitle__ = "CMS_hjj_nuisance" +outputNuisanceExtMap = {'scales':'%sscale'%sqrts__,'scalesCorr':'%sscaleCorr'%sqrts__,'smears':'%ssmear'%sqrts__,'scalesGlobal':'%sscale'%sqrts__} +# Bkg output WS objects +bkgWSName__ = "multipdf" diff --git a/Signal/tools/commonTools.py b/Signal/tools/commonTools.py new file mode 100644 index 00000000..1bd64439 --- /dev/null +++ b/Signal/tools/commonTools.py @@ -0,0 +1,134 @@ +import os, sys +import glob +import re +import ROOT +import math +from collections import OrderedDict as od +from commonObjects import * + +# Function for iterating over ROOT argsets in workspace +def rooiter(x): + iter = x.iterator() + ret = iter.Next() + while ret: + yield ret + ret = iter.Next() + +def extractWSFileNames( _inputWSDir ): + if not os.path.isdir(_inputWSDir): + print " --> [ERROR] No such directory (%s)" + return False + return glob.glob("%s/output_*.root"%_inputWSDir) + +def extractListOfProcs( _listOfWSFileNames ): + procs = [] + for fName in _listOfWSFileNames: + print(fName) + p = fName.split("pythia8_")[1].split(".root")[0] + if p not in procs: procs.append(p) + return ",".join(procs) + +def extractListOfCats( _listOfWSFileNames ): + f0 = ROOT.TFile(_listOfWSFileNames[0]) + ws = f0.Get(inputWSName__) + allData = ws.allData() + cats = [] + for d in allData: + # Skip systematics shifts + if "sigma" in d.GetName(): continue + # Skip NOTAG + elif "NOTAG" in d.GetName(): continue + # Add to list: name of the form {proc}_{mass}_{sqrts}_{cat} + cats.append(d.GetName().split("_%s_"%sqrts__)[-1]) + ws.Delete() + f0.Close() + return ",".join(cats) + +def extractListOfCatsFromData( _fileName ): + f = ROOT.TFile(_fileName) + ws = f.Get(inputWSName__) + allData = ws.allData() + cats = [] + for d in allData: + c = d.GetName().split("Data_%s_"%sqrts__)[-1] + cats.append(c) + cats.sort() + ws.Delete() + f.Close() + return ",".join(cats) + +def containsNOTAG( _listOfWSFileNames ): + f0 = ROOT.TFile(_listOfWSFileNames[0]) + ws = f0.Get(inputWSName__) + allData = ws.allData() + for d in allData: + if "NOTAG" in d.GetName(): return True + return False + +# Function to return signal production (and decay extension if required) from input file name +def signalFromFileName(_fileName): + p, d = None, None + if "ggZH" in _fileName: + p = "ggzh" + if "ZToLL" in _fileName: d = "_ZToLL" + elif "ZToNuNu" in _fileName: d = "_ZToNuNu" + else: d = "_ZToQQ" + elif "GluGlu" in _fileName: p = "ggh" + elif "VBF" in _fileName: p = "vbf" + elif "WH" in _fileName: p = "wh" + elif "ZH" in _fileName: p = "zh" + elif "ttH" in _fileName: p = "tth" + elif "THQ" in _fileName: p = "thq" + elif "THW" in _fileName: p = "thw" + elif "bbH" in _fileName: p = "bbh" + else: + print " --> [ERROR]: cannot extract production mode from input file name. Please update tools.commonTools.signalFromFileName" + exit(1) + return p,d + +# Function for converting STXS process to production mode in dataset name +procToDataMap = od() +procToDataMap['GG2H'] = 'ggh' +procToDataMap['VBF'] = 'vbf' +procToDataMap['WH2HQQ'] = 'wh' +procToDataMap['ZH2HQQ'] = 'zh' +procToDataMap['QQ2HLNU'] = 'wh' +procToDataMap['QQ2HLL'] = 'zh' +procToDataMap['TTH'] = 'tth' +procToDataMap['BBH'] = 'bbh' +procToDataMap['THQ'] = 'thq' +procToDataMap['THW'] = 'thw' +procToDataMap['GG2HQQ'] = 'ggzh' +procToDataMap['GG2HLL'] = 'ggzh' +procToDataMap['GG2HNUNU'] = 'ggzh' +def procToData( _proc ): + k = _proc.split("_")[0] + if k in procToDataMap: _proc = re.sub( k, procToDataMap[k], _proc ) + return _proc + +def dataToProc( _d ): + dataToProcMap = {v:k for k,v in procToDataMap.iteritems()} + if _d in dataToProcMap: return dataToProcMap[_d] + else: return _d + +# Mapping of process to name in datacard +procToDatacardNameMap = od() +procToDatacardNameMap['GG2H'] = "ggH" +procToDatacardNameMap['VBF'] = "qqH" +procToDatacardNameMap['WH2HQQ'] = "WH_had" +procToDatacardNameMap["ZH2HQQ"] = "ZH_had" +procToDatacardNameMap["QQ2HLNU"] = "WH_lep" +procToDatacardNameMap["QQ2HLL"] = "ZH_lep" +procToDatacardNameMap["TTH"] = "ttH" +procToDatacardNameMap["BBH"] = "bbH" +procToDatacardNameMap["THQ"] = "tHq" +procToDatacardNameMap["THW"] = "tHW" +procToDatacardNameMap["TH"] = "tHq" +procToDatacardNameMap["GG2HQQ"] = "ggZH_had" +procToDatacardNameMap["GG2HLL"] = "ggZH_ll" +procToDatacardNameMap["GG2HNUNU"] = "ggZH_nunu" + +def procToDatacardName( _proc ): + k = _proc.split("_")[0] + if k in procToDatacardNameMap: _proc = re.sub( k, procToDatacardNameMap[k], _proc ) + return _proc diff --git a/Signal/tools/finalModel.py b/Signal/tools/finalModel.py index 2fd27ced..42fcdcb6 100644 --- a/Signal/tools/finalModel.py +++ b/Signal/tools/finalModel.py @@ -132,11 +132,13 @@ def __init__(self,_ssfMap,_proc,_cat,_ext,_year,_sqrts,_datasets,_xvar,_MH,_MHLo if not self.skipSystematics: self.buildNuisanceMap() # Build final pdfs if not self.skipVertexScenarioSplit: + self.buildRVFracFunction() self.buildPdf(self.ssfMap['RV'],ext="rv",useDCB=self.useDCB) self.buildPdf(self.ssfMap['WV'],ext="wv",useDCB=self.useDCB) self.Pdfs['final'] = ROOT.RooAddPdf("%s_%s"%(outputWSObjectTitle__,self.name),"%s_%s"%(outputWSObjectTitle__,self.name),ROOT.RooArgList(self.Pdfs['rv'],self.Pdfs['wv']),ROOT.RooArgList(self.Functions['fracRV'])) else: + self.buildPdf(self.ssfMap['Total'],ext='total',useDCB=self.useDCB) self.Pdfs['final'] = self.Pdfs['total'] # Build final normalisation, datasets and extended Pdfs @@ -154,14 +156,15 @@ def buildXSBRSplines(self): # XS if(self.proc == 'gghh'): if self.xvar == "CMS_hgg_mass": - xs = np.ones(101) + xs = np.ones(101)*0.001 else: - xs = np.ones(int(self.MHHigh)-int(self.MHLow)) + xs = np.ones(int(self.MHHigh)-int(self.MHLow))*0.001 else: - mp = self.xsbrMap[self.proc]['mode'] - fp = self.xsbrMap[self.proc]['factor'] if 'factor' in self.xsbrMap[self.proc] else 1. - xs = fp*self.XSBR[mp] + # mp = self.xsbrMap[self.proc]['mode'] + # fp = self.xsbrMap[self.proc]['factor'] if 'factor' in self.xsbrMap[self.proc] else 1. + # xs = fp*self.XSBR[mp] + xs = np.ones(int(self.MHHigh)-int(self.MHLow))*0.5071 self.Splines['xs'] = ROOT.RooSpline1D("fxs_%s_%s"%(self.proc,self.sqrts),"fxs_%s_%s"%(self.proc,self.sqrts),self.MH,len(mh),mh,xs) # BR @@ -171,9 +174,10 @@ def buildXSBRSplines(self): else: br = np.ones(int(self.MHHigh)-int(self.MHLow)) else: - md = self.xsbrMap['decay']['mode'] - fd = self.xsbrMap['decay']['factor'] if 'factor' in self.xsbrMap['decay'] else 1. - br = fd*self.XSBR[md] + # md = self.xsbrMap['decay']['mode'] + # fd = self.xsbrMap['decay']['factor'] if 'factor' in self.xsbrMap['decay'] else 1. + # br = fd*self.XSBR[md] + br = np.ones(int(self.MHHigh)-int(self.MHLow))*0.00227 self.Splines['br'] = ROOT.RooSpline1D("fbr_%s_%s"%(self.proc,self.sqrts),"fbr_%s_%s"%(self.proc,self.sqrts),self.MH,len(mh),mh,br) def buildEffAccSpline(self): @@ -185,7 +189,7 @@ def buildEffAccSpline(self): if self.doEffAccFromJson: jfname = "%s/outdir_%s/getEffAcc/json/effAcc_M%s_%s.json"%(swd__,self.ext,mp,self.ext) if not os.path.exists(jfname): - print(" --> [ERROR] effAcc json file (%s) does not exist for mass point = %s. Run getEffAcc first."%(jfname,mp)) + print " --> [ERROR] effAcc json file (%s) does not exist for mass point = %s. Run getEffAcc first."%(jfname,mp) sys.exit(1) with open(jfname,'r') as jf: ea_data = json.load(jf) ea.append(float(ea_data['%s__%s'%(self.proc,self.cat)])) @@ -238,20 +242,20 @@ def buildNuisanceMap(self): # Extract calcPhotonSyst output psname = "%s/outdir_%s/calcPhotonSyst/pkl/%s.pkl"%(swd__,self.ext,self.cat) if not os.path.exists(psname): - print(" --> [ERROR] Photon systematics do not exist (%s). Please run calcPhotonSyst mode first or skip systematics (--skipSystematics)"%psname) + print " --> [ERROR] Photon systematics do not exist (%s). Please run calcPhotonSyst mode first or skip systematics (--skipSystematics)"%psname sys.exit(1) - with open(psname,"rb") as fpkl: psdata = pickle.load(fpkl) + with open(psname,"r") as fpkl: psdata = pickle.load(fpkl) # Get row for proc: option to use diagonal process r = psdata[psdata['proc']==self.procSyst] if len(r) == 0: - print(" --> [WARNING] Process %s is not in systematics pkl (%s). Skipping systematics."%(self.proc,psname)) + print " --> [WARNING] Process %s is not in systematics pkl (%s). Skipping systematics."%(self.proc,psname) self.skipSystematics = True else: # Add scales, scalesCorr, scalesGlobal, smears for sType in ['scales','scalesCorr','scalesGlobal','smears']: - for syst in getattr(self,sType).split(","): + for syst in getattr(self,sType).split(","): if syst == '': continue # If corr/global nor in sType then build separate nuisance per year i.e. de-correlate @@ -259,14 +263,14 @@ def buildNuisanceMap(self): else: sExt = "_%s"%self.year # Extract info - systOpts = syst.split(":") - sName = "%s_%s"%(systOpts[0],outputNuisanceExtMap[sType]) + systOpts = syst.split(":") + sName = "%s_%s"%(systOpts[0],outputNuisanceExtMap[sType]) # Extract constant values and make nuisance if sType == 'scalesGlobal': cMean, cSigma, cRate = 0.,0.,0. - else: cMean, cSigma, cRate = r["%s_mean"%sName].values[0], r["%s_sigma"%sName].values[0], r["%s_rate"%sName].values[0] - sOpts = systOpts[1:] if len(systOpts) > 1 else [] - self.makeNuisance("%s%s"%(sName,sExt),cMean,cSigma,cRate,sType,sOpts) + else: cMean, cSigma, cRate = r["%s_mean"%sName].values[0], r["%s_sigma"%sName].values[0], r["%s_rate"%sName].values[0] + sOpts = systOpts[1:] if len(systOpts) > 1 else [] + self.makeNuisance("%s%s"%(sName,sExt),cMean,cSigma,cRate,sType,sOpts) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Function to get RV fraction func @@ -289,11 +293,14 @@ def buildRVFracFunction(self): if len(frv) == 1: frv, mh = [frv[0],frv[0],frv[0]], [float(self.MHLow),mh[0],float(self.MHHigh)] # Convert to numpy arrays and make spline frv, mh = np.asarray(frv), np.asarray(mh) + print("check fracRV before") self.Splines['fracRV'] = ROOT.RooSpline1D("%s_%s_rvFracSpline"%(outputWSObjectTitle__,self.name),"%s_%s_rvFracSpline"%(outputWSObjectTitle__,self.name),self.MH,len(mh),mh,frv) # Create function: if not skip systematics then add nuisance for RV fraction if self.skipSystematics: + print("check fracRV") self.Functions['fracRV'] = ROOT.RooFormulaVar("%s_%s_rvFrac"%(outputWSObjectTitle__,self.name),"%s_%s_rvFrac"%(outputWSObjectTitle__,self.name),"TMath::Min(@0,1.0)",ROOT.RooArgList(self.Splines['fracRV'])) else: + print("check fracRV other") self.NuisanceMap['other'] = od() self.makeNuisance('deltafracright',1.,1.,1.,'other') self.Functions['fracRV'] = ROOT.RooFormulaVar("%s_%s_rvFrac"%(outputWSObjectTitle__,self.name),"%s_%s_rvFrac"%(outputWSObjectTitle__,self.name),"TMath::Min(@0+@1,1.0)",ROOT.RooArgList(self.Splines['fracRV'],self.NuisanceMap['other']['deltafracright']['param'])) @@ -314,30 +321,32 @@ def buildPdf(self,ssf,ext='',useDCB=False,_recursive=True): self.buildMean('dm_dcb_%s'%extStr,skipSystematics=self.skipSystematics) self.buildSigma('sigma_dcb_%s'%extStr,skipSystematics=self.skipSystematics) # Build DCB pdf - self.Pdfs['dcb_%s'%extStr] = ROOT.RooDoubleCBFast("dcb_%s"%extStr,"dcb_%s"%extStr,self.xvar,self.Functions["mean_dcb_%s"%extStr],self.Functions["sigma_dcb_%s"%extStr],self.Splines['a1_dcb_%s'%extStr],self.Splines['n1_dcb_%s'%extStr],self.Splines['a2_dcb_%s'%extStr],self.Splines['n2_dcb_%s'%extStr]) - + # self.Pdfs['dcb_%s'%extStr] = ROOT.RooDoubleCBFast("dcb_%s"%extStr,"dcb_%s"%extStr,self.xvar,self.Functions["mean_dcb_%s"%extStr],self.Functions["sigma_dcb_%s"%extStr],self.Splines['a1_dcb_%s'%extStr],self.Splines['n1_dcb_%s'%extStr],self.Splines['a2_dcb_%s'%extStr],self.Splines['n2_dcb_%s'%extStr]) + self.Pdfs['dcb_%s'%extStr] = ROOT.RooDoubleCBFast("%s_%s"%(outputWSObjectTitle__,extStr),"%s_%s"%(outputWSObjectTitle__,extStr),self.xvar,self.Functions["mean_dcb_%s"%extStr],self.Functions["sigma_dcb_%s"%extStr],self.Splines['a1_dcb_%s'%extStr],self.Splines['n1_dcb_%s'%extStr],self.Splines['a2_dcb_%s'%extStr],self.Splines['n2_dcb_%s'%extStr]) # + Gaussian: shares mean with DCB - self.Splines['sigma_gaus_%s'%extStr] = ssf.Splines['sigma_gaus'].Clone() - self.Splines['sigma_gaus_%s'%extStr].SetName("sigma_fit_gaus_%s"%extStr) - self.buildSigma('sigma_gaus_%s'%extStr,skipSystematics=self.skipSystematics) - if self.doVoigtian: - self.Pdfs['gaus_%s'%extStr] = ROOT.RooVoigtian("gaus_%s"%extStr,"gaus_%s"%extStr,self.xvar,self.Functions["mean_dcb_%s"%extStr],self.GammaH,self.Functions["sigma_gaus_%s"%extStr]) - else: - self.Pdfs['gaus_%s'%extStr] = ROOT.RooGaussian("gaus_%s"%extStr,"gaus_%s"%extStr,self.xvar,self.Functions["mean_dcb_%s"%extStr],self.Functions["sigma_gaus_%s"%extStr]) + # self.Splines['sigma_gaus_%s'%extStr] = ssf.Splines['sigma_gaus'].Clone() + # self.Splines['sigma_gaus_%s'%extStr].SetName("sigma_fit_gaus_%s"%extStr) + # self.buildSigma('sigma_gaus_%s'%extStr,skipSystematics=self.skipSystematics) + # if self.doVoigtian: + # self.Pdfs['gaus_%s'%extStr] = ROOT.RooVoigtian("gaus_%s"%extStr,"gaus_%s"%extStr,self.xvar,self.Functions["mean_dcb_%s"%extStr],self.GammaH,self.Functions["sigma_gaus_%s"%extStr]) + # else: + # self.Pdfs['gaus_%s'%extStr] = ROOT.RooGaussian("gaus_%s"%extStr,"gaus_%s"%extStr,self.xvar,self.Functions["mean_dcb_%s"%extStr],self.Functions["sigma_gaus_%s"%extStr]) # Fraction - self.Splines['frac_%s'%extStr] = ssf.Splines['frac_constrained'].Clone() - self.Splines['frac_%s'%extStr].SetName("frac_%s"%extStr) + # self.Splines['frac_%s'%extStr] = ssf.Splines['frac_constrained'].Clone() + # self.Splines['frac_%s'%extStr].SetName("frac_%s"%extStr) # Define total pdf - _pdfs, _coeffs = ROOT.RooArgList(), ROOT.RooArgList() - for pdf in ['dcb','gaus']: _pdfs.add(self.Pdfs['%s_%s'%(pdf,extStr)]) - _coeffs.add(self.Splines['frac_%s'%extStr]) - self.Pdfs[ext] = ROOT.RooAddPdf("%s_%s"%(outputWSObjectTitle__,extStr),"%s_%s"%(outputWSObjectTitle__,extStr),_pdfs,_coeffs,_recursive) - + # _pdfs, _coeffs = ROOT.RooArgList(), ROOT.RooArgList() + # for pdf in ['dcb','gaus']: _pdfs.add(self.Pdfs['%s_%s'%(pdf,extStr)]) + # _coeffs.add(self.Splines['frac_%s'%extStr]) + # self.Pdfs[ext] = ROOT.RooAddPdf("%s_%s"%(outputWSObjectTitle__,extStr),"%s_%s"%(outputWSObjectTitle__,extStr),_pdfs,_coeffs,_recursive) + # print("check ext",ext,extStr) + self.Pdfs[ext] = self.Pdfs['dcb_%s'%extStr] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - else: # For nGaussians: + else: + # For total pdf _pdfs, _coeffs = ROOT.RooArgList(), ROOT.RooArgList() for g in range(0,ssf.nGaussians): @@ -362,8 +371,8 @@ def buildPdf(self,ssf,ext='',useDCB=False,_recursive=True): self.Splines['frac_g%g_%s'%(g,extStr)].SetName("frac_g%g_%s"%(g,extStr)) _coeffs.add(self.Splines['frac_g%g_%s'%(g,extStr)]) - # Define total pdf - self.Pdfs[ext] = ROOT.RooAddPdf("%s_%s"%(outputWSObjectTitle__,extStr),"%s_%s"%(outputWSObjectTitle__,extStr),_pdfs,_coeffs,_recursive) + # Define total pdf + self.Pdfs[ext] = ROOT.RooAddPdf("%s_%s"%(outputWSObjectTitle__,extStr),"%s_%s"%(outputWSObjectTitle__,extStr),_pdfs,_coeffs,_recursive) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Functions to build mean, sigma and rate functions with systematics @@ -379,7 +388,7 @@ def buildMean(self,dmSplineName="",skipSystematics=False): formula += "*(1." # Global if 'scalesGlobal' in self.NuisanceMap: - for sName, sInfo in self.NuisanceMap['scalesGlobal'].items(): + for sName, sInfo in self.NuisanceMap['scalesGlobal'].iteritems(): formula += "+@%g"%dependents.getSize() # For adding additional factor for so in sInfo['opts']: @@ -390,7 +399,7 @@ def buildMean(self,dmSplineName="",skipSystematics=False): # Other systs: scales, scalesCorr, smears for sType in ['scales','scalesCorr','smears']: if sType in self.NuisanceMap: - for sName, sInfo in self.NuisanceMap[sType].items(): + for sName, sInfo in self.NuisanceMap[sType].iteritems(): c = sInfo['meanConst'].getVal() if abs(c)>=5.e-5: formula += "+@%g*@%g"%(dependents.getSize(),dependents.getSize()+1) @@ -410,7 +419,7 @@ def buildSigma(self,sigmaSplineName="",skipSystematics=False): formula += "*TMath::Max(1.e-2,(1." for sType in ['scales','scalesCorr','smears']: if sType in self.NuisanceMap: - for sName, sInfo in self.NuisanceMap[sType].items(): + for sName, sInfo in self.NuisanceMap[sType].iteritems(): c = sInfo['sigmaConst'].getVal() if c>=1e-4: formula += "+@%g*@%g"%(dependents.getSize(),dependents.getSize()+1) @@ -427,7 +436,7 @@ def buildRate(self,rateName,skipSystematics=False): # Add systematics for sType in ['scales','scalesCorr','smears']: if sType in self.NuisanceMap: - for sName, sInfo in self.NuisanceMap[sType].items(): + for sName, sInfo in self.NuisanceMap[sType].iteritems(): c = sInfo['rateConst'].getVal() if c>=5.e-4: formula += "+@%g*@%g"%(dependents.getSize(),dependents.getSize()+1) @@ -439,7 +448,7 @@ def buildRate(self,rateName,skipSystematics=False): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Function to build datasets to add to workspace def buildDatasets(self): - for mp, d in self.datasets.items(): + for mp, d in self.datasets.iteritems(): self.Datasets[mp] = d.Clone("sig_mass_m%s_%s"%(mp,self.name)) self.Datasets['%s_copy'%mp] = d.Clone("sig_mass_m%s_%s"%(mp,self.cat)) @@ -460,4 +469,4 @@ def save(self,wsout): wsout.imp(self.Functions['final_normThisLumi'],ROOT.RooFit.RecycleConflictNodes()) wsout.imp(self.Pdfs['final_extend'],ROOT.RooFit.RecycleConflictNodes()) wsout.imp(self.Pdfs['final_extendThisLumi'],ROOT.RooFit.RecycleConflictNodes()) - for d in self.Datasets.values(): wsout.imp(d) + for d in self.Datasets.itervalues(): wsout.imp(d) diff --git a/Signal/tools/plottingTools.py b/Signal/tools/plottingTools.py index 3e0f8fe0..48d1e29f 100644 --- a/Signal/tools/plottingTools.py +++ b/Signal/tools/plottingTools.py @@ -40,7 +40,7 @@ def getEffSigma(_h): r+=y if r>rlim: reachedLimit = True else: - print((" --> Reach nBins in effSigma calc: %s. Returning 0 for effSigma"%_h.GetName())) + print " --> Reach nBins in effSigma calc: %s. Returning 0 for effSigma"%_h.GetName() return 0 # Down: if( not reachedLimit ): @@ -51,7 +51,7 @@ def getEffSigma(_h): r+=y if r>rlim: reachedLimit = True else: - print((" --> Reach 0 in effSigma calc: %s. Returning 0 for effSigma"%_h.GetName())) + print " --> Reach 0 in effSigma calc: %s. Returning 0 for effSigma"%_h.GetName() return 0 # Calculate fractional width in bin takes above limt (assume linear) if y == 0.: dx = 0. @@ -75,7 +75,7 @@ def plotFTest(ssfs,_opt=1,_outdir='./',_extension='',_proc='',_cat='',_mass='125 hists = od() hmax, hmin = 0, 0 # Loop over nGauss fits - for k,ssf in ssfs.items(): + for k,ssf in ssfs.iteritems(): ssf.MH.setVal(int(_mass)) hists[k] = ssf.Pdfs['final'].createHistogram("h_%s_%s"%(k,_extension),ssf.xvar,ROOT.RooFit.Binning(1600)) if int(k.split("_")[-1]) == _opt: hists[k].SetLineWidth(3) @@ -107,7 +107,7 @@ def plotFTest(ssfs,_opt=1,_outdir='./',_extension='',_proc='',_cat='',_mass='125 hists['data'].SetMaximum(1.2*hmax) hists['data'].SetMinimum(1.2*hmin) hists['data'].Draw("PE") - for k,h in hists.items(): + for k,h in hists.iteritems(): if k == "data": continue h.Draw("HIST SAME") @@ -117,7 +117,7 @@ def plotFTest(ssfs,_opt=1,_outdir='./',_extension='',_proc='',_cat='',_mass='125 leg.SetLineColor(0) leg.SetTextSize(0.03) leg.AddEntry(hists['data'],"Simulation","ep") - for k,ssf in ssfs.items(): + for k,ssf in ssfs.iteritems(): if int(k.split("_")[-1]) == _opt: leg.AddEntry(hists[k],"#bf{N_{gauss} = %s}: #chi^{2}/n(dof) = %.4f"%(k.split("_")[-1],ssf.getReducedChi2()),"L") else: leg.AddEntry(hists[k],"N_{gauss} = %s: #chi^{2}/n(dof) = %.4f"%(k.split("_")[-1],ssf.getReducedChi2()),"L") leg.Draw("Same") @@ -141,7 +141,7 @@ def plotFTestResults(ssfs,_opt,_outdir="./",_extension='',_proc='',_cat='',_mass p = 0 xmax = 1 ymax = -1 - for k,ssf in ssfs.items(): + for k,ssf in ssfs.iteritems(): ssf.MH.setVal(int(_mass)) x = int(k.split("_")[-1]) if x > xmax: xmax = x @@ -236,17 +236,17 @@ def plotPdfComponents(ssf,var="CMS_hgg_mass",_outdir='./',_extension='',_proc='' hists['data'].Draw("PE") hists['final'].Draw("Same HIST") # Individual Gaussian histograms - for k,v in ssf.Pdfs.items(): + for k,v in ssf.Pdfs.iteritems(): if k == 'final': continue pdfs[k] = v - if len(list(pdfs.keys()))!=1: + if len(pdfs.keys())!=1: pdfItr = 0 - for k,v in pdfs.items(): + for k,v in pdfs.iteritems(): if pdfItr == 0: - if "gaus" in k: frac = ssf.Pdfs['final'].getComponents().getRealValue("frac_g0_constrained") - else: frac = ssf.Pdfs['final'].getComponents().getRealValue("frac_constrained") + if "gaus" in k: frac = ssf.Pdfs['final'].getComponents().getRealValue("frac_g0_constrained") + else: frac = ssf.Pdfs['final'].getComponents().getRealValue("frac_constrained") else: - frac = ssf.Pdfs['final'].getComponents().getRealValue("%s_%s_recursive_fraction_%s"%(ssf.proc,ssf.cat,k)) + frac = ssf.Pdfs['final'].getComponents().getRealValue("%s_%s_recursive_fraction_%s"%(ssf.proc,ssf.cat,k)) # Create histogram with 1600 bins hists[k] = v.createHistogram("h_%s%s"%(k,_extension),ssf.xvar,ROOT.RooFit.Binning(1600)) hists[k].Scale(frac) @@ -263,12 +263,12 @@ def plotPdfComponents(ssf,var="CMS_hgg_mass",_outdir='./',_extension='',_proc='' leg.AddEntry(hists['data'],"Simulation","ep") leg.AddEntry(hists['final'],"Parametric Model","L") leg.Draw("Same") - if len(list(pdfs.keys()))!=1: + if len(pdfs.keys())!=1: leg1 = ROOT.TLegend(0.6,0.4,0.86,0.6) leg1.SetFillStyle(0) leg1.SetLineColor(0) leg1.SetTextSize(0.035) - for k,v in pdfs.items(): leg1.AddEntry(hists[k],k,"L") + for k,v in pdfs.iteritems(): leg1.AddEntry(hists[k],k,"L") leg1.Draw("Same") # Add Latex lat = ROOT.TLatex() @@ -328,9 +328,7 @@ def plotInterpolation(_finalModel,_outdir='./',_massPoints='120,121,122,123,124, hists['data_%s'%mp].SetLineColor(colorMap[mp]) # Extract first hist and clone for axes - print(type(hists)) - print(hists.keys()) - haxes = hists[list(hists.keys())[0]].Clone() + haxes = hists[hists.keys()[0]].Clone() # haxes.GetXaxis().SetTitle("m_{#gamma#gamma} [GeV]") haxes.GetXaxis().SetTitle("m_{#j#j} [GeV]") haxes.GetYaxis().SetTitle("Events / %.2f GeV"%((_finalModel.xvar.getMax()-_finalModel.xvar.getMin())/_finalModel.xvar.getBins())) @@ -342,7 +340,7 @@ def plotInterpolation(_finalModel,_outdir='./',_massPoints='120,121,122,123,124, haxes.Draw("AXIS") # Draw rest of histograms - for k,h in hists.items(): + for k,h in hists.iteritems(): if "data" in k: h.Draw("Same EP") else: h.Draw("Same HIST") @@ -417,7 +415,7 @@ def plotSplines(_finalModel,_outdir="./",_nominalMass='125',splinesToPlot=['xs', leg.SetLineColor(0) leg.SetTextSize(0.04) # Draw graphs - for x, gr in grs.items(): + for x, gr in grs.iteritems(): gr.SetLineColor(colorMap[x]) gr.SetMarkerColor(colorMap[x]) gr.SetMarkerStyle(20) diff --git a/Signal/tools/replacementMap.py b/Signal/tools/replacementMap.py index d8a21832..ca3e9dc8 100644 --- a/Signal/tools/replacementMap.py +++ b/Signal/tools/replacementMap.py @@ -120,6 +120,9 @@ globalReplacementMap["STXS"]["procRVMap"]["RECO_WH_LEP_PTV_GT150_Tag0"] = "QQ2HLNU_PTV_150_250_0J" globalReplacementMap["STXS"]["procRVMap"]["RECO_ZH_LEP_Tag0"] = "QQ2HLL_PTV_0_75" globalReplacementMap["STXS"]["procRVMap"]["RECO_ZH_LEP_Tag1"] = "QQ2HLL_PTV_0_75" +globalReplacementMap["STXS"]["procRVMap"]["boosted_cat0"] = "ggH" +globalReplacementMap["STXS"]["procRVMap"]["boosted_cat1"] = "ggH" +globalReplacementMap["STXS"]["procRVMap"]["boosted_cat2"] = "ggH" # Replacement category for RV fit globalReplacementMap["STXS"]["catRVMap"] = od() globalReplacementMap["STXS"]["catRVMap"]["RECO_0J_PTH_0_10_Tag0"] = "RECO_0J_PTH_0_10_Tag0" @@ -206,3 +209,6 @@ globalReplacementMap["STXS"]["catRVMap"]["RECO_WH_LEP_PTV_GT150_Tag0"] = "RECO_WH_LEP_PTV_GT150_Tag0" globalReplacementMap["STXS"]["catRVMap"]["RECO_ZH_LEP_Tag0"] = "RECO_ZH_LEP_Tag0" globalReplacementMap["STXS"]["catRVMap"]["RECO_ZH_LEP_Tag1"] = "RECO_ZH_LEP_Tag1" +globalReplacementMap["STXS"]["catRVMap"]["boosted_cat0"] = "boosted_cat0" +globalReplacementMap["STXS"]["catRVMap"]["boosted_cat1"] = "boosted_cat1" +globalReplacementMap["STXS"]["catRVMap"]["boosted_cat2"] = "boosted_cat2" diff --git a/Signal/tools/signalTools.py b/Signal/tools/signalTools.py index ed0992d8..64610952 100644 --- a/Signal/tools/signalTools.py +++ b/Signal/tools/signalTools.py @@ -11,15 +11,12 @@ def reduceDataset(_d,_argset): return _d.reduce(_argset) def splitRVWV(_d,_argset,mode="RV"): # Split into RV/WV senario at dZ = 1cm - if mode == "RV": - print("RV") - return _d.reduce(_argset,"abs(dZ)<=1.") - elif mode == "WV": - return _d.reduce(_argset,"abs(dZ)>1.") + if mode == "RV": return _d.reduce(_argset,"abs(dZ)<=1.") + elif mode == "WV": return _d.reduce(_argset,"abs(dZ)>1.") else: - print((" --> [ERROR] unrecognised mode (%s) in splitRVWV function"%mode)) + print " --> [ERROR] unrecognised mode (%s) in splitRVWV function"%mode return 0 - print("RV done") + def beamspotReweigh(d,widthData,widthMC,_xvar,_dZ,_x='CMS_hgg_mass',preserveNorm=True): isumw = d.sumEntries() drw = d.emptyClone() diff --git a/Signal/tools/simultaneousFit.py b/Signal/tools/simultaneousFit.py index 21f89bff..2d2d2ae4 100644 --- a/Signal/tools/simultaneousFit.py +++ b/Signal/tools/simultaneousFit.py @@ -6,16 +6,16 @@ import scipy.stats from collections import OrderedDict as od from array import array -import ctypes + # Parameter lookup table for initialisation # So far defined up to MHPolyOrder=2 pLUT = od() pLUT['Gaussian_wdcb'] = od() -pLUT['Gaussian_wdcb']['dm_p0'] = [0.0,-100,100] +pLUT['Gaussian_wdcb']['dm_p0'] = [0.0,-10,10] pLUT['Gaussian_wdcb']['dm_p1'] = [0.01,-0.01,0.01] pLUT['Gaussian_wdcb']['dm_p2'] = [0.01,-0.01,0.01] -pLUT['Gaussian_wdcb']['sigma_p0'] = [15,1.0,100.] +pLUT['Gaussian_wdcb']['sigma_p0'] = [40,1.0,100.] pLUT['Gaussian_wdcb']['sigma_p1'] = [0.0,-0.1,0.1] pLUT['Gaussian_wdcb']['sigma_p2'] = [0.0,-0.001,0.001] pLUT['Frac'] = od() @@ -30,7 +30,7 @@ pLUT['Gaussian']['sigma_p1'] = [0.0,-0.01,0.01] pLUT['Gaussian']['sigma_p2'] = [0.0,-0.01,0.01] pLUT['FracGaussian'] = od() -pLUT['FracGaussian']['p0'] = ['func',0.01,0.99] +pLUT['FracGaussian']['p0'] = ['func',0.001,0.999] pLUT['FracGaussian']['p1'] = [0.01,-0.005,0.005] pLUT['FracGaussian']['p2'] = [0.00001,-0.00001,0.00001] @@ -72,8 +72,7 @@ def calcChi2(x,pdf,d,errorType="Poisson",_verbose=False,fitRange=[105,150]): if ndata*ndata == 0: continue npdf = pdf.getVal(ROOT.RooArgSet(x))*normFactor*d.binVolume() - # eLo, eHi = ROOT.Double(), ROOT.Double() - eLo, eHi = ctypes.c_double(), ctypes.c_double() + eLo, eHi = ROOT.Double(), ROOT.Double() d.weightError(eLo,eHi,ROOT.RooAbsData.SumW2) bins.append(i) nPdf.append(npdf) @@ -84,7 +83,7 @@ def calcChi2(x,pdf,d,errorType="Poisson",_verbose=False,fitRange=[105,150]): # Convert to numpy array nPdf = np.asarray(nPdf) nData = np.asarray(nData) - eDataSumW2 = np.asarray([e.value for e in eDataSumW2], dtype=float) + eDataSumW2 = np.asarray(eDataSumW2) if errorType == 'Poisson': # Change error to poisson intervals: take max interval as error @@ -106,10 +105,10 @@ def calcChi2(x,pdf,d,errorType="Poisson",_verbose=False,fitRange=[105,150]): e = eDataSumW2 terms = (nPdf-nData)**2/(eDataSumW2**2) - # If verbose: print(to screen) + # If verbose: print to screen if _verbose: for i in range(len(terms)): - print((" --> [DEBUG] Bin %g : nPdf = %.6f, nData = %.6f, e(%s) = %.6f --> chi2 term = %.6f"%(bins[i],nPdf[i],nData[i],errorType,e[i],terms[i]))) + print " --> [DEBUG] Bin %g : nPdf = %.6f, nData = %.6f, e(%s) = %.6f --> chi2 term = %.6f"%(bins[i],nPdf[i],nData[i],errorType,e[i],terms[i]) # Sum terms result = terms.sum() @@ -125,17 +124,22 @@ def nChi2Addition(X,ssf,verbose=False): chi2sum = 0 K = 0 # number of non empty bins C = len(X)-1 # number of fit params (-1 for MH) - for mp,d in ssf.DataHists.items(): - + for mp,d in ssf.DataHists.iteritems(): ssf.MH.setVal(int(mp)) MHLow=int(ssf.MHLow) MHHigh=int(ssf.MHHigh) - chi2, k = calcChi2(ssf.xvar,ssf.Pdfs['final'],d,_verbose=verbose,fitRange=[MHLow,MHHigh]) + + if ssf.MY.getVal() == 125: + chi2, k = calcChi2(ssf.xvar,ssf.Pdfs['final'],d,_verbose=verbose) + else: + chi2, k = calcChi2(ssf.xvar,ssf.Pdfs['final'],d,_verbose=verbose,fitRange=[MHLow,MHHigh]) ##2d + # chi2, k = calcChi2(ssf.xvar,ssf.Pdfs['final'],d,_verbose=verbose) chi2sum += chi2 K += k # N degrees of freedom ndof = K-C + # print("check ndof",ndof,K,C) ssf.setNdof(ndof) return chi2sum @@ -211,7 +215,7 @@ def extractX0(self): # Function to normalise datasets and convert to RooDataHists for calc chi2 def prepareDataHists(self): # Loop over datasets and normalise to 1 - for k,d in self.datasetForFit.items(): + for k,d in self.datasetForFit.iteritems(): sumw = d.sumEntries() drw = d.emptyClone() self.Vars['weight'] = ROOT.RooRealVar("weight","weight",-10000,10000) @@ -227,22 +231,24 @@ def buildDCBplusGaussian(self,_recursive=True): # DCB pLUT['DCB'] = od() - pLUT['DCB']['dm_p0'] = [self.jetmass,float(self.MHLow),float(self.MHHigh)] + pLUT['DCB']['dm_p0'] = [float(self.jetmass),float(self.MHLow),float(self.MHHigh)] + + # pLUT['DCB']['dm_p0'] = [2600,2400,2700] pLUT['DCB']['dm_p1'] = [0.0,-10,10] pLUT['DCB']['dm_p2'] = [0.0,-0.1,0.1] - pLUT['DCB']['sigma_p0'] = [13,0.0,100.] + pLUT['DCB']['sigma_p0'] = [20,0.0,100.] pLUT['DCB']['sigma_p1'] = [0.0,-0.1,0.1] pLUT['DCB']['sigma_p2'] = [0.0,-0.001,0.001] pLUT['DCB']['n1_p0'] = [35.,0.01,1000.] pLUT['DCB']['n1_p1'] = [0.0,-0.1,0.1] pLUT['DCB']['n1_p2'] = [0.0,-0.001,0.001] - pLUT['DCB']['n2_p0'] = [5.6,0.01,500] + pLUT['DCB']['n2_p0'] = [4.5,0.01,500] pLUT['DCB']['n2_p1'] = [0.0,-0.1,0.1] pLUT['DCB']['n2_p2'] = [0.0,-0.001,0.001] - pLUT['DCB']['a1_p0'] = [0.65,0.01,100] + pLUT['DCB']['a1_p0'] = [0.11,0.01,100] pLUT['DCB']['a1_p1'] = [0.0,-0.1,0.1] pLUT['DCB']['a1_p2'] = [0.0,-0.001,0.001] - pLUT['DCB']['a2_p0'] = [1.2,0.01,100] + pLUT['DCB']['a2_p0'] = [0.11,0.01,100] pLUT['DCB']['a2_p1'] = [0.0,-0.1,0.1] pLUT['DCB']['a2_p2'] = [0.0,-0.001,0.001] # Define polynominal functions (in dMH) @@ -252,7 +258,7 @@ def buildDCBplusGaussian(self,_recursive=True): # Create coeff for polynominal of order MHPolyOrder: y = a+bx+cx^2+... for po in range(0,self.MHPolyOrder+1): self.Vars['%s_p%g'%(k,po)] = ROOT.RooRealVar("%s_p%g"%(k,po),"%s_p%g"%(k,po),pLUT['DCB']["%s_p%s"%(f,po)][0],pLUT['DCB']["%s_p%s"%(f,po)][1],pLUT['DCB']["%s_p%s"%(f,po)][2]) - self.Varlists[k].add( self.Vars['%s_p%g'%(k,po)] ) + self.Varlists[k].add( self.Vars['%s_p%g'%(k,po)] ) # Define polynominali self.constOne.setConstant(True) self.Polynomials[k] = ROOT.RooPolyVar(k,k,self.constOne,self.Varlists[k]) @@ -272,27 +278,27 @@ def buildDCBplusGaussian(self,_recursive=True): # Define polynomial self.Polynomials[k] = ROOT.RooPolyVar(k,k,self.constOne,self.Varlists[k]) # Build Gaussian - self.Pdfs['gaus'] = ROOT.RooGaussian("gaus","gaus",self.xvar,self.Polynomials['mean_dcb'],self.Polynomials['sigma_gaus']) + # self.Pdfs['gaus'] = ROOT.RooGaussian("gaus","gaus",self.xvar,self.Polynomials['mean_dcb'],self.Polynomials['sigma_gaus']) # Relative fraction: also polynomial of order MHPolyOrder - self.Varlists['frac'] = ROOT.RooArgList("frac_coeffs") - for po in range(0,self.MHPolyOrder+1): - self.Vars['frac_p%g'%po] = ROOT.RooRealVar("frac_p%g"%po,"frac_p%g"%po,pLUT['Frac']['p%g'%po][0],pLUT['Frac']['p%g'%po][1],pLUT['Frac']['p%g'%po][2]) - self.Varlists['frac'].add( self.Vars['frac_p%g'%po] ) + # self.Varlists['frac'] = ROOT.RooArgList("frac_coeffs") + # for po in range(0,self.MHPolyOrder+1): + # self.Vars['frac_p%g'%po] = ROOT.RooRealVar("frac_p%g"%po,"frac_p%g"%po,pLUT['Frac']['p%g'%po][0],pLUT['Frac']['p%g'%po][1],pLUT['Frac']['p%g'%po][2]) + # self.Varlists['frac'].add( self.Vars['frac_p%g'%po] ) # Define Polynomial - self.Polynomials['frac'] = ROOT.RooPolyVar('frac','frac',self.dMH,self.Varlists['frac']) + # self.Polynomials['frac'] = ROOT.RooPolyVar('frac','frac',self.dMH,self.Varlists['frac']) # Constrain fraction to not be above 1 or below 0 - self.Polynomials['frac_constrained'] = ROOT.RooFormulaVar("frac_constrained","frac_constrained","(@0>0)*(@0<1)*@0+(@0>1.0)*0.9999",ROOT.RooArgList(self.Polynomials['frac'])) - self.Coeffs['frac_constrained'] = self.Polynomials['frac_constrained' ] + # self.Polynomials['frac_constrained'] = ROOT.RooFormulaVar("frac_constrained","frac_constrained","(@0>0)*(@0<1)*@0+(@0>1.0)*0.9999",ROOT.RooArgList(self.Polynomials['frac'])) + # self.Coeffs['frac_constrained'] = self.Polynomials['frac_constrained' ] # Define total PDF - _pdfs, _coeffs = ROOT.RooArgList(), ROOT.RooArgList() - for pdf in ['dcb','gaus']: _pdfs.add(self.Pdfs[pdf]) - _coeffs.add(self.Coeffs['frac_constrained']) + # _pdfs, _coeffs = ROOT.RooArgList(), ROOT.RooArgList() + # for pdf in ['dcb','gaus']: _pdfs.add(self.Pdfs[pdf]) + # _coeffs.add(self.Coeffs['frac_constrained']) # self.Pdfs['final']=self.Pdfs['dcb'] - self.Pdfs['final'] = ROOT.RooAddPdf("%s_%s"%(self.proc,self.cat),"%s_%s"%(self.proc,self.cat),_pdfs,_coeffs,_recursive) - # self.Pdfs['final'] = ROOT.RooFFTConvPdf("%s_%s"%(self.proc,self.cat),"%s_%s"%(self.proc,self.cat),self.xvar,self.Pdfs['dcb'],self.Pdfs['gaus']) + # self.Pdfs['final'] = ROOT.RooAddPdf("%s_%s"%(self.proc,self.cat),"%s_%s"%(self.proc,self.cat),_pdfs,_coeffs,_recursive) + self.Pdfs['final'] = self.Pdfs['dcb'] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -305,18 +311,18 @@ def buildNGaussians(self,nGaussians,_recursive=True): for g in range(0,nGaussians): # Define polynominal functions for mean and sigma (in MH) for f in ['dm','sigma']: - k = "%s_g%g"%(f,g) - self.Varlists[k] = ROOT.RooArgList("%s_coeffs"%k) - # Create coeff for polynominal of order MHPolyOrder: y = a+bx+cx^2+... - for po in range(0,self.MHPolyOrder+1): + k = "%s_g%g"%(f,g) + self.Varlists[k] = ROOT.RooArgList("%s_coeffs"%k) + # Create coeff for polynominal of order MHPolyOrder: y = a+bx+cx^2+... + for po in range(0,self.MHPolyOrder+1): # p0 value of sigma is function of g (creates gaussians of increasing width) if(f == "sigma")&(po==0): self.Vars['%s_p%g'%(k,po)] = ROOT.RooRealVar("%s_p%g"%(k,po),"%s_p%g"%(k,po),(g+1)*1.0,pLUT['Gaussian']["%s_p%s"%(f,po)][1],pLUT['Gaussian']["%s_p%s"%(f,po)][2]) - else: + else: self.Vars['%s_p%g'%(k,po)] = ROOT.RooRealVar("%s_p%g"%(k,po),"%s_p%g"%(k,po),pLUT['Gaussian']["%s_p%s"%(f,po)][0],pLUT['Gaussian']["%s_p%s"%(f,po)][1],pLUT['Gaussian']["%s_p%s"%(f,po)][2]) - self.Varlists[k].add( self.Vars['%s_p%g'%(k,po)] ) - # Define polynominal - self.Polynomials[k] = ROOT.RooPolyVar(k,k,self.dMH,self.Varlists[k]) + self.Varlists[k].add( self.Vars['%s_p%g'%(k,po)] ) + # Define polynominal + self.Polynomials[k] = ROOT.RooPolyVar(k,k,self.dMH,self.Varlists[k]) # Mean function self.Polynomials['mean_g%g'%g] = ROOT.RooFormulaVar("mean_g%g"%g,"mean_g%g"%g,"(@0+@1)",ROOT.RooArgList(self.MH,self.Polynomials['dm_g%g'%g])) # Build Gaussian @@ -324,19 +330,19 @@ def buildNGaussians(self,nGaussians,_recursive=True): # Relative fractions: also polynomials of order MHPolyOrder (define up to n=nGaussians-1) if g < nGaussians-1: - self.Varlists['frac_g%g'%g] = ROOT.RooArgList("frac_g%g_coeffs"%g) - for po in range(0,self.MHPolyOrder+1): - if po == 0: - self.Vars['frac_g%g_p%g'%(g,po)] = ROOT.RooRealVar("frac_g%g_p%g"%(g,po),"frac_g%g_p%g"%(g,po),0.5-0.05*g,pLUT['FracGaussian']['p%g'%po][1],pLUT['FracGaussian']['p%g'%po][2]) - else: - self.Vars['frac_g%g_p%g'%(g,po)] = ROOT.RooRealVar("frac_g%g_p%g"%(g,po),"frac_g%g_p%g"%(g,po),pLUT['FracGaussian']['p%g'%po][0],pLUT['FracGaussian']['p%g'%po][1],pLUT['FracGaussian']['p%g'%po][2]) - self.Varlists['frac_g%g'%g].add( self.Vars['frac_g%g_p%g'%(g,po)] ) - # Define Polynomial - self.Polynomials['frac_g%g'%g] = ROOT.RooPolyVar("frac_g%g"%g,"frac_g%g"%g,self.dMH,self.Varlists['frac_g%g'%g]) - # Constrain fraction to not be above 1 or below 0 - self.Polynomials['frac_g%g_constrained'%g] = ROOT.RooFormulaVar('frac_g%g_constrained'%g,'frac_g%g_constrained'%g,"(@0>0)*(@0<1)*@0+ (@0>1.0)*0.9999",ROOT.RooArgList(self.Polynomials['frac_g%g'%g])) - self.Coeffs['frac_g%g_constrained'%g] = self.Polynomials['frac_g%g_constrained'%g] - # End of loop over n Gaussians + self.Varlists['frac_g%g'%g] = ROOT.RooArgList("frac_g%g_coeffs"%g) + for po in range(0,self.MHPolyOrder+1): + if po == 0: + self.Vars['frac_g%g_p%g'%(g,po)] = ROOT.RooRealVar("frac_g%g_p%g"%(g,po),"frac_g%g_p%g"%(g,po),0.5-0.05*g,pLUT['FracGaussian']['p%g'%po][1],pLUT['FracGaussian']['p%g'%po][2]) + else: + self.Vars['frac_g%g_p%g'%(g,po)] = ROOT.RooRealVar("frac_g%g_p%g"%(g,po),"frac_g%g_p%g"%(g,po),pLUT['FracGaussian']['p%g'%po][0],pLUT['FracGaussian']['p%g'%po][1],pLUT['FracGaussian']['p%g'%po][2]) + self.Varlists['frac_g%g'%g].add( self.Vars['frac_g%g_p%g'%(g,po)] ) + # Define Polynomial + self.Polynomials['frac_g%g'%g] = ROOT.RooPolyVar("frac_g%g"%g,"frac_g%g"%g,self.dMH,self.Varlists['frac_g%g'%g]) + # Constrain fraction to not be above 1 or below 0 + self.Polynomials['frac_g%g_constrained'%g] = ROOT.RooFormulaVar('frac_g%g_constrained'%g,'frac_g%g_constrained'%g,"(@0>0)*(@0<1)*@0+ (@0>1.0)*0.9999",ROOT.RooArgList(self.Polynomials['frac_g%g'%g])) + self.Coeffs['frac_g%g_constrained'%g] = self.Polynomials['frac_g%g_constrained'%g] + # End of loop over n Gaussians # Define total PDF _pdfs, _coeffs = ROOT.RooArgList(), ROOT.RooArgList() @@ -353,9 +359,9 @@ def runFit(self): fv.remove(self.xvar) fv.remove(self.constOne) self.FitParameters = ROOT.RooArgList(fv) - + # print(self.FitParameters) # Create initial vector of parameters and calculate initial Chi2 - if self.verbose: print("\n --> (%s) Initialising fit parameters"%self.name) + if self.verbose: print "\n --> (%s) Initialising fit parameters"%self.name x0 = self.extractX0() xbounds = self.extractXBounds() self.Chi2 = self.getChi2() @@ -363,7 +369,7 @@ def runFit(self): if self.verbose: self.printFitParameters(title="Pre-fit") # Run fit - if self.verbose: print((" --> (%s) Running fit"%self.name)) + if self.verbose: print " --> (%s) Running fit"%self.name self.FitResult = minimize(nChi2Addition,x0,args=self,bounds=xbounds,method=self.minimizerMethod) self.Chi2 = self.getChi2() #self.Chi2 = nChi2Addition(self.FitResult['x'],self) @@ -375,7 +381,7 @@ def runFit(self): def buildSplines(self): # Loop over polynomials - for k, poly in self.Polynomials.items(): + for k, poly in self.Polynomials.iteritems(): _x, _y = [], [] _mh = 100. while(_mh<180.1): @@ -389,23 +395,23 @@ def buildSplines(self): self.Splines[k] = ROOT.RooSpline1D(poly.GetName(),poly.GetName(),self.MH,len(_x),arr_x,arr_y) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - # Function to print(fit values) + # Function to print fit values def printFitParameters(self,title="Fit"): - print((" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")) - print((" --> (%s) %s parameter values:"%(self.name,title))) + print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + print " --> (%s) %s parameter values:"%(self.name,title) # Skip MH - for i in range(1,len(self.FitParameters)): print((" * %-20s = %.6f"%(self.FitParameters[i].GetName(),self.FitParameters[i].getVal()))) - print((" ~~~~~~~~~~~~~~~~")) - print((" * chi2 = %.6f, n(dof) = %g --> chi2/n(dof) = %.3f"%(self.getChi2(),int(self.Ndof),self.getChi2()/int(self.Ndof)))) - print((" ~~~~~~~~~~~~~~~~")) - print((" * [VERBOSE] chi2 = %.6f"%(self.getChi2(verbose=True)))) - print((" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")) + for i in range(1,len(self.FitParameters)): print " * %-20s = %.6f"%(self.FitParameters[i].GetName(),self.FitParameters[i].getVal()) + print " ~~~~~~~~~~~~~~~~" + print " * chi2 = %.6f, n(dof) = %g --> chi2/n(dof) = %.3f"%(self.getChi2(),int(self.Ndof),self.getChi2()/int(self.Ndof)) + print " ~~~~~~~~~~~~~~~~" + print " * [VERBOSE] chi2 = %.6f"%(self.getChi2(verbose=True)) + print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Function to set vars from json file def setVars(self,_json): with open(_json,"r") as jf: _vals = json.load(jf) - for k,v in _vals.items(): self.Vars[k].setVal(v) + for k,v in _vals.iteritems(): self.Vars[k].setVal(v) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Function to re-calculate chi2 after setting vars diff --git a/Signal/tools/submissionTools.py b/Signal/tools/submissionTools.py index b5a51611..b761a068 100644 --- a/Signal/tools/submissionTools.py +++ b/Signal/tools/submissionTools.py @@ -4,7 +4,7 @@ from commonObjects import * def run(cmd): - print("%s\n\n"%cmd) + print "%s\n\n"%cmd os.system(cmd) def writePreamble(_file): @@ -62,7 +62,7 @@ def writeSubFiles(_opts): pcidx = pidx*_opts['nCats']+cidx p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] _f.write("if [ $1 -eq %g ]; then\n"%pcidx) - _f.write(" python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) + _f.write(" python %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) _f.write("fi\n") # For looping over categories @@ -72,35 +72,35 @@ def writeSubFiles(_opts): _f.write("if [ $1 -eq %g ]; then\n"%cidx) for pidx in range(_opts['nProcs']): p = _opts['procs'].split(",")[pidx] - _f.write(" python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) + _f.write(" python %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) _f.write("fi\n") elif _opts['mode'] == "calcPhotonSyst": for cidx in range(_opts['nCats']): c = _opts['cats'].split(",")[cidx] _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _f.write(" python3 %s/scripts/calcPhotonSyst.py --cat %s --procs %s --ext %s --inputWSDir %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) + _f.write(" python %s/scripts/calcPhotonSyst.py --cat %s --procs %s --ext %s --inputWSDir %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) _f.write("fi\n") elif _opts['mode'] == "fTest": for cidx in range(_opts['nCats']): c = _opts['cats'].split(",")[cidx] _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _f.write(" python3 %s/scripts/fTest.py --cat %s --procs %s --ext %s --inputWSDir %s %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['modeOpts'])) + _f.write(" python %s/scripts/fTest.py --cat %s --procs %s --ext %s --inputWSDir %s %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['modeOpts'])) _f.write("fi\n") elif _opts['mode'] == "packageSignal": for cidx in range(_opts['nCats']): c = _opts['cats'].split(",")[cidx] _f.write("if [ $1 -eq %g ]; then\n"%cidx) - _f.write(" python3 %s/scripts/packageSignal.py --cat %s --outputExt %s --massPoints %s %s\n"%(swd__,c,_opts['ext'],_opts['massPoints'],_opts['modeOpts'])) + _f.write(" python %s/scripts/packageSignal.py --cat %s --outputExt %s --massPoints %s %s\n"%(swd__,c,_opts['ext'],_opts['massPoints'],_opts['modeOpts'])) _f.write("fi\n") # For single script elif _opts['mode'] == 'getEffAcc': - _f.write("python3 %s/scripts/getEffAcc.py --inputWSDir %s --ext %s --procs %s --massPoints %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['procs'],_opts['massPoints'],_opts['modeOpts'])) + _f.write("python %s/scripts/getEffAcc.py --inputWSDir %s --ext %s --procs %s --massPoints %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['procs'],_opts['massPoints'],_opts['modeOpts'])) elif _opts['mode'] == 'getDiagProc': - _f.write("python3 %s/scripts/getDiagProc.py --inputWSDir %s --ext %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['modeOpts'])) + _f.write("python %s/scripts/getDiagProc.py --inputWSDir %s --ext %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['modeOpts'])) # Close .sh file _f.close() @@ -129,7 +129,7 @@ def writeSubFiles(_opts): p,c = _opts['procs'].split(",")[pidx], _opts['cats'].split(",")[cidx] _f = open("%s/%s_%g.sh"%(_jobdir,_executable,pcidx),"w") writePreamble(_f) - _f.write("python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) + _f.write("python %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s_%g.sh"%(_jobdir,_executable,pcidx)) @@ -141,7 +141,7 @@ def writeSubFiles(_opts): writePreamble(_f) for pidx in range(_opts['nProcs']): p = _opts['procs'].split(",")[pidx] - _f.write("python3 %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) + _f.write("python %s/scripts/signalFit.py --inputWSDir %s --ext %s --proc %s --cat %s --year %s --analysis %s --massPoints %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n\n"%(swd__,_opts['inputWSDir'],_opts['ext'],p,c,_opts['year'],_opts['analysis'],_opts['massPoints'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) @@ -150,7 +150,7 @@ def writeSubFiles(_opts): c = _opts['cats'].split(",")[cidx] _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") writePreamble(_f) - _f.write("python3 %s/scripts/calcPhotonSyst.py --cat %s --procs %s --ext %s --inputWSDir %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) + _f.write("python %s/scripts/calcPhotonSyst.py --cat %s --procs %s --ext %s --inputWSDir %s --scales \'%s\' --scalesCorr \'%s\' --scalesGlobal \'%s\' --smears \'%s\' %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['scales'],_opts['scalesCorr'],_opts['scalesGlobal'],_opts['smears'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) @@ -159,7 +159,7 @@ def writeSubFiles(_opts): c = _opts['cats'].split(",")[cidx] _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") writePreamble(_f) - _f.write("python3 %s/scripts/fTest.py --cat %s --procs %s --ext %s --inputWSDir %s %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['modeOpts'])) + _f.write("python %s/scripts/fTest.py --cat %s --procs %s --ext %s --inputWSDir %s %s\n"%(swd__,c,_opts['procs'],_opts['ext'],_opts['inputWSDir'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) @@ -168,7 +168,7 @@ def writeSubFiles(_opts): c = _opts['cats'].split(",")[cidx] _f = open("%s/%s_%s.sh"%(_jobdir,_executable,c),"w") writePreamble(_f) - _f.write("python3 %s/scripts/packageSignal.py --cat %s --outputExt %s --massPoints %s %s\n"%(swd__,c,_opts['ext'],_opts['massPoints'],_opts['modeOpts'])) + _f.write("python %s/scripts/packageSignal.py --cat %s --outputExt %s --massPoints %s %s\n"%(swd__,c,_opts['ext'],_opts['massPoints'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s_%s.sh"%(_jobdir,_executable,c)) @@ -176,13 +176,13 @@ def writeSubFiles(_opts): elif _opts['mode'] == "getEffAcc": _f = open("%s/%s.sh"%(_jobdir,_executable),"w") writePreamble(_f) - _f.write("python3 %s/scripts/getEffAcc.py --inputWSDir %s --ext %s --procs %s --massPoints %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['procs'],_opts['massPoints'],_opts['modeOpts'])) + _f.write("python %s/scripts/getEffAcc.py --inputWSDir %s --ext %s --procs %s --massPoints %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['procs'],_opts['massPoints'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) elif _opts['mode'] == "getDiagProc": _f = open("%s/%s.sh"%(_jobdir,_executable),"w") writePreamble(_f) - _f.write("python3 %s/scripts/getDiagProc.py --inputWSDir %s --ext %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['modeOpts'])) + _f.write("python %s/scripts/getDiagProc.py --inputWSDir %s --ext %s %s\n"%(swd__,_opts['inputWSDir'],_opts['ext'],_opts['modeOpts'])) _f.close() os.system("chmod 775 %s/%s.sh"%(_jobdir,_executable)) @@ -196,7 +196,7 @@ def submitFiles(_opts): _executable = "condor_%s_%s"%(_opts['mode'],_opts['ext']) cmdLine = "cd %s; condor_submit %s.sub; cd %s"%(_jobdir,_executable,swd__) run(cmdLine) - print(" --> Finished submitting files") + print " --> Finished submitting files" # SGE elif _opts['batch'] in ['IC','SGE']: @@ -225,7 +225,7 @@ def submitFiles(_opts): _subfile = "%s/%s"%(_jobdir,_executable) cmdLine = "qsub -q hep.q %s -o %s.log -e %s.err %s.sh"%(jobOptsStr,_subfile,_subfile,_subfile) run(cmdLine) - print(" --> Finished submitting files") + print " --> Finished submitting files" # Running locally elif _opts['batch'] == 'local': @@ -250,6 +250,6 @@ def submitFiles(_opts): _subfile = "%s/%s"%(_jobdir,_executable) cmdLine = "bash %s.sh"%_subfile run(cmdLine) - print(" --> Finished running files") + print " --> Finished running files" diff --git a/Trees2WS/RunWSScripts.py b/Trees2WS/RunWSScripts.py index f48d2777..735a4fa3 100644 --- a/Trees2WS/RunWSScripts.py +++ b/Trees2WS/RunWSScripts.py @@ -29,13 +29,13 @@ def get_options(): parser.add_option('--batch', dest='batch', default='IC', help='Batch') parser.add_option('--queue', dest='queue', default='hep.q', help='Queue: can take a while if including all systematics for many categories') parser.add_option('--jobOpts', dest='jobOpts', default='', help="Additional options to add to job submission. For Condor separate individual options with a colon (specify all within quotes e.g. \"option_xyz = abc+option_123 = 456\")") - parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print(submission files only")) + parser.add_option('--printOnly', dest='printOnly', default=False, action="store_true", help="Dry run: print submission files only") return parser.parse_args() (opt,args) = get_options() -print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING WS SCRIPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING WS SCRIPTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" def leave(): - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING WS SCRIPTS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUNNING WS SCRIPTS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" sys.exit(1) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -58,24 +58,24 @@ def leave(): # Check if mode in allowed options if options['mode'] not in ['trees2ws','trees2ws_data','haddMC','haddData','mass_shift']: - print(" --> [ERROR] mode %s not allowed. Please use one of the following: ['trees2ws','trees2ws_data','haddMC','haddData','mass_shift']. Leaving..."%options['mode']) + print " --> [ERROR] mode %s not allowed. Please use one of the following: ['trees2ws','trees2ws_data','haddMC','haddData','mass_shift']. Leaving..."%options['mode'] leave() -# print(info to user) -print(" --> Input directory: %s"%options['inputDir']) -print(" --> Year: %s"%(options['year'])) +# print info to user +print " --> Input directory: %s"%options['inputDir'] +print " --> Year: %s"%(options['year']) if options['mode'] in ['trees2ws','trees2ws_data']: - print(" --> Input config: %s"%options['inputConfig']) + print " --> Input config: %s"%options['inputConfig'] if options['mode'] in ['haddMC']: - print(" --> flashgg path: %s"%options['flashggPath']) - print(" --> Output RooWorkspace directory: %s"%options['outputWSDir']) + print " --> flashgg path: %s"%options['flashggPath'] + print " --> Output RooWorkspace directory: %s"%options['outputWSDir'] -if options['mode'] == "trees2ws": print(" --> Converting ROOT Trees to FinalFits compatible RooWorkspace for MC...") -elif options['mode'] == "trees2ws_data": print(" --> Converting ROOT Trees to FinalFits compatible RooWorkspace for data...") -elif options['mode'] == "haddMC": print(" --> Hadd MC workspaces...") -elif options['mode'] == "haddData": print(" --> Hadd data workspaces...") -elif options['mode'] == "mass_shift": print(" --> Ad-hoc shifting of mass in RooWorkspaces...") -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") +if options['mode'] == "trees2ws": print " --> Converting ROOT Trees to FinalFits compatible RooWorkspace for MC..." +elif options['mode'] == "trees2ws_data": print " --> Converting ROOT Trees to FinalFits compatible RooWorkspace for data..." +elif options['mode'] == "haddMC": print " --> Hadd MC workspaces..." +elif options['mode'] == "haddData": print " --> Hadd data workspaces..." +elif options['mode'] == "mass_shift": print " --> Ad-hoc shifting of mass in RooWorkspaces..." +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Make directory to store job scripts and output @@ -83,13 +83,13 @@ def leave(): # Write submission files: style depends on batch system writeSubFiles(options) -print(" --> Finished writing submission scripts") +print " --> Finished writing submission scripts" # Submit scripts to batch system if not options['printOnly']: submitFiles(options) else: - print(" --> Running with printOnly option. Will not submit scripts") + print " --> Running with printOnly option. Will not submit scripts" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ leave() diff --git a/Trees2WS/config_sys.py b/Trees2WS/config_sys.py index 883949c2..395b5c7e 100644 --- a/Trees2WS/config_sys.py +++ b/Trees2WS/config_sys.py @@ -14,9 +14,13 @@ 'theoryWeightContainers':{}, # Theory weights to add to nominal + NOTAG RooDatasets, value corresponds to number of weights (0-N) # List of systematics: use string YEAR for year-dependent systematics - 'systematics':['fnuf', 'JER', 'JES', 'material', 'scale', 'smear','FJER','FJES'], + 'systematics':['fnuf', 'material', 'scale', 'smear','FJER','FJES'], # Analysis categories: python list of cats or use 'auto' to extract from input tree 'cats':'auto' } + +# ,'FJESAbsoluteMPFBias','FJESAbsoluteScale','FJESAbsoluteStat','FJESFlavorQCD','FJESFragmentation','FJESPileUpDataMC','FJESPileUpPtBB','FJESPileUpPtEC1','FJESPileUpPtEC2','FJESPileUpPtHF', +# 'FJESPileUpPtRef','FJESRelativeBal','FJESRelativeFSR','FJESRelativeJEREC1','FJESRelativeJEREC2','FJESRelativeJERHF','FJESRelativePtBB','FJESRelativePtEC1','FJESRelativePtEC2','FJESRelativePtHF','FJESRelativeSample','FJESRelativeStatEC', +# 'FJESRelativeStatFSR','FJESRelativeStatHF','FJESSinglePionECAL','FJESSinglePionHCAL','FJESTimePtEta' \ No newline at end of file diff --git a/Trees2WS/config_sys_boost.py b/Trees2WS/config_sys_boost.py new file mode 100644 index 00000000..df488cf0 --- /dev/null +++ b/Trees2WS/config_sys_boost.py @@ -0,0 +1,25 @@ +# Input config file for running trees2ws + +trees2wsCfg = { + + # Name of RooDirectory storing input tree + 'inputTreeDir':'', + + # Variables to be added to dataframe: use wildcard * for common strings + 'mainVars':["CMS_hgg_mass","fathbbjet_mass","weight","dZ","*sigma"], # Vars to add to nominal RooDatasets + 'dataVars':["CMS_hgg_mass","fathbbjet_mass","weight"], # Vars for data workspace (trees2ws_data.py script) + 'stxsVar':'', # Var for STXS splitting: if using option doSTXSSplitting + 'notagVars':["weight","*sigma"], # Vars to add to NOTAG RooDataset + 'systematicsVars':["CMS_hgg_mass","fathbbjet_mass","weight"], # Variables to add to sytematic RooDataHists + 'theoryWeightContainers':{}, # Theory weights to add to nominal + NOTAG RooDatasets, value corresponds to number of weights (0-N) + + # List of systematics: use string YEAR for year-dependent systematics + 'systematics':['fnuf', 'material', 'scale', 'smear','FJER','FJES'], + + # Analysis categories: python list of cats or use 'auto' to extract from input tree + 'cats':'auto' + +} +# ,'FJESAbsoluteMPFBias','FJESAbsoluteScale','FJESAbsoluteStat','FJESFlavorQCD','FJESFragmentation','FJESPileUpDataMC','FJESPileUpPtBB','FJESPileUpPtEC1','FJESPileUpPtEC2','FJESPileUpPtHF', +# 'FJESPileUpPtRef','FJESRelativeBal','FJESRelativeFSR','FJESRelativeJEREC1','FJESRelativeJEREC2','FJESRelativeJERHF','FJESRelativePtBB','FJESRelativePtEC1','FJESRelativePtEC2','FJESRelativePtHF','FJESRelativeSample','FJESRelativeStatEC', +# 'FJESRelativeStatFSR','FJESRelativeStatHF','FJESSinglePionECAL','FJESSinglePionHCAL','FJESTimePtEta' \ No newline at end of file diff --git a/Trees2WS/config_sys_res.py b/Trees2WS/config_sys_res.py new file mode 100644 index 00000000..f2568c40 --- /dev/null +++ b/Trees2WS/config_sys_res.py @@ -0,0 +1,25 @@ +# Input config file for running trees2ws + +trees2wsCfg = { + + # Name of RooDirectory storing input tree + 'inputTreeDir':'', + + # Variables to be added to dataframe: use wildcard * for common strings + 'mainVars':["CMS_hgg_mass","Dijet_mass","weight","dZ","*sigma"], # Vars to add to nominal RooDatasets + 'dataVars':["CMS_hgg_mass","Dijet_mass","weight"], # Vars for data workspace (trees2ws_data.py script) + 'stxsVar':'', # Var for STXS splitting: if using option doSTXSSplitting + 'notagVars':["weight","*sigma"], # Vars to add to NOTAG RooDataset + 'systematicsVars':["CMS_hgg_mass","Dijet_mass","weight"], # Variables to add to sytematic RooDataHists + 'theoryWeightContainers':{}, # Theory weights to add to nominal + NOTAG RooDatasets, value corresponds to number of weights (0-N) + + # List of systematics: use string YEAR for year-dependent systematics + 'systematics':['fnuf', 'material', 'scale', 'smear','JER','JES'], + + # Analysis categories: python list of cats or use 'auto' to extract from input tree + 'cats':'auto' + +} +# ,'JESAbsoluteMPFBias','JESAbsoluteScale','JESAbsoluteStat','JESFlavorQCD','JESFragmentation','JESPileUpDataMC','JESPileUpPtBB','JESPileUpPtEC1','JESPileUpPtEC2','JESPileUpPtHF', +# 'JESPileUpPtRef','JESRelativeBal','JESRelativeFSR','JESRelativeJEREC1','JESRelativeJEREC2','JESRelativeJERHF','JESRelativePtBB','JESRelativePtEC1','JESRelativePtEC2','JESRelativePtHF','JESRelativeSample','JESRelativeStatEC', +# 'JESRelativeStatFSR','JESRelativeStatHF','JESSinglePionECAL','JESSinglePionHCAL','JESTimePtEta' \ No newline at end of file diff --git a/Trees2WS/makedir.sh b/Trees2WS/makedir.sh new file mode 100644 index 00000000..cf9e7d66 --- /dev/null +++ b/Trees2WS/makedir.sh @@ -0,0 +1,5 @@ +for i in {100,1000,1100,1200,125,1300,1400,150,1600,170,1800,190,2000,2200,2400,250,2500,2600,2800,300,350,400,450,500,550,60,600,650,70,700,80,800,900} +do + mkdir -r res_$i + mkdir -r boost_$i +done \ No newline at end of file diff --git a/Trees2WS/mass_shifter.py b/Trees2WS/mass_shifter.py index 49681922..72124280 100644 --- a/Trees2WS/mass_shifter.py +++ b/Trees2WS/mass_shifter.py @@ -18,11 +18,11 @@ xvar = opt.xvar if not os.path.exists( opt.inputWSFile ): - print(" --> [ERROR] input file %s does not exist. Leaving..."%(opt.inputWSFile)) + print " --> [ERROR] input file %s does not exist. Leaving..."%(opt.inputWSFile) sys.exit(1) if str(opt.inputMass) not in opt.inputWSFile: - print(" --> [ERROR] input file %s does not correspond to input mass (%s). Leaving..."%(opt.inputWSFile,str(opt.inputMass))) + print " --> [ERROR] input file %s does not correspond to input mass (%s). Leaving..."%(opt.inputWSFile,str(opt.inputMass)) # Calculate shift shift = float(opt.inputMass-opt.targetMass) @@ -56,7 +56,7 @@ n_components = n_orig.split("_%s_"%sqrts__) n_shift = re.sub(str(opt.inputMass),str(opt.targetMass),n_components[0])+"_%s_"%sqrts__+n_components[-1] - if verbose: print("%s --> %s"%(n_orig,n_shift)) + if verbose: print "%s --> %s"%(n_orig,n_shift) # Create an empty clone of original dataset shifted_datasets[n_shift] = d_orig.emptyClone( n_shift ) diff --git a/Trees2WS/run_data.sh b/Trees2WS/run_data.sh new file mode 100644 index 00000000..d3852cb8 --- /dev/null +++ b/Trees2WS/run_data.sh @@ -0,0 +1,9 @@ +# python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/output_loose/data/root/data_cat0.root +# python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/output_loose/data/root/data_cat1.root +# python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/output_loose/data/root/data_cat2.root +# python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/output_loose/data/root/data_cat3.root +# python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/output_loose/data/data_cat3.root +# python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/output_loose/data/data_cat4.root +python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/ForJieZhang/cut/data/flashgg_boost/data_cat0.root +python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/ForJieZhang/cut/data/flashgg_boost/data_cat1.root +python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/user/z/zhjie/ForJieZhang/cut/data/flashgg_boost/data_cat2.root diff --git a/Trees2WS/run_dnn.sh b/Trees2WS/run_dnn.sh new file mode 100644 index 00000000..1438bad2 --- /dev/null +++ b/Trees2WS/run_dnn.sh @@ -0,0 +1,3 @@ + + +python trees2ws.py --inputConfig config_test.py --inputTreeFile /eos/user/z/zhjie/allsys/merged_all.root --inputMass 125 --productionMode gghh --year 2017 --doSystematics diff --git a/Trees2WS/run_res.sh b/Trees2WS/run_res.sh new file mode 100644 index 00000000..02d36b45 --- /dev/null +++ b/Trees2WS/run_res.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# for i in {100,1000,1100,1200,125,1300,1400,150,1600,170,1800,190,2000,2200,2400,250,2500,2600,2800,300,350,400,450,500,550,60,600,650,70,700,80,800,900} +# for i in {800,700} +# do + folder='/eos/user/z/zhjie/ForJieZhang/cut/signal/flashgg_boost' + for file in $folder/*.root + do + python trees2ws.py --inputConfig config_simple.py --inputTreeFile $file --inputMass 125 --productionMode gghh --year 2017 + done +done/ \ No newline at end of file diff --git a/Trees2WS/run_y.sh b/Trees2WS/run_y.sh new file mode 100644 index 00000000..561c796e --- /dev/null +++ b/Trees2WS/run_y.sh @@ -0,0 +1,65 @@ + +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2017/root/signal-MX-650-MY-90_2017_cat0.root --inputMass 125 --productionMode gghh --year 2017 +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2018/root/signal-MX-650-MY-90_2018_cat0.root --inputMass 125 --productionMode gghh --year 2018 +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016pre/root/signal-MX-650-MY-90_2016pre_cat0.root --inputMass 125 --productionMode gghh --year 2016pre +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016post/root/signal-MX-650-MY-90_2016post_cat0.root --inputMass 125 --productionMode gghh --year 2016post + + +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2017/root/signal-MX-650-MY-90_2017_cat1.root --inputMass 125 --productionMode gghh --year 2017 +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2018/root/signal-MX-650-MY-90_2018_cat1.root --inputMass 125 --productionMode gghh --year 2018 +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016pre/root/signal-MX-650-MY-90_2016pre_cat1.root --inputMass 125 --productionMode gghh --year 2016pre +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016post/root/signal-MX-650-MY-90_2016post_cat1.root --inputMass 125 --productionMode gghh --year 2016post + + +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2017/root/signal-MX-650-MY-90_2017_cat2.root --inputMass 125 --productionMode gghh --year 2017 +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2018/root/signal-MX-650-MY-90_2018_cat2.root --inputMass 125 --productionMode gghh --year 2018 +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016pre/root/signal-MX-650-MY-90_2016pre_cat2.root --inputMass 125 --productionMode gghh --year 2016pre +python trees2ws.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016post/root/signal-MX-650-MY-90_2016post_cat2.root --inputMass 125 --productionMode gghh --year 2016post + + + +python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/data/root/data_cat0.root +python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/data/root/data_cat1.root +python trees2ws_data.py --inputConfig config_simple.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/data/root/data_cat2.root + +cd /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2017/root +mkdir ws_gghh_cat0 +mkdir ws_gghh_cat1 +mkdir ws_gghh_cat2 +mv ws_gghh/*cat0*.root ws_gghh_cat0/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat0_2017_M125_pythia8_gghh.root +mv ws_gghh/*cat1*.root ws_gghh_cat1/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat1_2017_M125_pythia8_gghh.root +mv ws_gghh/*cat2*.root ws_gghh_cat2/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat2_2017_M125_pythia8_gghh.root + +cd /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2018/root +mkdir ws_gghh_cat0 +mkdir ws_gghh_cat1 +mkdir ws_gghh_cat2 +mv ws_gghh/*cat0*.root ws_gghh_cat0/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat0_2018_M125_pythia8_gghh.root +mv ws_gghh/*cat1*.root ws_gghh_cat1/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat1_2018_M125_pythia8_gghh.root +mv ws_gghh/*cat2*.root ws_gghh_cat2/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat2_2018_M125_pythia8_gghh.root + +cd /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016pre/root +mkdir ws_gghh_cat0 +mkdir ws_gghh_cat1 +mkdir ws_gghh_cat2 +mv ws_gghh/*cat0*.root ws_gghh_cat0/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat0_2016pre_M125_pythia8_gghh.root +mv ws_gghh/*cat1*.root ws_gghh_cat1/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat1_2016pre_M125_pythia8_gghh.root +mv ws_gghh/*cat2*.root ws_gghh_cat2/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat2_2016pre_M125_pythia8_gghh.root + + +cd /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/2016post/root +mkdir ws_gghh_cat0 +mkdir ws_gghh_cat1 +mkdir ws_gghh_cat2 +mv ws_gghh/*cat0*.root ws_gghh_cat0/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat0_2016post_M125_pythia8_gghh.root +mv ws_gghh/*cat1*.root ws_gghh_cat1/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat1_2016post_M125_pythia8_gghh.root +mv ws_gghh/*cat2*.root ws_gghh_cat2/output_NMSSM_XToYHTo2B2G_MX-650_MY-90_cat2_2016post_M125_pythia8_gghh.root + + +cd /eos/cms/store/group/phys_higgs/cmshgg/zhjie/check_650_90/res/data/root +mkdir ws_cat0 +mkdir ws_cat1 +mkdir ws_cat2 +mv ws/*cat0*.root ws_cat0/allData.root +mv ws/*cat1*.root ws_cat1/allData.root +mv ws/*cat2*.root ws_cat2/allData.root \ No newline at end of file diff --git a/Trees2WS/test.py b/Trees2WS/test.py new file mode 100644 index 00000000..7ca363ba --- /dev/null +++ b/Trees2WS/test.py @@ -0,0 +1,4 @@ +from root_numpy import array2tree + + +print("ccc") \ No newline at end of file diff --git a/Trees2WS/test.sh b/Trees2WS/test.sh new file mode 100644 index 00000000..14cb846c --- /dev/null +++ b/Trees2WS/test.sh @@ -0,0 +1,7 @@ +python trees2ws.py --inputConfig config_sys.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000/opt/2017/root/addsys/output_NMSSM_XToYHTo2B2G_MX-1000_MY-100_cat0_2017_M125_pythia8_gghh.root --inputMass 125 --productionMode gghh --year 2017 --jetmass 100 --low 70 --high 190 --doSystematics +python trees2ws.py --inputConfig config_sys.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000/opt/2017/root/addsys/output_NMSSM_XToYHTo2B2G_MX-1000_MY-125_cat0_2017_M125_pythia8_gghh.root --inputMass 125 --productionMode gghh --year 2017 --jetmass 125 --low 70 --high 190 --doSystematics +python trees2ws.py --inputConfig config_sys.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000/opt/2017/root/addsys/output_NMSSM_XToYHTo2B2G_MX-1000_MY-150_cat0_2017_M125_pythia8_gghh.root --inputMass 125 --productionMode gghh --year 2017 --jetmass 150 --low 70 --high 190 --doSystematics +python trees2ws.py --inputConfig config_sys.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000/opt/2017/root/addsys/output_NMSSM_XToYHTo2B2G_MX-1000_MY-60_cat0_2017_M125_pythia8_gghh.root --inputMass 125 --productionMode gghh --year 2017 --jetmass 60 --low 70 --high 190 --doSystematics +python trees2ws.py --inputConfig config_sys.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000/opt/2017/root/addsys/output_NMSSM_XToYHTo2B2G_MX-1000_MY-70_cat0_2017_M125_pythia8_gghh.root --inputMass 125 --productionMode gghh --year 2017 --jetmass 70 --low 70 --high 190 --doSystematics +python trees2ws.py --inputConfig config_sys.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000/opt/2017/root/addsys/output_NMSSM_XToYHTo2B2G_MX-1000_MY-80_cat0_2017_M125_pythia8_gghh.root --inputMass 125 --productionMode gghh --year 2017 --jetmass 80 --low 70 --high 190 --doSystematics +python trees2ws.py --inputConfig config_sys.py --inputTreeFile /eos/cms/store/group/phys_higgs/cmshgg/zhjie/output/output_1000/opt/2017/root/addsys/output_NMSSM_XToYHTo2B2G_MX-1000_MY-90_cat0_2017_M125_pythia8_gghh.root --inputMass 125 --productionMode gghh --year 2017 --jetmass 90 --low 70 --high 190 --doSystematics diff --git a/Trees2WS/tools/commonTools.py b/Trees2WS/tools/commonTools.py index ce8c6662..a7f10ed8 100644 --- a/Trees2WS/tools/commonTools.py +++ b/Trees2WS/tools/commonTools.py @@ -16,7 +16,7 @@ def rooiter(x): def extractWSFileNames( _inputWSDir ): if not os.path.isdir(_inputWSDir): - print(" --> [ERROR] No such directory (%s)") + print " --> [ERROR] No such directory (%s)" return False return glob.glob("%s/output_*.root"%_inputWSDir) @@ -81,7 +81,7 @@ def signalFromFileName(_fileName): elif "THW" in _fileName: p = "thw" elif "bbH" in _fileName: p = "bbh" else: - print(" --> [ERROR]: cannot extract production mode from input file name. Please update tools.commonTools.signalFromFileName") + print " --> [ERROR]: cannot extract production mode from input file name. Please update tools.commonTools.signalFromFileName" exit(1) return p,d diff --git a/Trees2WS/tools/submissionTools.py b/Trees2WS/tools/submissionTools.py index 7951f2d6..efd9a833 100644 --- a/Trees2WS/tools/submissionTools.py +++ b/Trees2WS/tools/submissionTools.py @@ -5,7 +5,7 @@ from commonTools import * def run(cmd): - print("%s\n\n"%cmd) + print "%s\n\n"%cmd os.system(cmd) def writePreamble(_file,_otherBase=None): @@ -208,7 +208,7 @@ def submitFiles(_opts): _executable = "condor_%s_%s"%(_opts['mode'],_opts['ext']) cmdLine = "cd %s; condor_submit %s.sub; cd %s"%(_jobdir,_executable,twd__) run(cmdLine) - print(" --> Finished submitting files") + print " --> Finished submitting files" # SGE elif _opts['batch'] in ['IC','SGE']: @@ -243,7 +243,7 @@ def submitFiles(_opts): cmdLine = "qsub -q %s %s -o %s.log -e %s.err %s.sh"%(_opts['queue'],jobOptsStr,_subfile,_subfile,_subfile) run(cmdLine) - print(" --> Finished submitting files") + print " --> Finished submitting files" # Running locally elif _opts['batch'] == 'local': @@ -275,4 +275,4 @@ def submitFiles(_opts): cmdLine = "bash %s.sh"%(_subfile) run(cmdLine) - print(" --> Finished running files") + print " --> Finished running files" diff --git a/Trees2WS/trees2ws.py b/Trees2WS/trees2ws.py index cb113a82..3b5fc62e 100644 --- a/Trees2WS/trees2ws.py +++ b/Trees2WS/trees2ws.py @@ -43,9 +43,9 @@ def get_options(): from tools.commonObjects import * from tools.STXS_tools import * -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " def leave(): - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" sys.exit(1) # Function to add vars to workspace @@ -106,10 +106,10 @@ def make_argset(_ws=None,_varNames=None): cats = _cfg['cats'] else: - print("[ERROR] %s config file does not exist. Leaving..."%opt.inputConfig) + print "[ERROR] %s config file does not exist. Leaving..."%opt.inputConfig leave() else: - print("[ERROR] Please specify config file to run from. Leaving..."%opt.inputConfig) + print "[ERROR] Please specify config file to run from. Leaving..."%opt.inputConfig leave() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -147,7 +147,7 @@ def make_argset(_ws=None,_varNames=None): if "sigma" in tn: continue if "NOTAG" in tn: cats.append("NOTAG") if "NOTAG" not in cats: - print(" --> [WARNING] NOTAG tree does not exist in input file. Not including NOTAG") + print " --> [WARNING] NOTAG tree does not exist in input file. Not including NOTAG" # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 1) Convert tree to pandas dataframe @@ -157,10 +157,10 @@ def make_argset(_ws=None,_varNames=None): # Loop over categories: fill dataframe for cat in cats: - print(" --> Extracting events from category: %s"%cat) + print " --> Extracting events from category: %s"%cat if inputTreeDir == '': treeName = "%s_%s_%s_%s"%(opt.productionMode,opt.inputMass,sqrts__,cat) else: treeName = "%s/%s_%s_%s_%s"%(inputTreeDir,opt.productionMode,opt.inputMass,sqrts__,cat) - print(" * tree: %s"%treeName) + print " * tree: %s"%treeName # Extract tree from uproot t = f[treeName] if len(t) == 0: continue @@ -220,7 +220,7 @@ def make_argset(_ws=None,_varNames=None): if cat == "NOTAG": continue sdf = pandas.DataFrame() for s in systematics: - print(" --> Systematic: %s"%re.sub("YEAR",opt.year,s)) + print " --> Systematic: %s"%re.sub("YEAR",opt.year,s) for direction in ['Up','Down']: streeName = "%s_%s%s01sigma"%(treeName,s,direction) # If year in streeName then replace by year being processed @@ -267,7 +267,7 @@ def make_argset(_ws=None,_varNames=None): outputWSDir = "/".join(opt.inputTreeFile.split("/")[:-1])+"/ws_%s"%stxsBin if not os.path.exists(outputWSDir): os.system("mkdir %s"%outputWSDir) outputWSFile = outputWSDir+"/"+re.sub(".root","_%s.root"%stxsBin,opt.inputTreeFile.split("/")[-1]) - print(" --> Creating output workspace for STXS bin: %s (%s)"%(stxsBin,outputWSFile)) + print " --> Creating output workspace for STXS bin: %s (%s)"%(stxsBin,outputWSFile) else: df = data @@ -277,7 +277,7 @@ def make_argset(_ws=None,_varNames=None): outputWSDir = "/".join(opt.inputTreeFile.split("/")[:-1])+"/ws_%s"%dataToProc(opt.productionMode) if not os.path.exists(outputWSDir): os.system("mkdir %s"%outputWSDir) outputWSFile = outputWSDir+"/"+re.sub(".root","_%s.root"%dataToProc(opt.productionMode),opt.inputTreeFile.split("/")[-1]) - print(" --> Creating output workspace: (%s)"%outputWSFile) + print " --> Creating output workspace: (%s)"%outputWSFile # Open file and initiate workspace fout = ROOT.TFile(outputWSFile,"RECREATE") diff --git a/Trees2WS/trees2ws_data.py b/Trees2WS/trees2ws_data.py index 354f1f5a..3e08268a 100644 --- a/Trees2WS/trees2ws_data.py +++ b/Trees2WS/trees2ws_data.py @@ -17,8 +17,6 @@ def get_options(): return parser.parse_args() (opt,args) = get_options() jetmass=int(opt.jetmass) -high=int(opt.high) -low=int(opt.low) from collections import OrderedDict as od from importlib import import_module @@ -31,9 +29,9 @@ def get_options(): from tools.commonObjects import * -print(" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS (DATA) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ") +print " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS (DATA) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ " def leave(): - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HGG TREES 2 WS (END) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" sys.exit(1) # Function to add vars to workspace @@ -54,7 +52,7 @@ def add_vars_to_workspace(_ws=None,_dataVars=None,jetmass=125,low=100,high=180): _vars[var] = ROOT.RooRealVar(var,var,0.) elif var == "Dijet_mass": _vars[var] = ROOT.RooRealVar(var,var,jetmass,low,high) - _vars[var].setBins((high-low)/5) + _vars[var].setBins((high-low)/10) else: _vars[var] = ROOT.RooRealVar(var,var,1.,-999999,999999) _vars[var].setBins(1) @@ -82,15 +80,16 @@ def make_argset(_ws=None,_varNames=None): cats = _cfg['cats'] else: - print("[ERROR] %s config file does not exist. Leaving..."%opt.inputConfig) + print "[ERROR] %s config file does not exist. Leaving..."%opt.inputConfig leave() else: - print("[ERROR] Please specify config file to run from. Leaving..."%opt.inputConfig) + print "[ERROR] Please specify config file to run from. Leaving..."%opt.inputConfig leave() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # UPROOT file f = uproot.open(opt.inputTreeFile) + if inputTreeDir == '': listOfTreeNames = f.keys() else: listOfTreeNames = f[inputTreeDir].keys() # If cats = 'auto' then determine from list of trees @@ -102,7 +101,17 @@ def make_argset(_ws=None,_varNames=None): elif "ERROR" in tn: continue c = tn.split("_%s_"%sqrts__)[-1].split(";")[0] cats.append(c) - +if len(cats) >1: + print("check cat") + sys.exit() +for cat in cats: + if inputTreeDir == '': treeName = "Data_%s_%s"%(sqrts__,cat) + else: treeName = "%s/Data_%s_%s"%(inputTreeDir,sqrts__,cat) + tree = f[treeName] + jet_mass = tree.array("Dijet_mass") + max_mass = (int(max(jet_mass))//10)*10+20 + min_mass = (int(min(jet_mass))//10)*10-20 + if min_mass<0:min_mass=0 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Open input ROOT file f = ROOT.TFile(opt.inputTreeFile) @@ -112,24 +121,25 @@ def make_argset(_ws=None,_varNames=None): else: outputWSDir = "/".join(opt.inputTreeFile.split("/")[:-1])+"/ws" if not os.path.exists(outputWSDir): os.system("mkdir %s"%outputWSDir) outputWSFile = outputWSDir+"/"+opt.inputTreeFile.split("/")[-1] -print(" --> Creating output workspace: (%s)"%outputWSFile) +print " --> Creating output workspace: (%s)"%outputWSFile fout = ROOT.TFile(outputWSFile,"RECREATE") foutdir = fout.mkdir(inputWSName__.split("/")[0]) foutdir.cd() ws = ROOT.RooWorkspace(inputWSName__.split("/")[1],inputWSName__.split("/")[1]) # Add variables to workspace -varNames = add_vars_to_workspace(ws,dataVars,jetmass,low,high) +print("check",int(max(jet_mass)),int(min(jet_mass)),min_mass,max_mass) +varNames = add_vars_to_workspace(ws,dataVars,jetmass,min_mass,max_mass) # Make argset aset = make_argset(ws,varNames) # Loop over categories and for cat in cats: - print(" --> Extracting events from category: %s"%cat) + print " --> Extracting events from category: %s"%cat if inputTreeDir == '': treeName = "Data_%s_%s"%(sqrts__,cat) else: treeName = "%s/Data_%s_%s"%(inputTreeDir,sqrts__,cat) - print(" * tree: %s"%treeName) + print " * tree: %s"%treeName t = f.Get(treeName) # Define dataset for cat