Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 68 additions & 8 deletions CombineTools/bin/PostFitShapesFromWorkspace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ int main(int argc, char* argv[]) {
bool skip_prefit = false;
bool skip_proc_errs = false;
bool total_shapes = false;
bool verbose = false;
std::vector<std::string> reverse_bins_;
// Containers to parse processes that are to be merged at runtime
std::vector<std::string> input_merge_procs_;
std::map<std::string, std::string> merged_procs;

po::options_description help_config("Help");
help_config.add_options()
Expand Down Expand Up @@ -103,7 +107,12 @@ int main(int argc, char* argv[]) {
("total-shapes",
po::value<bool>(&total_shapes)->default_value(total_shapes)->implicit_value(true),
"Save signal- and background shapes added for all channels/categories")
("reverse-bins", po::value<vector<string>>(&reverse_bins_)->multitoken(), "List of bins to reverse the order for");
("verbose,v",
po::value<bool>(&verbose)->default_value(verbose)->implicit_value(true),
"Genererate additional output if uncertainties in a given bin are large")
("reverse-bins", po::value<vector<string>>(&reverse_bins_)->multitoken(), "List of bins to reverse the order for")
("merge-procs,p", po::value<vector<string>>(&input_merge_procs_)->multitoken(),
"Merge these processes. Regex expression allowed. Format: NEWPROCESSNAME='expression'");


po::variables_map vm;
Expand Down Expand Up @@ -140,6 +149,8 @@ int main(int argc, char* argv[]) {
// Create CH instance and parse the workspace
ch::CombineHarvester cmb;
cmb.SetFlag("workspaces-use-clone", true);
// Allow regex expressions to combine processes on the fly
cmb.SetFlag("filters-use-regex", true);
ch::ParseCombineWorkspace(cmb, *ws, "ModelConfig", data, false);

// Only evaluate in case parameters to freeze are provided
Expand Down Expand Up @@ -170,6 +181,13 @@ int main(int argc, char* argv[]) {
}
// cmb.GetParameter("r")->set_frozen(true);

// parse processes that are to be merged
for (auto& in: input_merge_procs_){
vector<string> parts;
boost::split(parts, in, boost::is_any_of("="));
merged_procs[parts[0]] = parts[1];
}

ch::CombineHarvester cmb_card;
cmb_card.SetFlag("workspaces-use-clone",true);
if (datacard != "") {
Expand Down Expand Up @@ -248,6 +266,28 @@ int main(int argc, char* argv[]) {
cmb_bin.cp().process({proc}).GetShapeWithUncertainty();
}
}
// Create prefit shapes for merged processes
for (auto iter: merged_procs){
// First element of the iterator is the name of the merged process
auto proc=iter.first;
std::cout << ">> Doing prefit: " << bin << "," << proc << std::endl;
// Second element is the regex expression for the processes
// that are to be merged
auto proc_regex = iter.second;
auto cmb_proc = cmb_bin.cp().process({proc_regex});
// First check for matches
if (cmb_proc.process_set().size() == 0){
std::cout << ">> WARNING: found no processes matching " << proc << std::endl;
continue;
}
if (skip_proc_errs) {
pre_shapes[bin][proc] =
cmb_proc.GetShape();
} else {
pre_shapes[bin][proc] =
cmb_proc.GetShapeWithUncertainty();
}
}

// The fill total signal and total bkg hists
std::cout << ">> Doing prefit: " << bin << "," << "TotalBkg" << std::endl;
Expand Down Expand Up @@ -320,15 +360,15 @@ int main(int argc, char* argv[]) {
std::cout << ">> Doing postfit: TotalBkg" << std::endl;
post_shapes_tot["TotalBkg"] =
no_sampling ? cmb_bkgs.GetShapeWithUncertainty()
: cmb_bkgs.GetShapeWithUncertainty(res, samples);
: cmb_bkgs.GetShapeWithUncertainty(res, samples, verbose);
std::cout << ">> Doing postfit: TotalSig" << std::endl;
post_shapes_tot["TotalSig"] =
no_sampling ? cmb_sigs.GetShapeWithUncertainty()
: cmb_sigs.GetShapeWithUncertainty(res, samples);
: cmb_sigs.GetShapeWithUncertainty(res, samples, verbose);
std::cout << ">> Doing postfit: TotalProcs" << std::endl;
post_shapes_tot["TotalProcs"] =
no_sampling ? cmb.cp().GetShapeWithUncertainty()
: cmb.cp().GetShapeWithUncertainty(res, samples);
: cmb.cp().GetShapeWithUncertainty(res, samples, verbose);

if (datacard != "") {
TH1F ref = cmb_card.cp().GetObservedShape();
Expand Down Expand Up @@ -359,9 +399,29 @@ int main(int argc, char* argv[]) {
} else {
post_shapes[bin][proc] =
no_sampling ? cmb_proc.GetShapeWithUncertainty()
: cmb_proc.GetShapeWithUncertainty(res, samples);
: cmb_proc.GetShapeWithUncertainty(res, samples, verbose);
}
}

// Generate postfit distributions for merged processes
for (auto iter: merged_procs){
auto proc=iter.first;
std::cout << ">> Doing postfit: " << bin << "," << proc << std::endl;
auto proc_regex = iter.second;
auto cmb_proc = cmb_bin.cp().process({proc_regex});
if (cmb_proc.process_set().size() == 0){
std::cout << ">> WARNING: found no processes matching " << proc << std::endl;
continue;
}
if (skip_proc_errs) {
post_shapes[bin][proc] = cmb_proc.GetShape();
} else {
post_shapes[bin][proc] =
sampling ? cmb_proc.GetShapeWithUncertainty(res, samples, verbose)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The deafult behaviour of PostFitShapesFromWorkspace should be to use sampling, and now use of the actual --sampling flag prints out a warning. So the code should not check the --sampling flag, but rather the --no-sampling flag, and always do sampling unless --no-sampling has been set.

: cmb_proc.GetShapeWithUncertainty();
}
}

if (!no_sampling && covariance) {
post_yield_cov[bin] = cmb_bin.GetRateCovariance(res, samples);
post_yield_cor[bin] = cmb_bin.GetRateCorrelation(res, samples);
Expand All @@ -372,15 +432,15 @@ int main(int argc, char* argv[]) {
std::cout << ">> Doing postfit: " << bin << "," << "TotalBkg" << std::endl;
post_shapes[bin]["TotalBkg"] =
no_sampling ? cmb_bkgs.GetShapeWithUncertainty()
: cmb_bkgs.GetShapeWithUncertainty(res, samples);
: cmb_bkgs.GetShapeWithUncertainty(res, samples, verbose);
std::cout << ">> Doing postfit: " << bin << "," << "TotalSig" << std::endl;
post_shapes[bin]["TotalSig"] =
no_sampling ? cmb_sigs.GetShapeWithUncertainty()
: cmb_sigs.GetShapeWithUncertainty(res, samples);
: cmb_sigs.GetShapeWithUncertainty(res, samples, verbose);
std::cout << ">> Doing postfit: " << bin << "," << "TotalProcs" << std::endl;
post_shapes[bin]["TotalProcs"] =
no_sampling ? cmb_bin.cp().GetShapeWithUncertainty()
: cmb_bin.cp().GetShapeWithUncertainty(res, samples);
: cmb_bin.cp().GetShapeWithUncertainty(res, samples, verbose);

if (datacard != "") {
TH1F ref = cmb_card.cp().bin({bin}).GetObservedShape();
Expand Down
4 changes: 2 additions & 2 deletions CombineTools/interface/CombineHarvester.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,8 @@ class CombineHarvester {
* version of this method instead - this method will be removed in an
* upcoming release
*/
TH1F GetShapeWithUncertainty(RooFitResult const* fit, unsigned n_samples);
TH1F GetShapeWithUncertainty(RooFitResult const& fit, unsigned n_samples);
TH1F GetShapeWithUncertainty(RooFitResult const* fit, unsigned n_samples, const bool& verbose = false);
TH1F GetShapeWithUncertainty(RooFitResult const& fit, unsigned n_samples, const bool& verbose = false);
TH1F GetObservedShape();

TH2F GetRateCovariance(RooFitResult const& fit, unsigned n_samples);
Expand Down
51 changes: 43 additions & 8 deletions CombineTools/src/CombineHarvester_Evaluate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "TDirectory.h"
#include "TH1.h"
#include "TH2.h"
#include "TString.h"
#include "CombineHarvester/CombineTools/interface/Observation.h"
#include "CombineHarvester/CombineTools/interface/Process.h"
#include "CombineHarvester/CombineTools/interface/Systematic.h"
Expand Down Expand Up @@ -126,16 +127,21 @@ TH1F CombineHarvester::GetShapeWithUncertainty() {
}

TH1F CombineHarvester::GetShapeWithUncertainty(RooFitResult const* fit,
unsigned n_samples) {
return GetShapeWithUncertainty(*fit, n_samples);
unsigned n_samples,
const bool& verbose) {
return GetShapeWithUncertainty(*fit, n_samples, verbose);
}

TH1F CombineHarvester::GetShapeWithUncertainty(RooFitResult const& fit,
unsigned n_samples) {
unsigned n_samples,
const bool& verbose)
{
auto lookup = GenerateProcSystMap();
TH1F shape = GetShapeInternal(lookup);
std::vector<std::string> full_rand_shape_summary;
for (int i = 1; i <= shape.GetNbinsX(); ++i) {
shape.SetBinError(i, 0.0);
full_rand_shape_summary.push_back("");
}
// Create a backup copy of the current parameter values
auto backup = GetParameters();
Expand All @@ -152,22 +158,51 @@ TH1F CombineHarvester::GetShapeWithUncertainty(RooFitResult const& fit,
for (unsigned n = 0; n < p_vec.size(); ++n) {
r_vec[n] = dynamic_cast<RooRealVar const*>(rands.at(n));
p_vec[n] = GetParameter(r_vec[n]->GetName());
// if(! p_vec[n]) std::cout << "could not initialize parameter '" << r_vec[n]->GetName() << "'" << std::endl;
}

TString tmp;

// Main loop through n_samples
for (unsigned i = 0; i < n_samples; ++i) {
// Randomise and update values
fit.randomizePars();
std::string rand_shape_summary = "";
for (int n = 0; n < n_pars; ++n) {
if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal());
if (p_vec[n] && !p_vec[n]->frozen()) {
p_vec[n]->set_val(r_vec[n]->getVal());
tmp.Form("setting parameter '%s' to %f\n", p_vec[n]->name().c_str(), r_vec[n]->getVal());
// std::cout << tmp.Data();
rand_shape_summary = tmp.Data();
// std::cout << rand_shape_summary.c_str() << std::endl;
TH1F rand_shape = this->GetShapeInternal(lookup);
for (int j = 1; j <= rand_shape.GetNbinsX(); ++j) {
tmp.Form("\tBin Content %i: %f\n", j, rand_shape.GetBinContent(j));
// std::cout << tmp.Data();
full_rand_shape_summary.at(j-1) = rand_shape_summary;
full_rand_shape_summary.at(j-1) += tmp.Data();
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like quite a lot of information (checking the updated shape for bin for every updated parameter value). I wonder if it is maybe a bit too much, and could be avoided? But perhaps this was useful in some debugging which I'm simply not aware of. It just seems it would be a lot to even read through for large models, and perhaps targeting more specific information might be better?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes definitely! I originally implemented this when we generated post-fit uncertainty bands for our ttHbb analysis and so some very large fluctuations in the bands. The intend there was really to find the reason for these fluctuations, which is why I wanted to output as much information as possible.

I think the comment by Andrew is valid, and it might be better to simply change to toys themselves into a dedicated root file. I can also update this PR accordingly if we want to have something like that from the start, or we can do it at some later point in time

}
// else if (!p_vec[n]) {
// std::cout << "could not find parameter '" << r_vec[n]->GetName() << "'"<< std::endl;
// }
}

TH1F rand_shape = this->GetShapeInternal(lookup);
for (int i = 1; i <= shape.GetNbinsX(); ++i) {
for (int j = 1; j <= shape.GetNbinsX(); ++j) {

double err =
std::fabs(rand_shape.GetBinContent(i) - shape.GetBinContent(i));
shape.SetBinError(i, err*err + shape.GetBinError(i));
std::fabs(rand_shape.GetBinContent(j) - shape.GetBinContent(j));
if (std::fabs(err/shape.GetBinContent(j)) >= 0.5 and verbose){
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may be over optimizing here, but perhaps some of these extra steps could be avoided by default when not using the verbose mode? Perhaps just an if( verbose) before this loop, and in other places, for example?

I'm also wondering if it would be useful to make this 0.5 threshold a configurable parameter?

std::cout << "rel. error > 0.5 detected in bin " << j << " for toy " << i << std::endl;
std::cout << "nominal bin content (" << j << "):\t" << shape.GetBinContent(j) <<std::endl;
std::cout << "randomized bin content (" << j << "):\t" << rand_shape.GetBinContent(j) <<std::endl;
std::cout << "parameters: " << std::endl;
std::cout << "random shape evolution" << std::endl;
std::cout << full_rand_shape_summary.at(j-1).c_str() << std::endl;
}
shape.SetBinError(j, err*err + shape.GetBinError(j));
}
this->UpdateParameters(backup);
}
for (int i = 1; i <= shape.GetNbinsX(); ++i) {
shape.SetBinError(i, std::sqrt(shape.GetBinError(i)/double(n_samples)));
Expand Down
2 changes: 1 addition & 1 deletion CombineTools/src/CombineHarvester_Python.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ TH1F (CombineHarvester::*Overload1_GetShapeWithUncertainty)(
void) = &CombineHarvester::GetShapeWithUncertainty;

TH1F (CombineHarvester::*Overload2_GetShapeWithUncertainty)(
RooFitResult const&, unsigned) = &CombineHarvester::GetShapeWithUncertainty;
RooFitResult const&, unsigned, bool const&) = &CombineHarvester::GetShapeWithUncertainty;

void (CombineHarvester::*Overload1_UpdateParameters)(
RooFitResult const&) = &CombineHarvester::UpdateParameters;
Expand Down