diff --git a/Configuration/ProcessModifiers/python/genWeightAddition_cff.py b/Configuration/ProcessModifiers/python/genWeightAddition_cff.py new file mode 100644 index 0000000000000..b36d81743c81b --- /dev/null +++ b/Configuration/ProcessModifiers/python/genWeightAddition_cff.py @@ -0,0 +1,3 @@ +import FWCore.ParameterSet.Config as cms + +genWeightAddition = cms.Modifier() diff --git a/Configuration/StandardSequences/python/Generator_cff.py b/Configuration/StandardSequences/python/Generator_cff.py index a1b3cf18d8232..41e83ba0592c9 100644 --- a/Configuration/StandardSequences/python/Generator_cff.py +++ b/Configuration/StandardSequences/python/Generator_cff.py @@ -5,6 +5,8 @@ # from PhysicsTools.HepMCCandAlgos.genParticles_cfi import * from GeneratorInterface.Core.generatorSmeared_cfi import * +from GeneratorInterface.Core.genWeights_cfi import genWeights +from GeneratorInterface.Core.lheWeights_cfi import lheWeights from RecoJets.Configuration.RecoGenJets_cff import * from RecoMET.Configuration.RecoGenMET_cff import * from RecoJets.Configuration.GenJetParticles_cff import * @@ -53,6 +55,9 @@ VertexSmearing = cms.Sequence(cms.SequencePlaceholder("VtxSmeared")) GenSmeared = cms.Sequence(generatorSmeared) GeneInfo = cms.Sequence(GeneInfoTask) +genWeightsSeq = cms.Sequence(genWeights*lheWeights) +genWeights.allowUnassociatedWeights = True # This should be off, but needed until Pythia bug is fixed +lheWeights.failIfInvalidXML = False # Also would ideally be true, but is needed at least for the tau embedding unit test genJetMET = cms.Sequence(genJetMETTask) from SimPPS.Configuration.GenPPS_cff import * @@ -74,3 +79,8 @@ hltResults = cms.InputTag('TriggerResults'), triggerConditions = cms.vstring() ) + +pgenWithWeight = cms.Sequence(cms.SequencePlaceholder("randomEngineStateProducer")+VertexSmearing+GenSmeared+GeneInfo+genWeightsSeq+genJetMET, PPSTransportTask) + +from Configuration.ProcessModifiers.genWeightAddition_cff import genWeightAddition +genWeightAddition.toReplaceWith(pgen, pgenWithWeight) diff --git a/DataFormats/NanoAOD/src/classes_def.xml b/DataFormats/NanoAOD/src/classes_def.xml index e6c1551b8fc89..fec2d614b9d0f 100644 --- a/DataFormats/NanoAOD/src/classes_def.xml +++ b/DataFormats/NanoAOD/src/classes_def.xml @@ -1,3 +1,4 @@ + @@ -11,6 +12,9 @@ + + + diff --git a/GeneratorInterface/Configuration/python/GeneratorInterface_EventContent_cff.py b/GeneratorInterface/Configuration/python/GeneratorInterface_EventContent_cff.py index 8361074f778d8..8ee8cb381a5a3 100644 --- a/GeneratorInterface/Configuration/python/GeneratorInterface_EventContent_cff.py +++ b/GeneratorInterface/Configuration/python/GeneratorInterface_EventContent_cff.py @@ -20,6 +20,10 @@ 'keep GenEventInfoProduct_generator_*_*', 'keep edmHepMCProduct_generatorSmeared_*_*', 'keep edmHepMCProduct_LHCTransport_*_*', + 'keep GenWeightProduct_lheWeights_*_*', + 'keep GenWeightProduct_genWeights_*_*', + 'keep GenWeightInfoProduct_lheWeights_*_*', + 'keep GenWeightInfoProduct_genWeights_*_*', 'keep GenFilterInfo_*_*_*', 'keep *_genParticles_*_*' ) @@ -36,6 +40,10 @@ 'keep GenEventInfoProduct_generator_*_*', 'keep edmHepMCProduct_generatorSmeared_*_*', 'keep edmHepMCProduct_LHCTransport_*_*', + 'keep GenWeightProduct_lheWeights_*_*', + 'keep GenWeightProduct_genWeights_*_*', + 'keep GenWeightInfoProduct_lheWeights_*_*', + 'keep GenWeightInfoProduct_genWeights_*_*', 'keep GenFilterInfo_*_*_*', 'keep *_genParticles_*_*' ) @@ -50,6 +58,10 @@ 'keep GenLumiInfoHeader_generator_*_*', 'keep GenLumiInfoProduct_generator_*_*', 'keep GenEventInfoProduct_generator_*_*', + 'keep GenWeightProduct_lheWeights_*_*', + 'keep GenWeightProduct_genWeights_*_*', + 'keep GenWeightInfoProduct_lheWeights_*_*', + 'keep GenWeightInfoProduct_genWeights_*_*', 'keep GenFilterInfo_*_*_*', 'keep *_genParticles_*_*' ) diff --git a/GeneratorInterface/Core/BuildFile.xml b/GeneratorInterface/Core/BuildFile.xml index 1da6ba2a8bab9..ed15c8ea3be92 100644 --- a/GeneratorInterface/Core/BuildFile.xml +++ b/GeneratorInterface/Core/BuildFile.xml @@ -12,6 +12,7 @@ + diff --git a/GeneratorInterface/Core/interface/GenWeightHelper.h b/GeneratorInterface/Core/interface/GenWeightHelper.h new file mode 100644 index 0000000000000..b6377ed1907a3 --- /dev/null +++ b/GeneratorInterface/Core/interface/GenWeightHelper.h @@ -0,0 +1,27 @@ +#ifndef GeneratorInterface_Core_GenWeightHelper_h +#define GeneratorInterface_Core_GenWeightHelper_h + +#include + +#include +#include +#include +#include +#include + +#include "GeneratorInterface/Core/interface/WeightHelper.h" +#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h" + +namespace gen { + class GenWeightHelper : public WeightHelper { + public: + GenWeightHelper(); + std::vector> parseWeightGroupsFromNames(std::vector weightNames, + bool addUnassociatedGroup) const; + }; +} // namespace gen + +#endif diff --git a/GeneratorInterface/Core/interface/LHEWeightHelper.h b/GeneratorInterface/Core/interface/LHEWeightHelper.h new file mode 100644 index 0000000000000..a8956612f0bc6 --- /dev/null +++ b/GeneratorInterface/Core/interface/LHEWeightHelper.h @@ -0,0 +1,53 @@ +#ifndef GeneratorInterface_Core_LHEWeightHelper_h +#define GeneratorInterface_Core_LHEWeightHelper_h + +#include + +#include +#include +#include +#include +#include + +#include "GeneratorInterface/Core/interface/WeightHelper.h" +#include "SimDataFormats/GeneratorProducts/interface/LHERunInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h" + +namespace gen { + class LHEWeightHelper : public WeightHelper { + public: + LHEWeightHelper() : WeightHelper(){}; + + enum class ErrorType { Empty, SwapHeader, HTMLStyle, NoWeightGroup, TrailingStr, Unknown, NoError }; + const std::unordered_map errorTypeAsString_ = { + {ErrorType::Empty, "Empty header"}, + {ErrorType::SwapHeader, "Header info out of order"}, + {ErrorType::HTMLStyle, "Header is invalid HTML"}, + {ErrorType::TrailingStr, "Header has extraneous info"}, + {ErrorType::Unknown, "Unregonized error"}, + {ErrorType::NoError, "No error here!"}}; + + std::vector> parseWeights(std::vector headerLines, + bool addUnassociated) const; + bool isConsistent(const std::string& fullHeader) const; + void swapHeaders(std::vector& headerLines) const; + void setFailIfInvalidXML(bool value) { failIfInvalidXML_ = value; } + bool failIfInvalidXML() const { return failIfInvalidXML_; } + + private: + std::string weightgroupKet_ = ""; + std::string weightTag_ = ""; + bool failIfInvalidXML_ = false; + std::string parseGroupName(tinyxml2::XMLElement* el) const; + ParsedWeight parseWeight(tinyxml2::XMLElement* inner, std::string groupName, int groupIndex, int& weightIndex) const; + bool validateAndFixHeader(std::vector& headerLines, tinyxml2::XMLDocument& xmlDoc) const; + tinyxml2::XMLError tryReplaceHtmlStyle(tinyxml2::XMLDocument& xmlDoc, std::string& fullHeader) const; + tinyxml2::XMLError tryRemoveTrailings(tinyxml2::XMLDocument& xmlDoc, std::string& fullHeader) const; + ErrorType findErrorType(int xmlError, const std::string& headerLines) const; + }; +} // namespace gen + +#endif diff --git a/GeneratorInterface/Core/interface/WeightHelper.h b/GeneratorInterface/Core/interface/WeightHelper.h new file mode 100644 index 0000000000000..7fa6c52e4b6ae --- /dev/null +++ b/GeneratorInterface/Core/interface/WeightHelper.h @@ -0,0 +1,152 @@ +#ifndef GeneratorInterface_LHEInterface_WeightHelper_h +#define GeneratorInterface_LHEInterface_WeightHelper_h + +#include + +#include +#include +#include + +#include "LHAPDF/LHAPDF.h" +#include "SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenWeightProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/WeightsInfo.h" + +namespace gen { + struct ParsedWeight { + std::string id; + int index; + std::string groupname; + std::string content; + std::unordered_map attributes; + int wgtGroup_idx; + }; + + class WeightHelper { + public: + WeightHelper(); + + template + std::unique_ptr weightProduct(const GenWeightInfoProduct& weightsInfo, + std::vector weights, + float w0) const; + + void setGuessPSWeightIdx(bool guessPSWeightIdx) { + PartonShowerWeightGroupInfo::setGuessPSWeightIdx(guessPSWeightIdx); + } + void addUnassociatedGroup(std::vector>& weightGroups) const { + gen::UnknownWeightGroupInfo unassoc("unassociated"); + unassoc.setDescription("Weights with missing or invalid header meta data"); + weightGroups.push_back(std::make_unique(unassoc)); + } + int addWeightToProduct(GenWeightProduct& product, double weight, std::string name, int weightNum, int groupIndex); + void setDebug(bool value) { debug_ = value; } + + protected: + bool debug_ = false; + const unsigned int FIRST_PSWEIGHT_ENTRY = 2; + const unsigned int DEFAULT_PSWEIGHT_LENGTH = 46; + std::map currWeightAttributeMap_; + std::map currGroupAttributeMap_; + bool isScaleWeightGroup(const ParsedWeight& weight) const; + bool isMEParamWeightGroup(const ParsedWeight& weight) const; + bool isPdfWeightGroup(const ParsedWeight& weight) const; + bool isPartonShowerWeightGroup(const ParsedWeight& weight) const; + bool isOrphanPdfWeightGroup(ParsedWeight& weight) const; + void updateScaleInfo(gen::ScaleWeightGroupInfo& scaleGroup, const ParsedWeight& weight) const; + void updateMEParamInfo(const ParsedWeight& weight, int index) const; + void updatePdfInfo(gen::PdfWeightGroupInfo& pdfGroup, const ParsedWeight& weight) const; + void updatePartonShowerInfo(gen::PartonShowerWeightGroupInfo& psGroup, const ParsedWeight& weight) const; + void cleanupOrphanCentralWeight(WeightGroupInfoContainer& weightGroups) const; + bool splitPdfWeight(ParsedWeight& weight, WeightGroupInfoContainer& weightGroups) const; + + int lhapdfId(const ParsedWeight& weight, gen::PdfWeightGroupInfo& pdfGroup) const; + std::string searchAttributes(const std::string& label, const ParsedWeight& weight) const; + std::string searchAttributesByTag(const std::string& label, const ParsedWeight& weight) const; + std::string searchAttributesByRegex(const std::string& label, const ParsedWeight& weight) const; + + // Possible names for the same thing + const std::unordered_map> attributeNames_ = { + {"muf", {"muF", "MUF", "muf", "facscfact"}}, + {"mur", {"muR", "MUR", "mur", "renscfact"}}, + {"pdf", {"PDF", "PDF set", "lhapdf", "pdf", "pdf set", "pdfset"}}, + {"dyn", {"DYN_SCALE"}}, + {"dyn_name", {"dyn_scale_choice"}}, + {"up", {"_up", "Hi"}}, + {"down", {"_dn", "Lo"}}, + {"me_variation", {"mass", "sthw2", "width"}}, + }; + void printWeights(const WeightGroupInfoContainer& weightGroups) const; + std::unique_ptr buildGroup(ParsedWeight& weight) const; + WeightGroupInfoContainer buildGroups(std::vector& parsedWeights, bool addUnassociatedGroup) const; + std::string searchString(const std::string& label, const std::string& name) const; + }; + + template + std::unique_ptr WeightHelper::weightProduct(const GenWeightInfoProduct& weightsInfo, + std::vector weights, + float w0) const { + auto weightProduct = std::make_unique(w0); + weightProduct->setNumWeightSets(weightsInfo.numberOfGroups()); + gen::WeightGroupData groupData = {0, nullptr}; + // size=1 happens if there are no PS weights, so the weights vector contains + // only the central GEN weight. Size = 2 happens when Pythia produces a separate weight for the hadronization + // In general this can also be handled by the "unassociated" group, but this avoids the requirement + // that that setting always be true for workflows without the GenLumiInfoProduct (which can reasonably not exist + // for special GEN workflows) + if (!weightsInfo.numberOfGroups()) { + if (weights.size() <= 2) + return weightProduct; + else + throw cms::Exception("WeightHelper") + << "Found more than 2 weights in the event, but found no weight groups in the header."; + } + + // This gets remade every event to avoid having state-dependence in the + // helper class could think about doing caching instead + int unassociatedIdx = weightsInfo.unassociatedIdx(); + std::unique_ptr unassociatedGroup; + if (unassociatedIdx != -1) + unassociatedGroup = std::make_unique("unassociated"); + int i = 0; + for (const auto& weight : weights) { + double wgtval; + std::string wgtid; + if constexpr (std::is_same::value) { + wgtid = weight.id; + wgtval = weight.wgt; + } else if (std::is_same::value) { + wgtid = std::to_string(i); + wgtval = weight; + } + try { + groupData = weightsInfo.containingWeightGroupInfo(i, groupData.index); + } catch (const cms::Exception& e) { + if (unassociatedIdx == -1) + throw e; + if (debug_) { + std::cout << "WARNING: " << e.what() << std::endl; + } + // Access the unassociated group separately so it can be modified + unassociatedGroup->addContainedId(i, wgtid, wgtid); + groupData = {static_cast(unassociatedIdx), unassociatedGroup.get()}; + } + int entry = groupData.group->weightVectorEntry(wgtid, i); + + // TODO: is this too slow? + if (debug_) + std::cout << "Adding weight num " << i << " EntryNum " << entry << " to group " << groupData.index << std::endl; + weightProduct->addWeight(wgtval, groupData.index, entry); + i++; + } + return weightProduct; + } +} // namespace gen + +#endif diff --git a/GeneratorInterface/Core/plugins/BuildFile.xml b/GeneratorInterface/Core/plugins/BuildFile.xml index 40c9678cb4bc6..a8c9a22737fc6 100644 --- a/GeneratorInterface/Core/plugins/BuildFile.xml +++ b/GeneratorInterface/Core/plugins/BuildFile.xml @@ -3,6 +3,7 @@ + diff --git a/GeneratorInterface/Core/plugins/GenWeightProductProducer.cc b/GeneratorInterface/Core/plugins/GenWeightProductProducer.cc new file mode 100644 index 0000000000000..e753d63022aba --- /dev/null +++ b/GeneratorInterface/Core/plugins/GenWeightProductProducer.cc @@ -0,0 +1,148 @@ +#include +#include +#include +#include + +// user include files +#include + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/LuminosityBlock.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/Run.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/transform.h" +#include "GeneratorInterface/Core/interface/GenWeightHelper.h" +#include "SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoHeader.h" +#include "SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/LHERunInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h" + +struct GenWeightInfoProdData { + bool makeNewProduct; + GenWeightInfoProduct product; +}; + +class GenWeightProductProducer : public edm::global::EDProducer> { +public: + explicit GenWeightProductProducer(const edm::ParameterSet& iConfig); + ~GenWeightProductProducer() override; + void produce(edm::StreamID, edm::Event&, const edm::EventSetup&) const override; + void globalBeginLuminosityBlockProduce(edm::LuminosityBlock& lb, edm::EventSetup const& c) const override; + std::shared_ptr globalBeginLuminosityBlock(const edm::LuminosityBlock& lb, + edm::EventSetup const& c) const override; + void globalEndLuminosityBlock(const edm::LuminosityBlock& lb, edm::EventSetup const& c) const override {} + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + gen::GenWeightHelper weightHelper_; + const edm::EDGetTokenT genLumiInfoToken_; + const edm::EDGetTokenT genEventToken_; + std::vector> weightInfoTokens_; + const bool debug_; + edm::EDPutTokenT groupPutToken_; + GenWeightInfoProduct weightsInfo_; + bool allowUnassociated_; +}; + +GenWeightProductProducer::GenWeightProductProducer(const edm::ParameterSet& iConfig) + : genLumiInfoToken_(consumes(iConfig.getParameter("genInfo"))), + genEventToken_(consumes(iConfig.getParameter("genInfo"))), + weightInfoTokens_(edm::vector_transform( + iConfig.getParameter>("weightProductLabels"), + [this](const std::string& tag) { return mayConsume(tag); })), + debug_(iConfig.getUntrackedParameter("debug", false)), + groupPutToken_(produces()), + allowUnassociated_(iConfig.getUntrackedParameter("allowUnassociatedWeights", false)) { + weightHelper_.setDebug(debug_); + produces(); + produces(); + weightHelper_.setGuessPSWeightIdx(iConfig.getUntrackedParameter("guessPSWeightIdx", false)); +} + +GenWeightProductProducer::~GenWeightProductProducer() {} + +void GenWeightProductProducer::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // In case there is already a product in the file when this is scheduled + // (leave the list of weightproducts empty if you always want to produce a new + // product) + const auto& productInfo = *luminosityBlockCache(iEvent.getLuminosityBlock().index()); + if (!productInfo.makeNewProduct) + return; + + edm::Handle genEventInfo; + iEvent.getByToken(genEventToken_, genEventInfo); + + float centralWeight = !genEventInfo->weights().empty() ? genEventInfo->weights().at(0) : 1.; + auto weightProduct = weightHelper_.weightProduct(productInfo.product, genEventInfo->weights(), centralWeight); + iEvent.put(std::move(weightProduct)); +} + +std::shared_ptr GenWeightProductProducer::globalBeginLuminosityBlock( + const edm::LuminosityBlock& iLumi, edm::EventSetup const& iSetup) const { + GenWeightInfoProdData productInfo; + productInfo.makeNewProduct = true; + + edm::Handle weightInfoHandle; + for (auto& token : weightInfoTokens_) { + iLumi.getByToken(token, weightInfoHandle); + if (weightInfoHandle.isValid()) { + productInfo.makeNewProduct = false; + break; + } + } + + edm::Handle genLumiInfoHandle; + iLumi.getByToken(genLumiInfoToken_, genLumiInfoHandle); + + if (genLumiInfoHandle.isValid()) { + auto weightGroups = weightHelper_.parseWeightGroupsFromNames(genLumiInfoHandle->weightNames(), allowUnassociated_); + productInfo.product = GenWeightInfoProduct(weightGroups); + } else if (allowUnassociated_) { + auto group = std::make_unique("unassociated"); + productInfo.product.addWeightGroupInfo(std::move(group)); + } + return std::make_shared(productInfo); +} + +void GenWeightProductProducer::globalBeginLuminosityBlockProduce(edm::LuminosityBlock& iLumi, + edm::EventSetup const& iSetup) const { + edm::Handle weightInfoHandle; + + const auto& productInfo = *luminosityBlockCache(iLumi.index()); + if (!productInfo.makeNewProduct) + return; + + auto weightInfoProduct = std::make_unique(productInfo.product); + iLumi.emplace(groupPutToken_, std::move(weightInfoProduct)); +} + +void GenWeightProductProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("genInfo", edm::InputTag{"generator"}) + ->setComment("tag(s) for the GenLumiInfoHeader and GenEventInfoProduct"); + desc.add>("weightProductLabels", std::vector{{""}}) + ->setComment( + "tag(s) to look for existing GenWeightProduct/GenWeightInfoProducts. " + "If they are found, a new one won't be created. Leave this argument " + "empty if you want to recreate new " + "products regardless."); + desc.addUntracked("debug", false)->setComment("Output debug info"); + desc.addUntracked("guessPSWeightIdx", false) + ->setComment( + "If not possible to parse text, guess the parton shower weight " + "indices"); + desc.addUntracked("allowUnassociatedWeights", false) + ->setComment( + "Handle weights found in the event that aren't advertised in the " + "weight header (otherwise throw exception)"); + descriptions.add("genWeights", desc); +} + +DEFINE_FWK_MODULE(GenWeightProductProducer); diff --git a/GeneratorInterface/Core/plugins/LHEWeightProductProducer.cc b/GeneratorInterface/Core/plugins/LHEWeightProductProducer.cc new file mode 100644 index 0000000000000..71b706d8b1260 --- /dev/null +++ b/GeneratorInterface/Core/plugins/LHEWeightProductProducer.cc @@ -0,0 +1,187 @@ +#include +#include +#include +#include + +// user include files +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Frameworkfwd.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/Framework/interface/Run.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/ParameterSet/interface/FileInPath.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Utilities/interface/ReusableObjectHolder.h" +#include "FWCore/Utilities/interface/transform.h" +#include "GeneratorInterface/Core/interface/LHEWeightHelper.h" +#include "GeneratorInterface/LHEInterface/interface/LHEEvent.h" +#include "GeneratorInterface/LHEInterface/interface/LHERunInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/LHERunInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h" + +struct GenWeightInfoProdData { + bool makeNewProduct; + GenWeightInfoProduct product; +}; + +class LHEWeightProductProducer + : public edm::global::EDProducer, edm::BeginRunProducer> { +public: + explicit LHEWeightProductProducer(const edm::ParameterSet& iConfig); + ~LHEWeightProductProducer() override; + void produce(edm::StreamID, edm::Event&, const edm::EventSetup&) const override; + void globalBeginRunProduce(edm::Run& run, edm::EventSetup const& es) const override; + void globalEndRunProduce(edm::Run& run, edm::EventSetup const& es) const {}; + std::shared_ptr globalBeginRun(edm::Run const& run, edm::EventSetup const& es) const override; + void globalEndRun(edm::Run const& iRun, edm::EventSetup const&) const override {} + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + gen::LHEWeightHelper weightHelper_; + const std::vector lheLabels_; + std::vector> lheEventTokens_; + std::vector> lheRunInfoTokens_; + std::vector> weightInfoTokens_; + edm::EDPutTokenT groupPutToken_; + bool allowUnassociated_; +}; + +LHEWeightProductProducer::LHEWeightProductProducer(const edm::ParameterSet& iConfig) + : lheLabels_(iConfig.getParameter>("lheSourceLabels")), + lheEventTokens_(edm::vector_transform( + lheLabels_, [this](const std::string& tag) { return mayConsume(tag); })), + lheRunInfoTokens_(edm::vector_transform( + lheLabels_, [this](const std::string& tag) { return mayConsume(tag); })), + weightInfoTokens_(edm::vector_transform( + iConfig.getParameter>("weightProductLabels"), + [this](const edm::InputTag& tag) { return mayConsume(tag); })), + groupPutToken_(produces()), + allowUnassociated_(iConfig.getUntrackedParameter("allowUnassociatedWeights", false)) { + produces(); + produces(); + weightHelper_.setFailIfInvalidXML(iConfig.getUntrackedParameter("failIfInvalidXML", false)); + weightHelper_.setDebug(iConfig.getUntrackedParameter("debug", false)); + weightHelper_.setGuessPSWeightIdx(iConfig.getUntrackedParameter("guessPSWeightIdx", false)); +} + +LHEWeightProductProducer::~LHEWeightProductProducer() {} + +void LHEWeightProductProducer::produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + const auto& productInfo = *runCache(iEvent.getRun().index()); + + if (!productInfo.makeNewProduct) + return; + + edm::Handle lheEventInfo; + for (auto& token : lheEventTokens_) { + iEvent.getByToken(token, lheEventInfo); + if (lheEventInfo.isValid()) { + break; + } + } + + if (!lheEventInfo.isValid()) + return; + + auto weightProduct = + weightHelper_.weightProduct(productInfo.product, lheEventInfo->weights(), lheEventInfo->originalXWGTUP()); + iEvent.put(std::move(weightProduct)); + + auto newLheEventInfo = std::make_unique(*lheEventInfo); + newLheEventInfo->clearWeights(); + if (!lheEventInfo->weights().empty()) + newLheEventInfo->addWeight(lheEventInfo->weights()[0]); + iEvent.put(std::move(newLheEventInfo)); +} + +std::shared_ptr LHEWeightProductProducer::globalBeginRun(edm::Run const& run, + edm::EventSetup const& es) const { + bool hasWeightProduct = false; + edm::Handle weightInfoHandle; + for (auto& token : weightInfoTokens_) { + run.getByToken(token, weightInfoHandle); + if (weightInfoHandle.isValid()) { + hasWeightProduct = true; + break; + } + } + GenWeightInfoProdData productInfo; + productInfo.makeNewProduct = !hasWeightProduct; + if (hasWeightProduct) + return std::make_shared(productInfo); + + edm::Handle lheRunInfoHandle; + for (auto& label : lheLabels_) { + run.getByLabel(label, lheRunInfoHandle); + if (lheRunInfoHandle.isValid()) { + break; + } + } + if (!lheRunInfoHandle.isValid()) + return std::make_shared(productInfo); + + typedef std::vector::const_iterator header_cit; + LHERunInfoProduct::Header headerWeightInfo; + for (header_cit iter = lheRunInfoHandle->headers_begin(); iter != lheRunInfoHandle->headers_end(); iter++) { + if (iter->tag() == "initrwgt") { + headerWeightInfo = *iter; + break; + } + } + + gen::WeightGroupInfoContainer groups; + try { + groups = weightHelper_.parseWeights(headerWeightInfo.lines(), allowUnassociated_); + } catch (cms::Exception& e) { + std::string error = e.what(); + error += + "\n NOTE: if you want to attempt to process this sample anyway, set " + "failIfInvalidXML = False " + "in the configuration file (current value is "; + error += weightHelper_.failIfInvalidXML() ? "True" : "False"; + error += + ")\n.If you set this flag and the error persists, the issue " + " is fatal and must be solved at the LHE/gridpack level."; + throw cms::Exception("LHEWeightProductProducer") << error; + } + // Need copy for the run cache + productInfo.product = GenWeightInfoProduct(groups); + return std::make_shared(productInfo); +} + +void LHEWeightProductProducer::globalBeginRunProduce(edm::Run& run, const edm::EventSetup& es) const { + const auto& productInfo = *runCache(run.index()); + auto prod = std::make_unique(productInfo.product); + run.emplace(groupPutToken_, std::move(prod)); +} + +void LHEWeightProductProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add>("lheSourceLabels", std::vector{{"externalLHEProducer"}, {"source"}}) + ->setComment( + "tag(s) to look for LHERunInfoProduct/LHEEventProduct" + "If they are found, a new one won't be created. Leave this argument " + "empty if you want to recreate new " + "products regardless."); + desc.add>("weightProductLabels", std::vector{{""}}) + ->setComment( + "tag(s) to look for existing GenWeightProduct/GenWeightInfoProducts. " + "If they are found, a new one won't be created. Leave this argument " + "empty if you want to recreate new " + "products regardless."); + desc.addUntracked("debug", false)->setComment("Output debug info"); + desc.addUntracked("failIfInvalidXML", true) + ->setComment( + "Throw exception if XML header is invalid (rather than trying to " + "recover and parse anyway)"); + desc.addUntracked("allowUnassociatedWeights", false) + ->setComment( + "Handle weights found in the event that aren't advertised in the " + "weight header (otherwise throw exception)"); + descriptions.add("lheWeights", desc); +} + +DEFINE_FWK_MODULE(LHEWeightProductProducer); diff --git a/GeneratorInterface/Core/src/GenWeightHelper.cc b/GeneratorInterface/Core/src/GenWeightHelper.cc new file mode 100644 index 0000000000000..278f9b63c9523 --- /dev/null +++ b/GeneratorInterface/Core/src/GenWeightHelper.cc @@ -0,0 +1,62 @@ +#include "GeneratorInterface/Core/interface/GenWeightHelper.h" + +#include + +using namespace tinyxml2; + +namespace gen { + GenWeightHelper::GenWeightHelper() {} + + std::vector> GenWeightHelper::parseWeightGroupsFromNames( + std::vector weightNames, bool addUnassociated) const { + std::vector parsedWeights; + int index = 0; + int groupIndex = -1; + int showerGroupIndex = -1; + std::string curGroup = ""; + // If size is 1, it's just the central weight + // Still give the opportunity to add an unassociated group + if (weightNames.size() <= 1) + return buildGroups(parsedWeights, addUnassociated); + + for (std::string weightName : weightNames) { + if (weightName.find("LHE") != std::string::npos) { + // Parse as usual, this is the SUSY workflow + std::vector info; + boost::split(info, weightName, boost::is_any_of(",")); + std::unordered_map attributes; + std::string text = info.back(); + info.pop_back(); + for (auto i : info) { + std::vector subInfo; + boost::split(subInfo, i, boost::is_any_of("=")); + if (subInfo.size() == 2) { + attributes[boost::algorithm::trim_copy(subInfo[0])] = boost::algorithm::trim_copy(subInfo[1]); + } + } + if (attributes["group"] != curGroup) { + curGroup = attributes["group"]; + groupIndex++; + } + // Gen Weights can't have an ID, because they are just a + // std::vector in the event + attributes["id"] = ""; + parsedWeights.push_back({attributes["id"], index, curGroup, text, attributes, groupIndex}); + } else { + parsedWeights.push_back( + {"", index, weightName, weightName, std::unordered_map(), groupIndex}); + if (isPartonShowerWeightGroup(parsedWeights.back())) { + if (showerGroupIndex < 0) { + showerGroupIndex = ++groupIndex; + } + parsedWeights.back().wgtGroup_idx = showerGroupIndex; // all parton showers are grouped together + } + } + index++; + } + auto groups = buildGroups(parsedWeights, addUnassociated); + if (debug_) + printWeights(groups); + return groups; + } +} // namespace gen diff --git a/GeneratorInterface/Core/src/LHEWeightHelper.cc b/GeneratorInterface/Core/src/LHEWeightHelper.cc new file mode 100644 index 0000000000000..8642df1a82c81 --- /dev/null +++ b/GeneratorInterface/Core/src/LHEWeightHelper.cc @@ -0,0 +1,216 @@ +#include "GeneratorInterface/Core/interface/LHEWeightHelper.h" + +#include +#include +#include +#include + +using namespace tinyxml2; + +namespace gen { + bool LHEWeightHelper::validateAndFixHeader(std::vector& headerLines, + tinyxml2::XMLDocument& xmlDoc) const { + std::string fullHeader = boost::algorithm::join(headerLines, ""); + + if (debug_) + std::cout << "Full header is \n" << fullHeader << std::endl; + int xmlError = xmlDoc.Parse(fullHeader.c_str()); + ErrorType errorType; + + while (errorType = findErrorType(xmlError, fullHeader), errorType != ErrorType::NoError) { + if (failIfInvalidXML_) { + std::cout << "XML error: "; + xmlDoc.PrintError(); + throw cms::Exception("LHEWeightHelper") << "The LHE header is not valid! Weight information was not properly " + "parsed." + << " The error type is '" << errorTypeAsString_.at(errorType) << "'"; + } else if (errorType == ErrorType::HTMLStyle) { + if (debug_) + std::cout << " >>> This file uses > instead of >\n"; + xmlError = tryReplaceHtmlStyle(xmlDoc, fullHeader); + } else if (errorType == ErrorType::SwapHeader) { + if (debug_) + std::cout << " >>> Some headers in the file are swapped\n"; + std::vector fixedHeaderLines; + boost::split(fixedHeaderLines, fullHeader, boost::is_any_of("\n")); + swapHeaders(fixedHeaderLines); + fullHeader = boost::algorithm::join(fixedHeaderLines, "\n"); + xmlError = xmlDoc.Parse(fullHeader.c_str()); + } else if (errorType == ErrorType::TrailingStr) { + if (debug_) + std::cout << " >>> There is non-XML text at the end of this file\n"; + xmlError = tryRemoveTrailings(xmlDoc, fullHeader); + } else if (errorType == ErrorType::Empty) { + if (debug_) + std::cout << " >>> There are no LHE xml header, ending parsing" << std::endl; + return false; + } else if (errorType == ErrorType::NoWeightGroup) { + if (debug_) + std::cout << " >>> There are no in the LHE xml header, " + "ending parsing" + << std::endl; + return false; + } else { + std::string error = + "Fatal error when parsing the LHE header. The header is not valid " + "XML! Parsing error was "; + error += xmlDoc.ErrorStr(); + throw cms::Exception("LHEWeightHelper") << error; + } + } + return true; + } + + ParsedWeight LHEWeightHelper::parseWeight(tinyxml2::XMLElement* inner, + std::string groupName, + int groupIndex, + int& weightIndex) const { + if (debug_) + std::cout << " >> Found a weight inside the group. " << std::endl; + std::string text = ""; + if (inner->GetText()) + text = inner->GetText(); + + std::unordered_map attributes; + for (auto* att = inner->FirstAttribute(); att != nullptr; att = att->Next()) + attributes[att->Name()] = att->Value(); + if (debug_) + std::cout << " " << weightIndex << ": \"" << text << "\"" << std::endl; + return {inner->Attribute("id"), weightIndex++, groupName, text, attributes, groupIndex}; + } + + std::vector> LHEWeightHelper::parseWeights(std::vector headerLines, + bool addUnassociatedGroup) const { + tinyxml2::XMLDocument xmlDoc; + if (!validateAndFixHeader(headerLines, xmlDoc)) { + return {}; + } + + std::vector parsedWeights; + int weightIndex = 0; + int groupIndex = 0; + for (auto* e = xmlDoc.RootElement(); e != nullptr; e = e->NextSiblingElement()) { + if (debug_) + std::cout << "XML element is " << e->Name() << std::endl; + std::string groupName = ""; + if (strcmp(e->Name(), "weight") == 0) { + if (debug_) + std::cout << "Found weight unmatched to group\n"; + parsedWeights.push_back(parseWeight(e, groupName, groupIndex, weightIndex)); + } else if (strcmp(e->Name(), "weightgroup") == 0) { + groupName = parseGroupName(e); + if (debug_) + std::cout << ">>>> Found a weight group: " << groupName << std::endl; + for (auto inner = e->FirstChildElement("weight"); inner != nullptr; inner = inner->NextSiblingElement("weight")) + parsedWeights.push_back(parseWeight(inner, groupName, groupIndex, weightIndex)); + } + groupIndex++; + } + auto groups = buildGroups(parsedWeights, addUnassociatedGroup); + if (debug_) + printWeights(groups); + return groups; + } + + std::string LHEWeightHelper::parseGroupName(tinyxml2::XMLElement* el) const { + std::vector nameAlts = {"name", "type"}; + for (const auto& nameAtt : nameAlts) { + if (el->Attribute(nameAtt.c_str())) { + std::string groupName = el->Attribute(nameAtt.c_str()); + if (groupName.find('.') != std::string::npos) + groupName.erase(groupName.find('.'), groupName.size()); + return groupName; + } + } + + throw cms::Exception("LHEWeightHelper") << "Could not parse a name for weight group"; + return ""; + } + + bool LHEWeightHelper::isConsistent(const std::string& fullHeader) const { + std::vector headerLines; + boost::split(headerLines, fullHeader, boost::is_any_of("\n")); + int curLevel = 0; + + for (const auto& line : headerLines) { + if (line.find("/weightgroup") != std::string::npos) { + curLevel--; + if (curLevel != 0) { + return false; + } + } else if (line.find("weightgroup") != std::string::npos) { + curLevel++; + if (curLevel != 1) { + return false; + } + } + } + return curLevel == 0; + } + + void LHEWeightHelper::swapHeaders(std::vector& headerLines) const { + int curLevel = 0; + int open = -1; + int close = -1; + for (size_t idx = 0; idx < headerLines.size(); idx++) { + std::string& line = headerLines[idx]; + std::cout << "Line is " << line << std::endl; + ; + if (line.find("/weightgroup") != std::string::npos) { + curLevel--; + if (curLevel != 0) { + close = idx; + } + } else if (line.find("weightgroup") != std::string::npos) { + curLevel++; + if (curLevel != 1) { + open = idx; + } + } + if (open > -1 && close > -1) { + std::swap(headerLines[open], headerLines[close]); + open = -1; + close = -1; + } + } + } + + tinyxml2::XMLError LHEWeightHelper::tryReplaceHtmlStyle(tinyxml2::XMLDocument& xmlDoc, + std::string& fullHeader) const { + // in case of > instead of < + boost::replace_all(fullHeader, "<", "<"); + boost::replace_all(fullHeader, ">", ">"); + + return xmlDoc.Parse(fullHeader.c_str()); + } + + tinyxml2::XMLError LHEWeightHelper::tryRemoveTrailings(tinyxml2::XMLDocument& xmlDoc, std::string& fullHeader) const { + // delete extra strings after the last (occasionally contain + // '<' or '>') + std::size_t theLastKet = fullHeader.rfind(weightgroupKet_) + weightgroupKet_.length(); + std::size_t thelastWeight = fullHeader.rfind(weightTag_) + weightTag_.length(); + fullHeader = fullHeader.substr(0, std::max(theLastKet, thelastWeight)); + + return xmlDoc.Parse(fullHeader.c_str()); + } + + LHEWeightHelper::ErrorType LHEWeightHelper::findErrorType(int xmlError, const std::string& fullHeader) const { + if (fullHeader.empty()) + return ErrorType::Empty; + else if (!isConsistent(fullHeader)) + return ErrorType::SwapHeader; + else if (fullHeader.find("<") != std::string::npos || fullHeader.find(">") != std::string::npos) + return ErrorType::HTMLStyle; + else if (xmlError != 0) { + if (fullHeader.rfind(weightgroupKet_) == std::string::npos) + return ErrorType::NoWeightGroup; + std::string trailingCand = + fullHeader.substr(fullHeader.rfind(weightgroupKet_) + std::string(weightgroupKet_).length()); + if (trailingCand.find('<') == std::string::npos || trailingCand.find('>') == std::string::npos) + return ErrorType::TrailingStr; + else + return ErrorType::Unknown; + } + return ErrorType::NoError; + } +} // namespace gen diff --git a/GeneratorInterface/Core/src/WeightHelper.cc b/GeneratorInterface/Core/src/WeightHelper.cc new file mode 100644 index 0000000000000..01b648c51f3f2 --- /dev/null +++ b/GeneratorInterface/Core/src/WeightHelper.cc @@ -0,0 +1,333 @@ +#include "GeneratorInterface/Core/interface/WeightHelper.h" + +#include + +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/Utilities/interface/Exception.h" + +namespace gen { + WeightHelper::WeightHelper() {} + + bool WeightHelper::isScaleWeightGroup(const ParsedWeight& weight) const { + return (weight.groupname.find("scale_variation") != std::string::npos || + weight.groupname.find("Central scale variation") != std::string::npos); + } + + bool WeightHelper::isPdfWeightGroup(const ParsedWeight& weight) const { + const std::string& name = weight.groupname; + if (name.find("PDF_variation") != std::string::npos) + return true; + return LHAPDF::lookupLHAPDFID(name) != -1; + } + + bool WeightHelper::isPartonShowerWeightGroup(const ParsedWeight& weight) const { + const std::string& groupname = boost::to_lower_copy(weight.groupname); + std::vector psNames = {"isr", "fsr", "nominal", "baseline", "emission"}; + for (const auto& name : psNames) { + if (groupname.find(name) != std::string::npos) + return true; + } + return false; + } + + bool WeightHelper::isOrphanPdfWeightGroup(ParsedWeight& weight) const { + std::pair pairLHA; + CMS_SA_ALLOW try { pairLHA = LHAPDF::lookupPDF(stoi(searchAttributes("pdf", weight))); } catch (...) { + return false; + } + + if (!pairLHA.first.empty() && pairLHA.second == 0) { + weight.groupname = std::string(pairLHA.first); + return true; + } else { + return false; + } + } + + bool WeightHelper::isMEParamWeightGroup(const ParsedWeight& weight) const { + return (weight.groupname.find("mg_reweighting") != std::string::npos || + weight.groupname.find("variation") != std::string::npos); + // variation used for blanket of all variations, might need to change + } + + std::string WeightHelper::searchAttributes(const std::string& label, const ParsedWeight& weight) const { + std::string attribute = searchAttributesByTag(label, weight); + return attribute.empty() ? searchAttributesByRegex(label, weight) : attribute; + } + + std::string WeightHelper::searchAttributesByTag(const std::string& label, const ParsedWeight& weight) const { + auto& attributes = weight.attributes; + for (const auto& lab : attributeNames_.at(label)) { + if (attributes.find(lab) != attributes.end()) { + return boost::algorithm::trim_copy_if(attributes.at(lab), boost::is_any_of("\"")); + } + } + return ""; + } + + std::string WeightHelper::searchAttributesByRegex(const std::string& label, const ParsedWeight& weight) const { + auto& content = weight.content; + std::smatch match; + for (const auto& lab : attributeNames_.at(label)) { + std::regex floatExpr(lab + "\\s*=\\s*([0-9.]+(?:[eE][+-]?[0-9]+)?)"); + std::regex strExpr(lab + "\\s*=\\s*([^=]+)"); + if (std::regex_search(content, match, floatExpr)) { + return boost::algorithm::trim_copy(match.str(1)); + } else if (std::regex_search(content, match, strExpr)) { + return boost::algorithm::trim_copy(match.str(1)); + } + } + return ""; + } + + void WeightHelper::updateScaleInfo(gen::ScaleWeightGroupInfo& scaleGroup, const ParsedWeight& weight) const { + std::string muRText = searchAttributes("mur", weight); + std::string muFText = searchAttributes("muf", weight); + std::string dynNumText = searchAttributes("dyn", weight); + float muR, muF; + try { + muR = std::stof(muRText); + muF = std::stof(muFText); + } catch (std::invalid_argument& e) { + if (debug_) + std::cout << "Tried to convert (" << muR << ", " << muF << ") to a int" << std::endl; + scaleGroup.setWeightIsCorrupt(); + return; + /// do something + } + + if (dynNumText.empty()) { + scaleGroup.setMuRMuFIndex(weight.index, weight.id, muR, muF); + } else { + std::string dynType = searchAttributes("dyn_name", weight); + try { + int dynNum = std::stoi(dynNumText); + scaleGroup.setDyn(weight.index, weight.id, muR, muF, dynNum, dynType); + } catch (std::invalid_argument& e) { + scaleGroup.setWeightIsCorrupt(); + /// do something here + } + } + + if (scaleGroup.lhaid() == -1) { + std::string lhaidText = searchAttributes("pdf", weight); + try { + scaleGroup.setLhaid(std::stoi(lhaidText)); + } catch (std::invalid_argument& e) { + scaleGroup.setLhaid(-1); + // do something here + } + } + } + + int WeightHelper::lhapdfId(const ParsedWeight& weight, gen::PdfWeightGroupInfo& pdfGroup) const { + std::string lhaidText = searchAttributes("pdf", weight); + + if (debug_) + std::cout << "Looking for LHAPDF info in ID " << lhaidText << std::endl; + + if (!lhaidText.empty()) { + try { + return std::stoi(lhaidText); + } catch (std::invalid_argument& e) { + pdfGroup.setIsWellFormed(false); + } + } else if (!pdfGroup.lhaIds().empty()) { + return pdfGroup.lhaIds().back() + 1; + } else { + if (debug_) + std::cout << "Looking up LHAPDF ID from name" << weight.groupname << std::endl; + return LHAPDF::lookupLHAPDFID(weight.groupname); + } + return -1; + } + + void WeightHelper::updatePdfInfo(gen::PdfWeightGroupInfo& pdfGroup, const ParsedWeight& weight) const { + int lhaid = lhapdfId(weight, pdfGroup); + if (debug_) + std::cout << "LHAID identified as " << lhaid << std::endl; + if (pdfGroup.parentLhapdfId() < 0) { + int parentId = lhaid - LHAPDF::lookupPDF(lhaid).second; + pdfGroup.setParentLhapdfInfo(parentId); + pdfGroup.setUncertaintyType(gen::kUnknownUnc); + + std::string description = ""; + if (pdfGroup.uncertaintyType() == gen::kHessianUnc) + description += "Hessian "; + else if (pdfGroup.uncertaintyType() == gen::kMonteCarloUnc) + description += "Monte Carlo "; + description += "Uncertainty sets for LHAPDF set " + LHAPDF::lookupPDF(parentId).first; + description += " with LHAID = " + std::to_string(parentId); + description += "; "; + + pdfGroup.appendDescription(description); + } + // after setup parent info, add lhaid + pdfGroup.addLhaid(lhaid); + } + + void WeightHelper::updatePartonShowerInfo(gen::PartonShowerWeightGroupInfo& psGroup, + const ParsedWeight& weight) const { + if (psGroup.nIdsContained() == DEFAULT_PSWEIGHT_LENGTH) { + psGroup.setIsWellFormed(true); + psGroup.cacheWeightIndicesByLabel(); + } + if (weight.content.find(':') != std::string::npos && weight.content.find('=') != std::string::npos) + psGroup.setNameIsPythiaSyntax(true); + } + + bool WeightHelper::splitPdfWeight(ParsedWeight& weight, WeightGroupInfoContainer& weightGroups) const { + if (weightGroups[weight.wgtGroup_idx]->weightType() == gen::WeightType::kPdfWeights) { + auto& pdfGroup = *static_cast(weightGroups[weight.wgtGroup_idx].get()); + int lhaid = lhapdfId(weight, pdfGroup); + if (lhaid > 0 && !pdfGroup.isIdInParentSet(lhaid) && pdfGroup.parentLhapdfId() > 0) { + weightGroups.push_back(buildGroup(weight)); + weight.wgtGroup_idx++; + return true; + } + } + return false; + } + + void WeightHelper::cleanupOrphanCentralWeight(WeightGroupInfoContainer& weightGroups) const { + auto centralIt = std::find_if(std::begin(weightGroups), std::end(weightGroups), [](auto& entry) { + return entry->weightType() == gen::WeightType::kScaleWeights && + static_cast(entry.get())->containsCentralWeight(); + }); + if (centralIt == std::end(weightGroups)) + return; + + auto& centralWeight = *static_cast(centralIt->get()); + + std::vector toRemove; + for (size_t i = 0; i < weightGroups.size(); i++) { + auto& group = weightGroups[i]; + if (group->weightType() == gen::WeightType::kPdfWeights) { + auto& pdfGroup = *static_cast(group.get()); + // These are weights that contain nothing but a single central weight, + // because some versions of madgraph write the central weight separately + if (pdfGroup.nIdsContained() == 1 && pdfGroup.parentLhapdfId() == centralWeight.lhaid()) { + toRemove.push_back(i); + const auto& weightInfo = pdfGroup.weightMetaInfo(0); + centralWeight.addContainedId(weightInfo.globalIndex, weightInfo.id, weightInfo.label, 1, 1); + } + } + } + // Indices are guaranteed to be unique, delete from high to low to avoid + // changing indices + std::sort(std::begin(toRemove), std::end(toRemove), std::greater()); + for (auto i : toRemove) { + weightGroups.erase(std::begin(weightGroups) + i); + } + } + + void WeightHelper::printWeights(const WeightGroupInfoContainer& weightGroups) const { + // checks + for (const auto& group : weightGroups) { + std::cout << std::boolalpha << group->name() << " (" << group->firstId() << "-" << group->lastId() + << "): " << group->isWellFormed() << std::endl; + if (group->weightType() == gen::WeightType::kScaleWeights) { + const auto& groupScale = *static_cast(group.get()); + std::cout << groupScale.centralIndex() << " "; + std::cout << groupScale.muR1muF2Index() << " "; + std::cout << groupScale.muR1muF05Index() << " "; + std::cout << groupScale.muR2muF1Index() << " "; + std::cout << groupScale.muR2muF2Index() << " "; + std::cout << groupScale.muR2muF05Index() << " "; + std::cout << groupScale.muR05muF1Index() << " "; + std::cout << groupScale.muR05muF2Index() << " "; + std::cout << groupScale.muR05muF05Index() << " \n"; + for (auto& name : groupScale.dynNames()) { + std::cout << name << ": "; + std::cout << groupScale.scaleIndex(1.0, 1.0, name) << " "; + std::cout << groupScale.scaleIndex(1.0, 2.0, name) << " "; + std::cout << groupScale.scaleIndex(1.0, 0.5, name) << " "; + std::cout << groupScale.scaleIndex(2.0, 1.0, name) << " "; + std::cout << groupScale.scaleIndex(2.0, 2.0, name) << " "; + std::cout << groupScale.scaleIndex(2.0, 0.5, name) << " "; + std::cout << groupScale.scaleIndex(0.5, 1.0, name) << " "; + std::cout << groupScale.scaleIndex(0.5, 2.0, name) << " "; + std::cout << groupScale.scaleIndex(0.5, 0.5, name) << "\n"; + } + + } else if (group->weightType() == gen::WeightType::kPdfWeights) { + std::cout << group->description() << "\n"; + } else if (group->weightType() == gen::WeightType::kPartonShowerWeights) { + const auto& groupPS = *static_cast(group.get()); + std::vector labels = groupPS.weightLabels(); + groupPS.printVariables(); + } + } + } + + std::unique_ptr WeightHelper::buildGroup(ParsedWeight& weight) const { + if (debug_) { + std::cout << "Building group for weight group " << weight.groupname << " weight content is " << weight.content + << std::endl; + } + if (isScaleWeightGroup(weight)) { + if (debug_) + std::cout << "Weight type is scale\n"; + return std::make_unique(weight.groupname); + } else if (isPdfWeightGroup(weight)) { + if (debug_) + std::cout << "Weight type is PDF\n"; + return std::make_unique(weight.groupname); + } else if (isMEParamWeightGroup(weight)) { + if (debug_) + std::cout << "Weight type is MEParam\n"; + return std::make_unique(weight.groupname); + } else if (isPartonShowerWeightGroup(weight)) { + if (debug_) + std::cout << "Weight type is parton shower\n"; + return std::make_unique("shower"); + } else if (isOrphanPdfWeightGroup(weight)) { + if (debug_) + std::cout << "Weight type is PDF\n"; + return std::make_unique(weight.groupname); + } + if (debug_) + std::cout << "Weight type is unknown\n"; + + std::cout << "Group name is " << weight.groupname << std::endl; + + return std::make_unique(weight.groupname); + } + + WeightGroupInfoContainer WeightHelper::buildGroups(std::vector& parsedWeights, + bool addUnassociated) const { + WeightGroupInfoContainer weightGroups; + int groupOffset = 0; + for (auto& weight : parsedWeights) { + weight.wgtGroup_idx += groupOffset; + if (debug_) + std::cout << "Building group for weight " << weight.content << " group " << weight.groupname << " group index " + << weight.wgtGroup_idx << std::endl; + + int numGroups = static_cast(weightGroups.size()); + if (weight.wgtGroup_idx == numGroups) { + weightGroups.push_back(buildGroup(weight)); + } else if (weight.wgtGroup_idx >= numGroups) + throw cms::Exception("Invalid group index " + std::to_string(weight.wgtGroup_idx)); + + // split PDF groups + if (splitPdfWeight(weight, weightGroups)) + groupOffset++; + + auto& group = weightGroups[weight.wgtGroup_idx]; + group->addContainedId(weight.index, weight.id, weight.content); + if (group->weightType() == gen::WeightType::kScaleWeights) + updateScaleInfo(*static_cast(group.get()), weight); + else if (group->weightType() == gen::WeightType::kPdfWeights) + updatePdfInfo(*static_cast(group.get()), weight); + else if (group->weightType() == gen::WeightType::kPartonShowerWeights) + updatePartonShowerInfo(*static_cast(group.get()), weight); + } + cleanupOrphanCentralWeight(weightGroups); + if (addUnassociated) { + addUnassociatedGroup(weightGroups); + } + return weightGroups; + } + +} // namespace gen diff --git a/GeneratorInterface/Core/test/dumpWeightInfo.py b/GeneratorInterface/Core/test/dumpWeightInfo.py new file mode 100644 index 0000000000000..77cfd9ebcb882 --- /dev/null +++ b/GeneratorInterface/Core/test/dumpWeightInfo.py @@ -0,0 +1,43 @@ +from __future__ import print_function +from DataFormats.FWLite import Events,Handle,Runs,Lumis +import ROOT +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("infile", type=str, help="Input EDM file") +parser.add_argument("--source", type=str, help="product ID of weight product", default="genWeights") +args = parser.parse_args() + +lumis = Lumis(args.infile) +lumi = next(lumis.__iter__()) +weightInfoHandle = Handle("GenWeightInfoProduct") +lumi.getByLabel(args.source, weightInfoHandle) +weightInfoProd = weightInfoHandle.product() + +events = Events(args.infile) +event = next(events.__iter__()) +weightHandle = Handle("GenWeightProduct") +event.getByLabel(args.source, weightHandle) +weightInfo = weightHandle.product() +print("Number of weight groups in weightInfo is", len(weightInfo.weights())) +for j, weights in enumerate(weightInfo.weights()): + print("-"*10, "Looking at entry", j, "length is", len(weights),"-"*10) + matching = weightInfoProd.orderedWeightGroupInfo(j) + print(f"name = {matching.name()}; type is {matching.weightType()}; Well formed? {str(matching.isWellFormed())}") + print("Group description", matching.description()) + if matching.weightType() == 's': + for var in [(x, y) for x in ["05", "1", "2"] for y in ["05", "1", "2"]]: + name = "muR%smuF%sIndex" % (var[0], var[1]) if not (var[0] == "1" and var[1] == "1") else "centralIndex" + print(name, getattr(matching, name)()) + elif matching.weightType() == 'P': + print("uncertaintyType", "Hessian" if matching.uncertaintyType() == ROOT.gen.kHessianUnc else "MC") + print("Has alphas? ", matching.hasAlphasVariations()) + print("Weights length?", len(weights), "Contained ids lenths?", matching.nIdsContained()) + print("-"*80) + for i,weight in enumerate(weights): + try: + info = matching.weightMetaInfo(i) + print(" ID, localIndex, globalIndex, label, set:", info.id, info.localIndex, info.globalIndex, info.label, matching.name()) + except: + print(f"--> Entry {i} in group {matching.name()} does not have any associated metaInfo!") + print("-"*80) diff --git a/GeneratorInterface/LHEInterface/BuildFile.xml b/GeneratorInterface/LHEInterface/BuildFile.xml index 6fcd808f731bd..c3bdd135e435e 100644 --- a/GeneratorInterface/LHEInterface/BuildFile.xml +++ b/GeneratorInterface/LHEInterface/BuildFile.xml @@ -12,6 +12,7 @@ + diff --git a/GeneratorInterface/LHEInterface/test/createLHEFormatFromROOTFile.py b/GeneratorInterface/LHEInterface/test/createLHEFormatFromROOTFile.py new file mode 100644 index 0000000000000..33c60dbff7ca4 --- /dev/null +++ b/GeneratorInterface/LHEInterface/test/createLHEFormatFromROOTFile.py @@ -0,0 +1,107 @@ +import ROOT +import sys + + +""" +Usage +python createLHEFormatFromROOTFile.py inputfile outputfile pdgId_particle_to_undo_decay1 pdgId_particle_to_undo_decay2 pdgId_particle_to_undo_decay3 ... +""" + +args = sys.argv[:] + +class HEPPart(object): + def __init__(self,event, idx): + """ + Class to organize the description of a particle in the LHE event + event : whole event information (usually a entry in the input TTree) + idx : the index of the particle inside the LHE file + """ + for att in ["pt","eta","phi", "mass","lifetime","pdgId","status","spin","color1", "color2","mother1","mother2","incomingpz"]: + setattr(self, att, getattr(event, "LHEPart_"+att)[idx]) + self.setP4() + + def setP4(self): + self.p4 = ROOT.TLorentzVector() + if self.status != -1: + self.p4.SetPtEtaPhiM(self.pt, self.eta, self.phi, self.mass) + else: + self.p4.SetPxPyPzE(0.,0.,self.incomingpz, abs(self.incomingpz)) + def printPart(self): + """ Just to print it pretty """ + return " {pdg:d} {status:d} {mother1:d} {mother2:d} {color1:d} {color2:d} {px:e} {py:e} {pz:e} {energy:e} {mass:e} {time:e} {spin:e}\n".format(pdg=self.pdgId,status=self.status, mother1=self.mother1, mother2=self.mother2, color1=self.color1, color2=self.color2, px=self.p4.Px(), py=self.p4.Py(), pz=self.p4.Pz(), energy=self.p4.E(), mass=self.mass, time=self.lifetime, spin=self.spin) + + +class LHEPrinter(object): + def __init__(self,theFile,theTree,outputLHE,undoDecays=[], chunkers=-1, prDict={}): + """ + theFile : path to input root file with the whole LHEinformation + theTree : number of ttree inside the file, usually "Events" + outputLHE: name of output .lhe file + undoDecays: pdgId of particles whose decays we want to undo (i.e. W that are decayed with madspin, as the reweighting is called before madspin) + chunkers: process by chunks in case we want to later multithread the jobs. Argument is max size (in events) of the chunks + prDict : a dictionary indicating possible mismatchings between the ProcessID in the new gridpack and the old (matching the numbers at generate p p > x y z @0 in the run card) + """ + + self.prDict = prDict + self.outputLHE = outputLHE + self.fil = ROOT.TFile(theFile,"OPEN") + self.tree = self.fil.Get(theTree) + self.undoDecays = undoDecays + self.baseheader = "\n" + self.baseline1 = " {nparts} {prid} {weight} {scale} {aqed} {aqcd}\n" + self.baseender = "\n\n\n" + self.chunkers = chunkers + + def insideLoop(self): + """ Loop over all events and process the root file into a plain text LHE file""" + totalEvents = self.tree.GetEntries() + if self.chunkers == -1 or self.chunkers > totalEvents: + self.output = open(self.outputLHE,"w") + self.chunkers = totalEvents + 1 + else: + self.output = open(self.outputLHE+"chunk0","w") + chunk = 0 + + print "Processing %i events, please wait..."%totalEvents + iEv = 0 + pEv = 0 + chunk = 0 + for ev in self.tree: + iEv += 1 + pEv += 1 + if pEv >= self.chunkers: + pEv = 0 + chunk += 1 + self.output.close() + self.output = open(self.outputLHE+"chunk%i"%chunk,"w") + print "...Event %i/%i"%(iEv, totalEvents) + self.process(ev) + + def process(self, ev): + """First produce the global line like """ + self.output.write(self.baseheader.format(nplo = ord(str(ev.LHE_NpLO)) if ord(str(ev.LHE_NpLO)) != 255 else -1, npnlo = ord(str(ev.LHE_NpNLO)) if ord(str(ev.LHE_NpNLO)) != 255 else -1)) + + """Then we need to treat the whole thing to undo the madspin decays, update statuses and rewrite particle order""" + lhepart = [] + deletedIndexes = [] + for i in range(getattr(ev, "nLHEPart")): + testPart = HEPPart(ev,i) + testPart.mother1 = testPart.mother1 - sum([1*(testPart.mother1 > d) for d in deletedIndexes]) + testPart.mother2 = testPart.mother2 - sum([1*(testPart.mother2 > d) for d in deletedIndexes]) + if testPart.mother1 != 0: + if abs(lhepart[testPart.mother1-1].pdgId) in self.undoDecays: #If from something that decays after weighting just skip it and update particle indexes + deletedIndexes.append(i) + continue + if abs(testPart.pdgId) in self.undoDecays: + testPart.status = 1 + lhepart.append(testPart) + + """ Now we can compute properly the number of particles at LHE """ + self.output.write(self.baseline1.format(nparts=len(lhepart), prid=self.prDict[str(ord(str(ev.LHE_ProcessID)))], weight=ev.LHEWeight_originalXWGTUP, scale=ev.LHE_Scale,aqed=ev.LHE_AlphaQED,aqcd=ev.LHE_AlphaS)) + """ And save each particle information """ + for part in lhepart: + self.output.write(part.printPart()) + self.output.write(self.baseender) + +theP = LHEPrinter(args[1],"Events",args[2],undoDecays=[int(i) for i in args[4:]],chunkers=int(args[3]), prDict={str(i):i for i in range(1000)}) #PRDict by default set to not change anything as it is rare to use it +theP.insideLoop() diff --git a/PhysicsTools/NanoAOD/interface/GenWeightCounters.h b/PhysicsTools/NanoAOD/interface/GenWeightCounters.h new file mode 100644 index 0000000000000..f3e5b17acd998 --- /dev/null +++ b/PhysicsTools/NanoAOD/interface/GenWeightCounters.h @@ -0,0 +1,112 @@ +#ifndef GENWEIGHTSCOUNTERS_h +#define GENWEIGHTSCOUNTERS_h +#include +#include +#include +namespace genCounter { + + inline void mergeSumVectors(std::vector& v1, std::vector const& v2) { + if (v1.empty() && !v2.empty()) + v1.resize(v2.size(), 0); + if (!v2.empty()) + for (unsigned int i = 0, n = v1.size(); i < n; ++i) + v1[i] += v2[i]; + } + + /// ---- Cache object for running sums of weights ---- + class Counter { + public: + void clear() { + num_ = 0; + sumw_ = 0; + sumw2_ = 0; + weightSumMap_.clear(); + } + + // inc only the gen counters + void incGenOnly(double w) { + num_++; + sumw_ += w; + sumw2_ += (w * w); + } + + void incLHE(double w0, const std::vector& weightV, const std::string& wName) { + // if new type of weight, create a map element + if (weightSumMap_.find(wName) == weightSumMap_.end()) { + std::vector temp; + weightSumMap_.insert({wName, temp}); + } + if (!weightV.empty()) { + if (weightSumMap_[wName].empty()) + weightSumMap_[wName].resize(weightV.size(), 0); + for (unsigned int i = 0, n = weightV.size(); i < n; ++i) + weightSumMap_[wName][i] += (w0 * weightV[i]); + } + // incGenOnly(w0); + // incPSOnly(w0, wPS); + } + + void mergeSumMap(const Counter& other) { + num_ += other.num_; + sumw_ += other.sumw_; + sumw2_ += other.sumw2_; + // if weightMap_ for "this" is empty, create map elements with empty + // vectors before merging + if (weightSumMap_.empty() && !other.weightSumMap_.empty()) { + for (auto& wmap : other.weightSumMap_) { + std::vector temp; + weightSumMap_.insert({wmap.first, temp}); + } + } + + for (auto& wmap : weightSumMap_) { + if (other.weightSumMap_.find(wmap.first) != other.weightSumMap_.end()) + mergeSumVectors(wmap.second, other.weightSumMap_.at(wmap.first)); + } + } + + // private: + // the counters + long long num_ = 0; + long double sumw_ = 0; + long double sumw2_ = 0; + std::map> weightSumMap_; + }; + + struct CounterMap { + std::map countermap; + Counter* active_el = nullptr; + std::string active_label = ""; + + void mergeSumMap(const CounterMap& other) { + for (const auto& y : other.countermap) { + countermap[y.first].mergeSumMap(y.second); + } + active_el = nullptr; + } + + void clear() { + for (auto x : countermap) + x.second.clear(); + } + + void setLabel(std::string label) { + active_el = &(countermap[label]); + active_label = label; + } + void checkLabelSet() { + if (!active_el) + throw cms::Exception("LogicError", "Called CounterMap::get() before setting the active label\n"); + } + Counter* get() { + checkLabelSet(); + return active_el; + } + std::string& getLabel() { + checkLabelSet(); + return active_label; + } + }; + +} // namespace genCounter +#endif diff --git a/PhysicsTools/NanoAOD/plugins/GenWeightsTableProducer.cc b/PhysicsTools/NanoAOD/plugins/GenWeightsTableProducer.cc index ac1085f531e73..459834c62275c 100644 --- a/PhysicsTools/NanoAOD/plugins/GenWeightsTableProducer.cc +++ b/PhysicsTools/NanoAOD/plugins/GenWeightsTableProducer.cc @@ -1,1071 +1,111 @@ -#include "FWCore/Framework/interface/global/EDProducer.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/Framework/interface/Run.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include +#include + #include "DataFormats/NanoAOD/interface/FlatTable.h" #include "DataFormats/NanoAOD/interface/MergeableCounterTable.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/Run.h" +#include "FWCore/Framework/interface/global/EDProducer.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/Utilities/interface/transform.h" +#include "PhysicsTools/NanoAOD/interface/GenWeightCounters.h" #include "SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h" -#include "SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h" -#include "SimDataFormats/GeneratorProducts/interface/LHERunInfoProduct.h" #include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoHeader.h" -#include "FWCore/MessageLogger/interface/MessageLogger.h" -#include "boost/algorithm/string.hpp" - -#include - -#include -#include -#include -#include +#include "SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenWeightProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" namespace { - /// ---- Cache object for running sums of weights ---- - struct Counter { - Counter() : num(0), sumw(0), sumw2(0), sumPDF(), sumScale(), sumRwgt(), sumNamed(), sumPS() {} - - // the counters - long long num; - long double sumw; - long double sumw2; - std::vector sumPDF, sumScale, sumRwgt, sumNamed, sumPS; - - void clear() { - num = 0; - sumw = 0; - sumw2 = 0; - sumPDF.clear(); - sumScale.clear(); - sumRwgt.clear(); - sumNamed.clear(), sumPS.clear(); - } - - // inc the counters - void incGenOnly(double w) { - num++; - sumw += w; - sumw2 += (w * w); - } - - void incPSOnly(double w0, const std::vector& wPS) { - if (!wPS.empty()) { - if (sumPS.empty()) - sumPS.resize(wPS.size(), 0); - for (unsigned int i = 0, n = wPS.size(); i < n; ++i) - sumPS[i] += (w0 * wPS[i]); - } - } - - void incLHE(double w0, - const std::vector& wScale, - const std::vector& wPDF, - const std::vector& wRwgt, - const std::vector& wNamed, - const std::vector& wPS) { - // add up weights - incGenOnly(w0); - // then add up variations - if (!wScale.empty()) { - if (sumScale.empty()) - sumScale.resize(wScale.size(), 0); - for (unsigned int i = 0, n = wScale.size(); i < n; ++i) - sumScale[i] += (w0 * wScale[i]); - } - if (!wPDF.empty()) { - if (sumPDF.empty()) - sumPDF.resize(wPDF.size(), 0); - for (unsigned int i = 0, n = wPDF.size(); i < n; ++i) - sumPDF[i] += (w0 * wPDF[i]); - } - if (!wRwgt.empty()) { - if (sumRwgt.empty()) - sumRwgt.resize(wRwgt.size(), 0); - for (unsigned int i = 0, n = wRwgt.size(); i < n; ++i) - sumRwgt[i] += (w0 * wRwgt[i]); - } - if (!wNamed.empty()) { - if (sumNamed.empty()) - sumNamed.resize(wNamed.size(), 0); - for (unsigned int i = 0, n = wNamed.size(); i < n; ++i) - sumNamed[i] += (w0 * wNamed[i]); - } - incPSOnly(w0, wPS); - } - - void merge(const Counter& other) { - num += other.num; - sumw += other.sumw; - sumw2 += other.sumw2; - if (sumScale.empty() && !other.sumScale.empty()) - sumScale.resize(other.sumScale.size(), 0); - if (sumPDF.empty() && !other.sumPDF.empty()) - sumPDF.resize(other.sumPDF.size(), 0); - if (sumRwgt.empty() && !other.sumRwgt.empty()) - sumRwgt.resize(other.sumRwgt.size(), 0); - if (sumNamed.empty() && !other.sumNamed.empty()) - sumNamed.resize(other.sumNamed.size(), 0); - if (sumPS.empty() && !other.sumPS.empty()) - sumPS.resize(other.sumPS.size(), 0); - if (!other.sumScale.empty()) - for (unsigned int i = 0, n = sumScale.size(); i < n; ++i) - sumScale[i] += other.sumScale[i]; - if (!other.sumPDF.empty()) - for (unsigned int i = 0, n = sumPDF.size(); i < n; ++i) - sumPDF[i] += other.sumPDF[i]; - if (!other.sumRwgt.empty()) - for (unsigned int i = 0, n = sumRwgt.size(); i < n; ++i) - sumRwgt[i] += other.sumRwgt[i]; - if (!other.sumNamed.empty()) - for (unsigned int i = 0, n = sumNamed.size(); i < n; ++i) - sumNamed[i] += other.sumNamed[i]; - if (!other.sumPS.empty()) - for (unsigned int i = 0, n = sumPS.size(); i < n; ++i) - sumPS[i] += other.sumPS[i]; - } - }; - - struct CounterMap { - std::map countermap; - Counter* active_el = nullptr; - std::string active_label = ""; - void merge(const CounterMap& other) { - for (const auto& y : other.countermap) - countermap[y.first].merge(y.second); - active_el = nullptr; - } - void clear() { - for (auto x : countermap) - x.second.clear(); - active_el = nullptr; - active_label = ""; - } - void setLabel(std::string label) { - active_el = &(countermap[label]); - active_label = label; - } - void checkLabelSet() { - if (!active_el) - throw cms::Exception("LogicError", "Called CounterMap::get() before setting the active label\n"); - } - Counter* get() { - checkLabelSet(); - return active_el; - } - std::string& getLabel() { - checkLabelSet(); - return active_label; - } - }; - - /// ---- RunCache object for dynamic choice of LHE IDs ---- - struct DynamicWeightChoice { - // choice of LHE weights - // ---- scale ---- - std::vector scaleWeightIDs; - std::string scaleWeightsDoc; - // ---- pdf ---- - std::vector pdfWeightIDs; - std::string pdfWeightsDoc; - // ---- rwgt ---- - std::vector rwgtIDs; - std::string rwgtWeightDoc; - }; - - struct DynamicWeightChoiceGenInfo { - // choice of LHE weights - // ---- scale ---- - std::vector scaleWeightIDs; - std::string scaleWeightsDoc; - // ---- pdf ---- - std::vector pdfWeightIDs; - std::string pdfWeightsDoc; - // ---- ps ---- - std::vector defPSWeightIDs = {6, 7, 8, 9}; - std::vector defPSWeightIDs_alt = {27, 5, 26, 4}; - bool matchPS_alt = false; - std::vector psWeightIDs; - unsigned int psBaselineID = 1; - std::string psWeightsDoc; - - void setMissingWeight(int idx) { psWeightIDs[idx] = (matchPS_alt) ? defPSWeightIDs_alt[idx] : defPSWeightIDs[idx]; } - - bool empty() const { return scaleWeightIDs.empty() && pdfWeightIDs.empty() && psWeightIDs.empty(); } - }; - - struct LumiCacheInfoHolder { - CounterMap countermap; - DynamicWeightChoiceGenInfo weightChoice; - void clear() { - countermap.clear(); - weightChoice = DynamicWeightChoiceGenInfo(); - } - }; - - float stof_fortrancomp(const std::string& str) { - std::string::size_type match = str.find('d'); - if (match != std::string::npos) { - std::string pre = str.substr(0, match); - std::string post = str.substr(match + 1); - return std::stof(pre) * std::pow(10.0f, std::stof(post)); - } else { - return std::stof(str); - } - } - /// -------------- temporary objects -------------- - struct ScaleVarWeight { - std::string wid, label; - std::pair scales; - ScaleVarWeight(const std::string& id, const std::string& text, const std::string& muR, const std::string& muF) - : wid(id), label(text), scales(stof_fortrancomp(muR), stof_fortrancomp(muF)) {} - bool operator<(const ScaleVarWeight& other) { - return (scales == other.scales ? wid < other.wid : scales < other.scales); - } - }; - struct PDFSetWeights { - std::vector wids; - std::pair lhaIDs; - PDFSetWeights(const std::string& wid, unsigned int lhaID) : wids(1, wid), lhaIDs(lhaID, lhaID) {} - bool operator<(const PDFSetWeights& other) const { return lhaIDs < other.lhaIDs; } - void add(const std::string& wid, unsigned int lhaID) { - wids.push_back(wid); - lhaIDs.second = lhaID; - } - bool maybe_add(const std::string& wid, unsigned int lhaID) { - if (lhaID == lhaIDs.second + 1) { - lhaIDs.second++; - wids.push_back(wid); - return true; - } else { - return false; - } - } - }; + typedef std::vector WeightGroupDataContainer; + typedef std::array WeightGroupsToStore; } // namespace +using CounterMap = genCounter::CounterMap; +using Counter = genCounter::Counter; -class GenWeightsTableProducer : public edm::global::EDProducer, - edm::RunCache, +class GenWeightsTableProducer : public edm::global::EDProducer, + edm::StreamCache, edm::RunSummaryCache, edm::EndRunProducer> { public: - GenWeightsTableProducer(edm::ParameterSet const& params) - : genTag_(consumes(params.getParameter("genEvent"))), - lheLabel_(params.getParameter>("lheInfo")), - lheTag_(edm::vector_transform(lheLabel_, - [this](const edm::InputTag& tag) { return mayConsume(tag); })), - lheRunTag_(edm::vector_transform( - lheLabel_, [this](const edm::InputTag& tag) { return mayConsume(tag); })), - genLumiInfoHeadTag_( - mayConsume(params.getParameter("genLumiInfoHeader"))), - namedWeightIDs_(params.getParameter>("namedWeightIDs")), - namedWeightLabels_(params.getParameter>("namedWeightLabels")), - lheWeightPrecision_(params.getParameter("lheWeightPrecision")), - maxPdfWeights_(params.getParameter("maxPdfWeights")), - keepAllPSWeights_(params.getParameter("keepAllPSWeights")), - debug_(params.getUntrackedParameter("debug", false)), - debugRun_(debug_.load()), - hasIssuedWarning_(false), - psWeightWarning_(false) { - produces(); - produces("genModel"); - produces("LHEScale"); - produces("LHEPdf"); - produces("LHEReweighting"); - produces("LHENamed"); - produces("PS"); - produces(); - if (namedWeightIDs_.size() != namedWeightLabels_.size()) { - throw cms::Exception("Configuration", "Size mismatch between namedWeightIDs & namedWeightLabels"); - } - for (const edm::ParameterSet& pdfps : params.getParameter>("preferredPDFs")) { - const std::string& name = pdfps.getParameter("name"); - uint32_t lhaid = pdfps.getParameter("lhaid"); - preferredPDFLHAIDs_.push_back(lhaid); - lhaNameToID_[name] = lhaid; - lhaNameToID_[name + ".LHgrid"] = lhaid; - } - } - - ~GenWeightsTableProducer() override {} - - void produce(edm::StreamID id, edm::Event& iEvent, const edm::EventSetup& iSetup) const override { - // get my counter for weights - Counter* counter = streamCache(id)->countermap.get(); - - // generator information (always available) - edm::Handle genInfo; - iEvent.getByToken(genTag_, genInfo); - double weight = genInfo->weight(); - - // table for gen info, always available - auto out = std::make_unique(1, "genWeight", true); - out->setDoc("generator weight"); - out->addColumnValue("", weight, "generator weight"); - iEvent.put(std::move(out)); - - std::string model_label = streamCache(id)->countermap.getLabel(); - auto outM = std::make_unique((!model_label.empty()) ? std::string("GenModel_") + model_label : ""); - iEvent.put(std::move(outM), "genModel"); - bool getLHEweightsFromGenInfo = !model_label.empty(); - - // tables for LHE weights, may not be filled - std::unique_ptr lheScaleTab, lhePdfTab, lheRwgtTab, lheNamedTab; - std::unique_ptr genPSTab; - - edm::Handle lheInfo; - for (const auto& lheTag : lheTag_) { - iEvent.getByToken(lheTag, lheInfo); - if (lheInfo.isValid()) { + GenWeightsTableProducer(edm::ParameterSet const& params); + + void produce(edm::StreamID id, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + void fillTableIgnoringGroups(std::vector& weightTablevec, + const WeightGroupDataContainer& weightInfos, + WeightsContainer& allWeights, + size_t maxStore, + std::string tablename) const; + void addWeightGroupToTable(std::vector& weightTablevec, + const WeightGroupDataContainer& weightInfos, + WeightsContainer& allWeights) const; + // Need to either pass the handle or a pointer to avoid a copy and conversion + // to the base class + WeightGroupDataContainer weightDataPerType(edm::Handle& weightsInfoHandle, + gen::WeightType weightType, + size_t maxStore) const; + + WeightGroupsToStore groupsToStore(bool foundLheWeights, + edm::Handle& genWeightInfoHandle, + edm::Handle& lheWeightInfoHandle) const; + + std::pair> orderedScaleWeights(const std::vector& scaleWeights, + const gen::ScaleWeightGroupInfo& scaleGroup) const; + + std::pair> preferredPSweights(const std::vector& psWeights, + const gen::PartonShowerWeightGroupInfo& pswV) const; + + // Lumiblock + std::shared_ptr globalBeginLuminosityBlock(edm::LuminosityBlock const& iLumi, + edm::EventSetup const&) const override { + // Set equal to the max number of groups + // subtrack 1 for each weight group you find + bool foundLheWeights = false; + edm::Handle lheWeightInfoHandle; + for (auto& token : lheWeightInfoTokens_) { + iLumi.getRun().getByToken(token, lheWeightInfoHandle); + if (lheWeightInfoHandle.isValid()) { + foundLheWeights = true; break; } } - const auto genWeightChoice = &(streamCache(id)->weightChoice); - if (lheInfo.isValid()) { - if (getLHEweightsFromGenInfo && !hasIssuedWarning_.exchange(true)) - edm::LogWarning("LHETablesProducer") - << "Found both a LHEEventProduct and a GenLumiInfoHeader: will only save weights from LHEEventProduct.\n"; - // get the dynamic choice of weights - const DynamicWeightChoice* weightChoice = runCache(iEvent.getRun().index()); - // go fill tables - fillLHEWeightTables(counter, - weightChoice, - genWeightChoice, - weight, - *lheInfo, - *genInfo, - lheScaleTab, - lhePdfTab, - lheRwgtTab, - lheNamedTab, - genPSTab); - } else if (getLHEweightsFromGenInfo) { - fillLHEPdfWeightTablesFromGenInfo( - counter, genWeightChoice, weight, *genInfo, lheScaleTab, lhePdfTab, lheNamedTab, genPSTab); - lheRwgtTab = std::make_unique(1, "LHEReweightingWeights", true); - //lheNamedTab.reset(new nanoaod::FlatTable(1, "LHENamedWeights", true)); - //genPSTab.reset(new nanoaod::FlatTable(1, "PSWeight", true)); - } else { - // Still try to add the PS weights - fillOnlyPSWeightTable(counter, genWeightChoice, weight, *genInfo, genPSTab); - // make dummy values - lheScaleTab = std::make_unique(1, "LHEScaleWeights", true); - lhePdfTab = std::make_unique(1, "LHEPdfWeights", true); - lheRwgtTab = std::make_unique(1, "LHEReweightingWeights", true); - lheNamedTab = std::make_unique(1, "LHENamedWeights", true); - if (!hasIssuedWarning_.exchange(true)) { - edm::LogWarning("LHETablesProducer") << "No LHEEventProduct, so there will be no LHE Tables\n"; - } - } - - iEvent.put(std::move(lheScaleTab), "LHEScale"); - iEvent.put(std::move(lhePdfTab), "LHEPdf"); - iEvent.put(std::move(lheRwgtTab), "LHEReweighting"); - iEvent.put(std::move(lheNamedTab), "LHENamed"); - iEvent.put(std::move(genPSTab), "PS"); - } - - void fillLHEWeightTables(Counter* counter, - const DynamicWeightChoice* weightChoice, - const DynamicWeightChoiceGenInfo* genWeightChoice, - double genWeight, - const LHEEventProduct& lheProd, - const GenEventInfoProduct& genProd, - std::unique_ptr& outScale, - std::unique_ptr& outPdf, - std::unique_ptr& outRwgt, - std::unique_ptr& outNamed, - std::unique_ptr& outPS) const { - bool lheDebug = debug_.exchange( - false); // make sure only the first thread dumps out this (even if may still be mixed up with other output, but nevermind) - - const std::vector& scaleWeightIDs = weightChoice->scaleWeightIDs; - const std::vector& pdfWeightIDs = weightChoice->pdfWeightIDs; - const std::vector& rwgtWeightIDs = weightChoice->rwgtIDs; - - double w0 = lheProd.originalXWGTUP(); - - std::vector wScale(scaleWeightIDs.size(), 1), wPDF(pdfWeightIDs.size(), 1), wRwgt(rwgtWeightIDs.size(), 1), - wNamed(namedWeightIDs_.size(), 1); - for (auto& weight : lheProd.weights()) { - if (lheDebug) - printf("Weight %+9.5f rel %+9.5f for id %s\n", weight.wgt, weight.wgt / w0, weight.id.c_str()); - // now we do it slowly, can be optimized - auto mScale = std::find(scaleWeightIDs.begin(), scaleWeightIDs.end(), weight.id); - if (mScale != scaleWeightIDs.end()) - wScale[mScale - scaleWeightIDs.begin()] = weight.wgt / w0; - - auto mPDF = std::find(pdfWeightIDs.begin(), pdfWeightIDs.end(), weight.id); - if (mPDF != pdfWeightIDs.end()) - wPDF[mPDF - pdfWeightIDs.begin()] = weight.wgt / w0; - - auto mRwgt = std::find(rwgtWeightIDs.begin(), rwgtWeightIDs.end(), weight.id); - if (mRwgt != rwgtWeightIDs.end()) - wRwgt[mRwgt - rwgtWeightIDs.begin()] = weight.wgt / w0; - - auto mNamed = std::find(namedWeightIDs_.begin(), namedWeightIDs_.end(), weight.id); - if (mNamed != namedWeightIDs_.end()) - wNamed[mNamed - namedWeightIDs_.begin()] = weight.wgt / w0; - } - - std::vector wPS; - std::string psWeightDocStr; - setPSWeightInfo(genProd.weights(), genWeightChoice, wPS, psWeightDocStr); - - outPS = std::make_unique(wPS.size(), "PSWeight", false); - outPS->addColumn("", wPS, psWeightDocStr, lheWeightPrecision_); - - outScale = std::make_unique(wScale.size(), "LHEScaleWeight", false); - outScale->addColumn("", wScale, weightChoice->scaleWeightsDoc, lheWeightPrecision_); - - outPdf = std::make_unique(wPDF.size(), "LHEPdfWeight", false); - outPdf->addColumn("", wPDF, weightChoice->pdfWeightsDoc, lheWeightPrecision_); - - outRwgt = std::make_unique(wRwgt.size(), "LHEReweightingWeight", false); - outRwgt->addColumn("", wRwgt, weightChoice->rwgtWeightDoc, lheWeightPrecision_); - - outNamed = std::make_unique(1, "LHEWeight", true); - outNamed->addColumnValue("originalXWGTUP", lheProd.originalXWGTUP(), "Nominal event weight in the LHE file"); - for (unsigned int i = 0, n = wNamed.size(); i < n; ++i) { - outNamed->addColumnValue(namedWeightLabels_[i], - wNamed[i], - "LHE weight for id " + namedWeightIDs_[i] + ", relative to nominal", - lheWeightPrecision_); - } - - counter->incLHE(genWeight, wScale, wPDF, wRwgt, wNamed, wPS); - } - - void fillLHEPdfWeightTablesFromGenInfo(Counter* counter, - const DynamicWeightChoiceGenInfo* weightChoice, - double genWeight, - const GenEventInfoProduct& genProd, - std::unique_ptr& outScale, - std::unique_ptr& outPdf, - std::unique_ptr& outNamed, - std::unique_ptr& outPS) const { - const std::vector& scaleWeightIDs = weightChoice->scaleWeightIDs; - const std::vector& pdfWeightIDs = weightChoice->pdfWeightIDs; - - auto weights = genProd.weights(); - double w0 = (weights.size() > 1) ? weights.at(1) : 1.; - double originalXWGTUP = (weights.size() > 1) ? weights.at(1) : 1.; - - std::vector wScale, wPDF, wPS; - for (auto id : scaleWeightIDs) - wScale.push_back(weights.at(id) / w0); - for (auto id : pdfWeightIDs) { - wPDF.push_back(weights.at(id) / w0); - } - - std::string psWeightsDocStr; - setPSWeightInfo(genProd.weights(), weightChoice, wPS, psWeightsDocStr); - - outScale = std::make_unique(wScale.size(), "LHEScaleWeight", false); - outScale->addColumn("", wScale, weightChoice->scaleWeightsDoc, lheWeightPrecision_); - - outPdf = std::make_unique(wPDF.size(), "LHEPdfWeight", false); - outPdf->addColumn("", wPDF, weightChoice->pdfWeightsDoc, lheWeightPrecision_); - - outPS = std::make_unique(wPS.size(), "PSWeight", false); - outPS->addColumn("", wPS, psWeightsDocStr, lheWeightPrecision_); - - outNamed = std::make_unique(1, "LHEWeight", true); - outNamed->addColumnValue("originalXWGTUP", originalXWGTUP, "Nominal event weight in the LHE file"); - /*for (unsigned int i = 0, n = wNamed.size(); i < n; ++i) { - outNamed->addColumnValue(namedWeightLabels_[i], wNamed[i], "LHE weight for id "+namedWeightIDs_[i]+", relative to nominal", lheWeightPrecision_); - }*/ - - counter->incLHE(genWeight, wScale, wPDF, std::vector(), std::vector(), wPS); - } - - void fillOnlyPSWeightTable(Counter* counter, - const DynamicWeightChoiceGenInfo* genWeightChoice, - double genWeight, - const GenEventInfoProduct& genProd, - std::unique_ptr& outPS) const { - std::vector wPS; - std::string psWeightDocStr; - setPSWeightInfo(genProd.weights(), genWeightChoice, wPS, psWeightDocStr); - outPS = std::make_unique(wPS.size(), "PSWeight", false); - outPS->addColumn("", wPS, psWeightDocStr, lheWeightPrecision_); - - counter->incGenOnly(genWeight); - counter->incPSOnly(genWeight, wPS); - } - - void setPSWeightInfo(const std::vector& genWeights, - const DynamicWeightChoiceGenInfo* genWeightChoice, - std::vector& wPS, - std::string& psWeightDocStr) const { - wPS.clear(); - // isRegularPSSet = keeping all weights and the weights are a usual size, ie - // all weights are PS weights (don't use header incase missing names) - bool isRegularPSSet = keepAllPSWeights_ && (genWeights.size() == 14 || genWeights.size() == 46); - if (!genWeightChoice->psWeightIDs.empty() && !isRegularPSSet) { - psWeightDocStr = genWeightChoice->psWeightsDoc; - double psNom = genWeights.at(genWeightChoice->psBaselineID); - for (auto wgtidx : genWeightChoice->psWeightIDs) { - wPS.push_back(genWeights.at(wgtidx) / psNom); - } - } else { - int vectorSize = - keepAllPSWeights_ ? (genWeights.size() - 2) : ((genWeights.size() == 14 || genWeights.size() == 46) ? 4 : 1); - - if (vectorSize > 1) { - double nominal = genWeights.at(1); // Called 'Baseline' in GenLumiInfoHeader - if (keepAllPSWeights_) { - for (int i = 0; i < vectorSize; i++) { - wPS.push_back(genWeights.at(i + 2) / nominal); - } - psWeightDocStr = "All PS weights (w_var / w_nominal)"; - } else { - if (!psWeightWarning_.exchange(true)) - edm::LogWarning("LHETablesProducer") - << "GenLumiInfoHeader not found: Central PartonShower weights will fill with the 6-10th entries \n" - << " This may incorrect for some mcs (madgraph 2.6.1 with its `isr:murfact=0.5` have a differnt " - "order )"; - for (std::size_t i = 6; i < 10; i++) { - wPS.push_back(genWeights.at(i) / nominal); - } - psWeightDocStr = - "PS weights (w_var / w_nominal); [0] is ISR=2 FSR=1; [1] is ISR=1 FSR=2" - "[2] is ISR=0.5 FSR=1; [3] is ISR=1 FSR=0.5;"; - } - } else { - wPS.push_back(1.0); - psWeightDocStr = "dummy PS weight (1.0) "; - } - } - } - - // create an empty counter - std::shared_ptr globalBeginRun(edm::Run const& iRun, edm::EventSetup const&) const override { - edm::Handle lheInfo; - - bool lheDebug = debugRun_.exchange( - false); // make sure only the first thread dumps out this (even if may still be mixed up with other output, but nevermind) - auto weightChoice = std::make_shared(); - - // getByToken throws since we're not in the endRun (see https://github.com/cms-sw/cmssw/pull/18499) - //if (iRun.getByToken(lheRunTag_, lheInfo)) { - for (const auto& lheLabel : lheLabel_) { - iRun.getByLabel(lheLabel, lheInfo); - if (lheInfo.isValid()) { + edm::Handle genWeightInfoHandle; + for (auto& token : genWeightInfoTokens_) { + iLumi.getByToken(token, genWeightInfoHandle); + if (genWeightInfoHandle.isValid()) { break; } } - if (lheInfo.isValid()) { - std::vector scaleVariationIDs; - std::vector pdfSetWeightIDs; - std::vector lheReweighingIDs; - bool isFirstGroup = true; - - std::regex weightgroupmg26x(""); - std::regex weightgroup(""); - std::regex weightgroupRwgt(""); - std::regex endweightgroup(""); - std::regex scalewmg26x( - ""); - std::regex scalewmg26xNew( - ""); - - // MUR=2.0 - std::regex scalew( - "\\s*(?:lhapdf=\\d+|dyn=\\s*-?\\d+)?\\s*((?:mu[rR]|renscfact)" - "=(\\S+)\\s+(?:mu[Ff]|facscfact)=(\\S+)(\\s+.*)?)"); - std::regex pdfw( - "\\s*(?:PDF set|lhapdf|PDF|pdfset)\\s*=\\s*(\\d+)\\s*(?:\\s.*)?"); - std::regex pdfwOld("\\s*Member \\s*(\\d+)\\s*(?:.*)"); - std::regex pdfwmg26x( - "\\s*(?:PDF=(\\d+)\\s*MemberID=(\\d+))?\\s*(?:\\s.*)?"); - // - - // PDF=325300 MemberID=0 - std::regex pdfwmg26xNew( - "" - "\\s*(?:PDF=(\\d+)\\s*MemberID=(\\d+))?\\s*(?:\\s.*)?"); - - std::regex rwgt("(.+)?()?"); - std::smatch groups; - for (auto iter = lheInfo->headers_begin(), end = lheInfo->headers_end(); iter != end; ++iter) { - if (iter->tag() != "initrwgt") { - if (lheDebug) - std::cout << "Skipping LHE header with tag" << iter->tag() << std::endl; - continue; - } - if (lheDebug) - std::cout << "Found LHE header with tag" << iter->tag() << std::endl; - std::vector lines = iter->lines(); - bool missed_weightgroup = - false; //Needed because in some of the samples ( produced with MG26X ) a small part of the header info is ordered incorrectly - bool ismg26x = false; - bool ismg26xNew = false; - for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines; - ++iLine) { //First start looping through the lines to see which weightgroup pattern is matched - boost::replace_all(lines[iLine], "<", "<"); - boost::replace_all(lines[iLine], ">", ">"); - if (std::regex_search(lines[iLine], groups, weightgroupmg26x)) { - ismg26x = true; - } else if (std::regex_search(lines[iLine], groups, scalewmg26xNew) || - std::regex_search(lines[iLine], groups, pdfwmg26xNew)) { - ismg26xNew = true; - } - } - for (unsigned int iLine = 0, nLines = lines.size(); iLine < nLines; ++iLine) { - if (lheDebug) - std::cout << lines[iLine]; - if (std::regex_search(lines[iLine], groups, ismg26x ? weightgroupmg26x : weightgroup)) { - std::string groupname = groups.str(2); - if (ismg26x) - groupname = groups.str(1); - if (lheDebug) - std::cout << ">>> Looks like the beginning of a weight group for '" << groupname << "'" << std::endl; - if (groupname.find("scale_variation") == 0 || groupname == "Central scale variation" || isFirstGroup) { - if (lheDebug && groupname.find("scale_variation") != 0 && groupname != "Central scale variation") - std::cout << ">>> First weight is not scale variation, but assuming is the Central Weight" << std::endl; - else if (lheDebug) - std::cout << ">>> Looks like scale variation for theory uncertainties" << std::endl; - isFirstGroup = false; - for (++iLine; iLine < nLines; ++iLine) { - if (lheDebug) { - std::cout << " " << lines[iLine]; - } - if (std::regex_search( - lines[iLine], groups, ismg26x ? scalewmg26x : (ismg26xNew ? scalewmg26xNew : scalew))) { - if (lheDebug) - std::cout << " >>> Scale weight " << groups[1].str() << " for " << groups[3].str() << " , " - << groups[4].str() << " , " << groups[5].str() << std::endl; - if (ismg26xNew) { - scaleVariationIDs.emplace_back(groups.str(4), groups.str(1), groups.str(3), groups.str(2)); - } else { - scaleVariationIDs.emplace_back(groups.str(1), groups.str(2), groups.str(3), groups.str(4)); - } - } else if (std::regex_search(lines[iLine], endweightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the end of a weight group" << std::endl; - if (!missed_weightgroup) { - break; - } else - missed_weightgroup = false; - } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " - "of the group." - << std::endl; - if (ismg26x || ismg26xNew) - missed_weightgroup = true; - --iLine; // rewind by one, and go back to the outer loop - break; - } - } - } else if (groupname == "PDF_variation" || groupname.find("PDF_variation ") == 0) { - if (lheDebug) - std::cout << ">>> Looks like a new-style block of PDF weights for one or more pdfs" << std::endl; - for (++iLine; iLine < nLines; ++iLine) { - if (lheDebug) - std::cout << " " << lines[iLine]; - if (std::regex_search(lines[iLine], groups, pdfw)) { - unsigned int lhaID = std::stoi(groups.str(2)); - if (lheDebug) - std::cout << " >>> PDF weight " << groups.str(1) << " for " << groups.str(2) << " = " << lhaID - << std::endl; - if (pdfSetWeightIDs.empty() || !pdfSetWeightIDs.back().maybe_add(groups.str(1), lhaID)) { - pdfSetWeightIDs.emplace_back(groups.str(1), lhaID); - } - } else if (std::regex_search(lines[iLine], endweightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the end of a weight group" << std::endl; - if (!missed_weightgroup) { - break; - } else - missed_weightgroup = false; - } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " - "of the group." - << std::endl; - if (ismg26x || ismg26xNew) - missed_weightgroup = true; - --iLine; // rewind by one, and go back to the outer loop - break; - } - } - } else if (groupname == "PDF_variation1" || groupname == "PDF_variation2") { - if (lheDebug) - std::cout << ">>> Looks like a new-style block of PDF weights for multiple pdfs" << std::endl; - unsigned int lastid = 0; - for (++iLine; iLine < nLines; ++iLine) { - if (lheDebug) - std::cout << " " << lines[iLine]; - if (std::regex_search(lines[iLine], groups, pdfw)) { - unsigned int id = std::stoi(groups.str(1)); - unsigned int lhaID = std::stoi(groups.str(2)); - if (lheDebug) - std::cout << " >>> PDF weight " << groups.str(1) << " for " << groups.str(2) << " = " << lhaID - << std::endl; - if (id != (lastid + 1) || pdfSetWeightIDs.empty()) { - pdfSetWeightIDs.emplace_back(groups.str(1), lhaID); - } else { - pdfSetWeightIDs.back().add(groups.str(1), lhaID); - } - lastid = id; - } else if (std::regex_search(lines[iLine], endweightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the end of a weight group" << std::endl; - if (!missed_weightgroup) { - break; - } else - missed_weightgroup = false; - } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " - "of the group." - << std::endl; - if (ismg26x || ismg26xNew) - missed_weightgroup = true; - --iLine; // rewind by one, and go back to the outer loop - break; - } - } - } else if (lhaNameToID_.find(groupname) != lhaNameToID_.end()) { - if (lheDebug) - std::cout << ">>> Looks like an old-style PDF weight for an individual pdf" << std::endl; - unsigned int firstLhaID = lhaNameToID_.find(groupname)->second; - bool first = true; - for (++iLine; iLine < nLines; ++iLine) { - if (lheDebug) - std::cout << " " << lines[iLine]; - if (std::regex_search( - lines[iLine], groups, ismg26x ? pdfwmg26x : (ismg26xNew ? pdfwmg26xNew : pdfwOld))) { - unsigned int member = 0; - if (!ismg26x && !ismg26xNew) { - member = std::stoi(groups.str(2)); - } else if (ismg26xNew) { - if (!groups.str(3).empty()) { - member = std::stoi(groups.str(3)); - } - } else { - if (!groups.str(4).empty()) { - member = std::stoi(groups.str(4)); - } - } - unsigned int lhaID = member + firstLhaID; - if (lheDebug) - std::cout << " >>> PDF weight " << groups.str(1) << " for " << member << " = " << lhaID - << std::endl; - //if (member == 0) continue; // let's keep also the central value for now - if (first) { - pdfSetWeightIDs.emplace_back(groups.str(1), lhaID); - first = false; - } else { - pdfSetWeightIDs.back().add(groups.str(1), lhaID); - } - } else if (std::regex_search(lines[iLine], endweightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the end of a weight group" << std::endl; - if (!missed_weightgroup) { - break; - } else - missed_weightgroup = false; - } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " - "of the group." - << std::endl; - if (ismg26x || ismg26xNew) - missed_weightgroup = true; - --iLine; // rewind by one, and go back to the outer loop - break; - } - } - } else if (groupname == "mass_variation" || groupname == "sthw2_variation" || - groupname == "width_variation") { - if (lheDebug) - std::cout << ">>> Looks like an EW parameter weight" << std::endl; - for (++iLine; iLine < nLines; ++iLine) { - if (lheDebug) - std::cout << " " << lines[iLine]; - if (std::regex_search(lines[iLine], groups, rwgt)) { - std::string rwgtID = groups.str(1); - if (lheDebug) - std::cout << " >>> LHE reweighting weight: " << rwgtID << std::endl; - if (std::find(lheReweighingIDs.begin(), lheReweighingIDs.end(), rwgtID) == lheReweighingIDs.end()) { - // we're only interested in the beggining of the block - lheReweighingIDs.emplace_back(rwgtID); - } - } else if (std::regex_search(lines[iLine], endweightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the end of a weight group" << std::endl; - } - } - } else { - for (++iLine; iLine < nLines; ++iLine) { - if (lheDebug) - std::cout << " " << lines[iLine]; - if (std::regex_search(lines[iLine], groups, endweightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the end of a weight group" << std::endl; - if (!missed_weightgroup) { - break; - } else - missed_weightgroup = false; - } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " - "of the group." - << std::endl; - if (ismg26x || ismg26xNew) - missed_weightgroup = true; - --iLine; // rewind by one, and go back to the outer loop - break; - } - } - } - } else if (std::regex_search(lines[iLine], groups, weightgroupRwgt)) { - std::string groupname = groups.str(1); - if (groupname.find("mg_reweighting") != std::string::npos) { - if (lheDebug) - std::cout << ">>> Looks like a LHE weights for reweighting" << std::endl; - for (++iLine; iLine < nLines; ++iLine) { - if (lheDebug) - std::cout << " " << lines[iLine]; - if (std::regex_search(lines[iLine], groups, rwgt)) { - std::string rwgtID = groups.str(1); - if (lheDebug) - std::cout << " >>> LHE reweighting weight: " << rwgtID << std::endl; - if (std::find(lheReweighingIDs.begin(), lheReweighingIDs.end(), rwgtID) == lheReweighingIDs.end()) { - // we're only interested in the beggining of the block - lheReweighingIDs.emplace_back(rwgtID); - } - } else if (std::regex_search(lines[iLine], endweightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the end of a weight group" << std::endl; - if (!missed_weightgroup) { - break; - } else - missed_weightgroup = false; - } else if (std::regex_search(lines[iLine], ismg26x ? weightgroupmg26x : weightgroup)) { - if (lheDebug) - std::cout << ">>> Looks like the beginning of a new weight group, I will assume I missed the end " - "of the group." - << std::endl; - if (ismg26x) - missed_weightgroup = true; - --iLine; // rewind by one, and go back to the outer loop - break; - } - } - } - } - } - //std::cout << "============= END [ " << iter->tag() << " ] ============ \n\n" << std::endl; - - // ----- SCALE VARIATIONS ----- - std::sort(scaleVariationIDs.begin(), scaleVariationIDs.end()); - if (lheDebug) - std::cout << "Found " << scaleVariationIDs.size() << " scale variations: " << std::endl; - std::stringstream scaleDoc; - scaleDoc << "LHE scale variation weights (w_var / w_nominal); "; - for (unsigned int isw = 0, nsw = scaleVariationIDs.size(); isw < nsw; ++isw) { - const auto& sw = scaleVariationIDs[isw]; - if (isw) - scaleDoc << "; "; - scaleDoc << "[" << isw << "] is " << sw.label; - weightChoice->scaleWeightIDs.push_back(sw.wid); - if (lheDebug) - printf(" id %s: scales ren = % .2f fact = % .2f text = %s\n", - sw.wid.c_str(), - sw.scales.first, - sw.scales.second, - sw.label.c_str()); - } - if (!scaleVariationIDs.empty()) - weightChoice->scaleWeightsDoc = scaleDoc.str(); - - // ------ PDF VARIATIONS (take the preferred one) ----- - if (lheDebug) { - std::cout << "Found " << pdfSetWeightIDs.size() << " PDF set errors: " << std::endl; - for (const auto& pw : pdfSetWeightIDs) - printf("lhaIDs %6d - %6d (%3lu weights: %s, ... )\n", - pw.lhaIDs.first, - pw.lhaIDs.second, - pw.wids.size(), - pw.wids.front().c_str()); - } - - // ------ LHE REWEIGHTING ------- - if (lheDebug) { - std::cout << "Found " << lheReweighingIDs.size() << " reweighting weights" << std::endl; - } - std::copy(lheReweighingIDs.begin(), lheReweighingIDs.end(), std::back_inserter(weightChoice->rwgtIDs)); - - std::stringstream pdfDoc; - pdfDoc << "LHE pdf variation weights (w_var / w_nominal) for LHA IDs "; - bool found = false; - for (const auto& pw : pdfSetWeightIDs) { - for (uint32_t lhaid : preferredPDFLHAIDs_) { - if (pw.lhaIDs.first != lhaid && pw.lhaIDs.first != (lhaid + 1)) - continue; // sometimes the first weight is not saved if that PDF is the nominal one for the sample - if (pw.wids.size() == 1) - continue; // only consider error sets - pdfDoc << pw.lhaIDs.first << " - " << pw.lhaIDs.second; - weightChoice->pdfWeightIDs = pw.wids; - if (maxPdfWeights_ < pw.wids.size()) { - weightChoice->pdfWeightIDs.resize(maxPdfWeights_); // drop some replicas - pdfDoc << ", truncated to the first " << maxPdfWeights_ << " replicas"; - } - weightChoice->pdfWeightsDoc = pdfDoc.str(); - found = true; - break; - } - if (found) - break; - } - } - } - return weightChoice; + auto tostore = groupsToStore(foundLheWeights, genWeightInfoHandle, lheWeightInfoHandle); + return std::make_shared(tostore); } + void globalEndLuminosityBlock(edm::LuminosityBlock const&, edm::EventSetup const&) const override {} // create an empty counter - std::unique_ptr beginStream(edm::StreamID) const override { - return std::make_unique(); - } + std::unique_ptr beginStream(edm::StreamID) const override { return std::make_unique(); } // inizialize to zero at begin run void streamBeginRun(edm::StreamID id, edm::Run const&, edm::EventSetup const&) const override { streamCache(id)->clear(); } + void streamBeginLuminosityBlock(edm::StreamID id, edm::LuminosityBlock const& lumiBlock, edm::EventSetup const& eventSetup) const override { - auto counterMap = &(streamCache(id)->countermap); + auto counterMap = streamCache(id); edm::Handle genLumiInfoHead; lumiBlock.getByToken(genLumiInfoHeadTag_, genLumiInfoHead); if (!genLumiInfoHead.isValid()) - edm::LogWarning("LHETablesProducer") - << "No GenLumiInfoHeader product found, will not fill generator model string.\n"; - - std::string label; - if (genLumiInfoHead.isValid()) { - label = genLumiInfoHead->configDescription(); - boost::replace_all(label, "-", "_"); - boost::replace_all(label, "/", "_"); - } - counterMap->setLabel(label); - - if (genLumiInfoHead.isValid()) { - auto weightChoice = &(streamCache(id)->weightChoice); - - std::vector scaleVariationIDs; - std::vector pdfSetWeightIDs; - weightChoice->psWeightIDs.clear(); - - std::regex scalew("LHE,\\s+id\\s+=\\s+(\\d+),\\s+(.+)\\,\\s+mur=(\\S+)\\smuf=(\\S+)"); - std::regex pdfw("LHE,\\s+id\\s+=\\s+(\\d+),\\s+(.+),\\s+Member\\s+(\\d+)\\s+of\\ssets\\s+(\\w+\\b)"); - std::regex mainPSw("sr(Def|:murfac=)(Hi|Lo|_dn|_up|0.5|2.0)"); - std::smatch groups; - auto weightNames = genLumiInfoHead->weightNames(); - std::unordered_map knownPDFSetsFromGenInfo_; - unsigned int weightIter = 0; - for (const auto& line : weightNames) { - if (std::regex_search(line, groups, scalew)) { // scale variation - auto id = groups.str(1); - auto group = groups.str(2); - auto mur = groups.str(3); - auto muf = groups.str(4); - if (group.find("Central scale variation") != std::string::npos) - scaleVariationIDs.emplace_back(groups.str(1), groups.str(2), groups.str(3), groups.str(4)); - } else if (std::regex_search(line, groups, pdfw)) { // PDF variation - auto id = groups.str(1); - auto group = groups.str(2); - auto memberid = groups.str(3); - auto pdfset = groups.str(4); - if (group.find(pdfset) != std::string::npos) { - if (knownPDFSetsFromGenInfo_.find(pdfset) == knownPDFSetsFromGenInfo_.end()) { - knownPDFSetsFromGenInfo_[pdfset] = std::atoi(id.c_str()); - pdfSetWeightIDs.emplace_back(id, std::atoi(id.c_str())); - } else - pdfSetWeightIDs.back().add(id, std::atoi(id.c_str())); - } - } else if (line == "Baseline") { - weightChoice->psBaselineID = weightIter; - } else if (line.find("isr") != std::string::npos || line.find("fsr") != std::string::npos) { - weightChoice->matchPS_alt = line.find("sr:") != std::string::npos; // (f/i)sr: for new weights - if (keepAllPSWeights_) { - weightChoice->psWeightIDs.push_back(weightIter); // PS variations - } else if (std::regex_search(line, groups, mainPSw)) { - if (weightChoice->psWeightIDs.empty()) - weightChoice->psWeightIDs = std::vector(4, -1); - int psIdx = (line.find("fsr") != std::string::npos) ? 1 : 0; - psIdx += (groups.str(2) == "Hi" || groups.str(2) == "_up" || groups.str(2) == "2.0") ? 0 : 2; - weightChoice->psWeightIDs[psIdx] = weightIter; - } - } - weightIter++; - } - if (keepAllPSWeights_) { - weightChoice->psWeightsDoc = "All PS weights (w_var / w_nominal)"; - } else if (weightChoice->psWeightIDs.size() == 4) { - weightChoice->psWeightsDoc = - "PS weights (w_var / w_nominal); [0] is ISR=2 FSR=1; [1] is ISR=1 FSR=2" - "[2] is ISR=0.5 FSR=1; [3] is ISR=1 FSR=0.5;"; - for (int i = 0; i < 4; i++) { - if (static_cast(weightChoice->psWeightIDs[i]) == -1) - weightChoice->setMissingWeight(i); - } - } else { - weightChoice->psWeightsDoc = "dummy PS weight (1.0) "; - } - - weightChoice->scaleWeightIDs.clear(); - weightChoice->pdfWeightIDs.clear(); - - std::sort(scaleVariationIDs.begin(), scaleVariationIDs.end()); - std::stringstream scaleDoc; - scaleDoc << "LHE scale variation weights (w_var / w_nominal); "; - for (unsigned int isw = 0, nsw = scaleVariationIDs.size(); isw < nsw; ++isw) { - const auto& sw = scaleVariationIDs[isw]; - if (isw) - scaleDoc << "; "; - scaleDoc << "[" << isw << "] is " << sw.label; - weightChoice->scaleWeightIDs.push_back(std::atoi(sw.wid.c_str())); - } - if (!scaleVariationIDs.empty()) - weightChoice->scaleWeightsDoc = scaleDoc.str(); - std::stringstream pdfDoc; - pdfDoc << "LHE pdf variation weights (w_var / w_nominal) for LHA names "; - bool found = false; - for (const auto& pw : pdfSetWeightIDs) { - if (pw.wids.size() == 1) - continue; // only consider error sets - for (const auto& wantedpdf : lhaNameToID_) { - auto pdfname = wantedpdf.first; - if (knownPDFSetsFromGenInfo_.find(pdfname) == knownPDFSetsFromGenInfo_.end()) - continue; - uint32_t lhaid = knownPDFSetsFromGenInfo_.at(pdfname); - if (pw.lhaIDs.first != lhaid) - continue; - pdfDoc << pdfname; - for (const auto& x : pw.wids) - weightChoice->pdfWeightIDs.push_back(std::atoi(x.c_str())); - if (maxPdfWeights_ < pw.wids.size()) { - weightChoice->pdfWeightIDs.resize(maxPdfWeights_); // drop some replicas - pdfDoc << ", truncated to the first " << maxPdfWeights_ << " replicas"; - } - weightChoice->pdfWeightsDoc = pdfDoc.str(); - found = true; - break; - } - if (found) - break; - } - } + edm::LogWarning("LHETablesProducer") << "No GenLumiInfoHeader product found, will not fill generator " + "model string.\n"; + counterMap->setLabel(genLumiInfoHead.isValid() ? genLumiInfoHead->configDescription() : ""); + std::string label = genLumiInfoHead.isValid() ? counterMap->getLabel() : "NULL"; } // create an empty counter std::shared_ptr globalBeginRunSummary(edm::Run const&, edm::EventSetup const&) const override { @@ -1075,112 +115,455 @@ class GenWeightsTableProducer : public edm::global::EDProducermerge(streamCache(id)->countermap); - } + CounterMap* runCounterMap) const override; // nothing to do per se void globalEndRunSummary(edm::Run const&, edm::EventSetup const&, CounterMap* runCounterMap) const override {} // write the total to the run - void globalEndRunProduce(edm::Run& iRun, edm::EventSetup const&, CounterMap const* runCounterMap) const override { - auto out = std::make_unique(); + void globalEndRunProduce(edm::Run& iRun, edm::EventSetup const& es, CounterMap const* runCounterMap) const override; + // nothing to do here + // void globalEndRun(edm::Run const& iRun, edm::EventSetup const& es, + // CounterMap* runCounterMap) const override {} - for (const auto& x : runCounterMap->countermap) { - auto runCounter = &(x.second); - std::string label = (!x.first.empty()) ? (std::string("_") + x.first) : ""; - std::string doclabel = (!x.first.empty()) ? (std::string(", for model label ") + x.first) : ""; + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); - out->addInt("genEventCount" + label, "event count" + doclabel, runCounter->num); - out->addFloat("genEventSumw" + label, "sum of gen weights" + doclabel, runCounter->sumw); - out->addFloat("genEventSumw2" + label, "sum of gen (weight^2)" + doclabel, runCounter->sumw2); +protected: + const std::vector> lheWeightTokens_; + const std::vector> lheWeightInfoTokens_; + const std::vector> genWeightTokens_; + const std::vector> genWeightInfoTokens_; + const edm::EDGetTokenT genEventInfoToken_; + const edm::EDGetTokenT genLumiInfoHeadTag_; + const std::vector weightgroups_; + const std::vector outputnames_; + const std::vector maxGroupsPerType_; + const std::vector pdfIds_; + int lheWeightPrecision_; + std::vector unknownOnlyIfEmpty_; + bool keepAllPSWeights_; + bool ignoreLheGroups_; + bool ignoreGenGroups_; + int nStoreUngroupedLhe_; + int nStoreUngroupedGen_; - double norm = runCounter->sumw ? 1.0 / runCounter->sumw : 1; - auto sumScales = runCounter->sumScale; - for (auto& val : sumScales) - val *= norm; - out->addVFloatWithNorm("LHEScaleSumw" + label, - "Sum of genEventWeight * LHEScaleWeight[i], divided by genEventSumw" + doclabel, - sumScales, - runCounter->sumw); - auto sumPDFs = runCounter->sumPDF; - for (auto& val : sumPDFs) - val *= norm; - out->addVFloatWithNorm("LHEPdfSumw" + label, - "Sum of genEventWeight * LHEPdfWeight[i], divided by genEventSumw" + doclabel, - sumPDFs, - runCounter->sumw); - auto sumPS = runCounter->sumPS; - for (auto& val : sumPS) - val *= norm; - out->addVFloatWithNorm("PSSumw" + label, - "Sum of genEventWeight * PSWeight[i], divided by genEventSumw" + doclabel, - sumPS, - runCounter->sumw); - if (!runCounter->sumRwgt.empty()) { - auto sumRwgts = runCounter->sumRwgt; - for (auto& val : sumRwgts) - val *= norm; - out->addVFloatWithNorm("LHEReweightingSumw" + label, - "Sum of genEventWeight * LHEReweightingWeight[i], divided by genEventSumw" + doclabel, - sumRwgts, - runCounter->sumw); - } - if (!runCounter->sumNamed.empty()) { // it could be empty if there's no LHE info in the sample - for (unsigned int i = 0, n = namedWeightLabels_.size(); i < n; ++i) { - out->addFloatWithNorm( - "LHESumw_" + namedWeightLabels_[i] + label, - "Sum of genEventWeight * LHEWeight_" + namedWeightLabels_[i] + ", divided by genEventSumw" + doclabel, - runCounter->sumNamed[i] * norm, - runCounter->sumw); - } - } + enum { inLHE, inGen }; +}; +GenWeightsTableProducer::GenWeightsTableProducer(edm::ParameterSet const& params) + : lheWeightTokens_( + edm::vector_transform(params.getParameter>("lheWeights"), + [this](const edm::InputTag& tag) { return mayConsume(tag); })), + lheWeightInfoTokens_(edm::vector_transform( + params.getParameter>("lheWeights"), + [this](const edm::InputTag& tag) { return mayConsume(tag); })), + genWeightTokens_( + edm::vector_transform(params.getParameter>("genWeights"), + [this](const edm::InputTag& tag) { return mayConsume(tag); })), + genWeightInfoTokens_(edm::vector_transform( + params.getParameter>("genWeights"), + [this](const edm::InputTag& tag) { return mayConsume(tag); })), + genEventInfoToken_(consumes(params.getParameter("genEvent"))), + genLumiInfoHeadTag_( + mayConsume(params.getParameter("genLumiInfoHeader"))), + weightgroups_(edm::vector_transform(params.getParameter>("weightgroups"), + [](auto& c) { return gen::WeightType(c.at(0)); })), + outputnames_(params.getParameter>("outputNames")), + maxGroupsPerType_(params.getParameter>("maxGroupsPerType")), + pdfIds_(params.getUntrackedParameter>("pdfIds", {})), + lheWeightPrecision_(params.getParameter("lheWeightPrecision")), + unknownOnlyIfEmpty_(edm::vector_transform(params.getParameter>("unknownOnlyIfEmpty"), + [](auto& c) { return gen::WeightType(c.at(0)); })), + keepAllPSWeights_(params.getParameter("keepAllPSWeights")), + ignoreLheGroups_(params.getUntrackedParameter("ignoreLheGroups", false)), + ignoreGenGroups_(params.getUntrackedParameter("ignoreGenGroups", false)), + nStoreUngroupedLhe_(params.getUntrackedParameter("nStoreUngroupedLhe", 10)), + nStoreUngroupedGen_(params.getUntrackedParameter("nStoreUngroupedGen", 10)) { + if (weightgroups_.size() != maxGroupsPerType_.size() || weightgroups_.size() != outputnames_.size()) + throw std::invalid_argument( + "Inputs 'weightgroups', 'maxGroupsPerType', and 'outputNames' must " + "have equal size" + "! Found " + + std::to_string(weightgroups_.size()) + "; " + std::to_string(maxGroupsPerType_.size()) + "; " + + std::to_string(outputnames_.size())); + + produces("GENWeight"); + produces(); + produces("genModel"); + produces>("LHEWeightTableVec"); +} + +void GenWeightsTableProducer::produce(edm::StreamID id, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // access counter for weight sums + Counter& counter = *streamCache(id)->get(); + edm::Handle lheWeightHandle; + bool foundLheWeights = false; + for (auto& token : lheWeightTokens_) { + iEvent.getByToken(token, lheWeightHandle); + if (lheWeightHandle.isValid()) { + foundLheWeights = true; + break; } - iRun.put(std::move(out)); } - // nothing to do here - void globalEndRun(edm::Run const&, edm::EventSetup const&) const override {} - static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { - edm::ParameterSetDescription desc; - desc.add("genEvent", edm::InputTag("generator")) - ->setComment("tag for the GenEventInfoProduct, to get the main weight"); - desc.add("genLumiInfoHeader", edm::InputTag("generator")) - ->setComment("tag for the GenLumiInfoProduct, to get the model string"); - desc.add>("lheInfo", std::vector{{"externalLHEProducer"}, {"source"}}) - ->setComment("tag(s) for the LHE information (LHEEventProduct and LHERunInfoProduct)"); + auto const& genInfo = iEvent.get(genEventInfoToken_); + const double genWeight = genInfo.weight(); + // table for gen info, always available + auto outGeninfo = std::make_unique(1, "genWeight", true); + outGeninfo->setDoc("generator weight"); + outGeninfo->addColumnValue("", genInfo.weight(), "generator weight"); + iEvent.put(std::move(outGeninfo), "GENWeight"); + // this will take care of sum of genWeights + counter.incGenOnly(genWeight); + + std::string& model_label = streamCache(id)->getLabel(); + auto outM = std::make_unique((!model_label.empty()) ? std::string("GenModel_") + model_label : ""); + iEvent.put(std::move(outM), "genModel"); + + WeightsContainer lheWeights; + if (foundLheWeights) { + const GenWeightProduct* lheWeightProduct = lheWeightHandle.product(); + lheWeights = lheWeightProduct->weights(); + } - edm::ParameterSetDescription prefpdf; - prefpdf.add("name"); - prefpdf.add("lhaid"); - desc.addVPSet("preferredPDFs", prefpdf, std::vector()) - ->setComment( - "LHA PDF Ids of the preferred PDF sets, in order of preference (the first matching one will be used)"); - desc.add>("namedWeightIDs")->setComment("set of LHA weight IDs for named LHE weights"); - desc.add>("namedWeightLabels") - ->setComment("output names for the namedWeightIDs (in the same order)"); - desc.add("lheWeightPrecision")->setComment("Number of bits in the mantissa for LHE weights"); - desc.add("maxPdfWeights")->setComment("Maximum number of PDF weights to save (to crop NN replicas)"); - desc.add("keepAllPSWeights")->setComment("Store all PS weights found"); - desc.addOptionalUntracked("debug")->setComment("dump out all LHE information for one event"); - descriptions.add("genWeightsTable", desc); + edm::Handle genWeightHandle; + for (auto& token : genWeightTokens_) { + iEvent.getByToken(token, genWeightHandle); + if (genWeightHandle.isValid()) { + break; + } + } + const GenWeightProduct* genWeightProduct = genWeightHandle.product(); + WeightsContainer genWeights = genWeightProduct->weights(); + + auto const& weightInfos = *luminosityBlockCache(iEvent.getLuminosityBlock().index()); + + // create a container with dummy weight vector + auto weightTablevec = std::make_unique>(); + if (foundLheWeights) { + if (ignoreLheGroups_) { + fillTableIgnoringGroups(*weightTablevec, weightInfos.at(inLHE), lheWeights, nStoreUngroupedLhe_, "LHEWeight"); + } else + addWeightGroupToTable(*weightTablevec, weightInfos.at(inLHE), lheWeights); } -protected: - const edm::EDGetTokenT genTag_; - const std::vector lheLabel_; - const std::vector> lheTag_; - const std::vector> lheRunTag_; - const edm::EDGetTokenT genLumiInfoHeadTag_; + if (ignoreGenGroups_) + fillTableIgnoringGroups(*weightTablevec, weightInfos.at(inGen), genWeights, nStoreUngroupedGen_, "GenWeight"); + else + addWeightGroupToTable(*weightTablevec, weightInfos.at(inGen), genWeights); + + iEvent.put(std::move(weightTablevec), "LHEWeightTableVec"); +} + +// Sequentially add the weights, up to maxStore +// Note that the order of the weights in the WeightsVector matches the order of +// weightgroups. In very rare cases, this could be modified from the order in +// the LHE file. If this happens, write a warning message in the table info +void GenWeightsTableProducer::fillTableIgnoringGroups(std::vector& weightTablevec, + const WeightGroupDataContainer& weightInfos, + WeightsContainer& allWeights, + size_t maxStore, + std::string tablename) const { + std::vector weights(maxStore); + size_t groupIdx = 0; + size_t offset = 0; + std::string tableInfo = "First "; + tableInfo.append(std::to_string(maxStore)); + tableInfo.append(" weights; "); + std::string warnings = ""; + bool foundUnassociated = false; + for (size_t i = 0; i < maxStore; i++) { + if (groupIdx >= allWeights.size()) + throw cms::Exception("GenWeightsTableProducer") + << "Requested " + std::to_string(maxStore) + " weights, which is more than there are in the file"; + size_t entry = i - offset; + auto& weightsForGroup = allWeights.at(groupIdx); + weights.at(i) = weightsForGroup.at(entry); + + if (weightInfos.size() <= groupIdx) + throw cms::Exception("GenWeightsTableProducer") + << "Unable to match weight to one of " << weightInfos.size() << " WeightGroups"; + auto matchingGroup = weightInfos.at(groupIdx).group; + if (entry == 0) { + size_t maxRange = std::min(offset + weightsForGroup.size() - 1, maxStore); + tableInfo.append("["); + tableInfo.append(std::to_string(offset)); + tableInfo.append("]-["); + tableInfo.append(std::to_string(maxRange)); + tableInfo.append("] "); + tableInfo.append(matchingGroup->name()); + tableInfo.append("; "); + } - std::vector preferredPDFLHAIDs_; - std::unordered_map lhaNameToID_; - std::vector namedWeightIDs_; - std::vector namedWeightLabels_; - int lheWeightPrecision_; - unsigned int maxPdfWeights_; - bool keepAllPSWeights_; + // Check if the order corresponds to the LHE file order + try { + auto matchingInfo = matchingGroup->weightMetaInfo(entry); + + if (matchingInfo.globalIndex != i) { + warnings.append("Index "); + warnings.append(std::to_string(i)); + warnings.append( + " does not match order in the LHE file or gen product (where it is " + "entry "); + warnings.append(std::to_string(matchingInfo.globalIndex)); + warnings.append(")"); + } + } catch (cms::Exception& e) { + if (!foundUnassociated) + warnings.append( + "Could not associate some weights to a group. Cannot verify" + " that the order is maintained wrt the LHE file or gen product"); + foundUnassociated = true; + } + if (entry == weightsForGroup.size() - 1) { + groupIdx += 1; + offset += weightsForGroup.size(); + } + } + if (!warnings.empty()) + tableInfo.append("WARNING: " + warnings); + + weightTablevec.emplace_back(weights.size(), tablename, false); + weightTablevec.back().addColumn("", weights, tableInfo, lheWeightPrecision_); +} + +void GenWeightsTableProducer::addWeightGroupToTable(std::vector& weightTablevec, + const WeightGroupDataContainer& weightInfos, + WeightsContainer& allWeights) const { + std::unordered_map typeCount = {}; + for (auto& type : gen::allWeightTypes) + typeCount[type] = 0; + + std::unordered_map weightTypeNames_; + for (size_t i = 0; i < weightgroups_.size(); i++) { + weightTypeNames_[weightgroups_[i]] = outputnames_[i]; + } - mutable std::atomic debug_, debugRun_, hasIssuedWarning_, psWeightWarning_; -}; + for (const auto& groupInfo : weightInfos) { + gen::WeightType weightType = groupInfo.group->weightType(); + std::string entryName = weightTypeNames_.at(weightType); + std::string label = groupInfo.group->description(); + auto weights = allWeights.at(groupInfo.index); + if (weightType == gen::WeightType::kScaleWeights) { + const auto& scaleGroup = *static_cast(groupInfo.group); + auto weightsAndLabel = orderedScaleWeights(weights, scaleGroup); + label.append(weightsAndLabel.first); + weights = weightsAndLabel.second; + } else if (weightType == gen::WeightType::kPartonShowerWeights) { + const auto& psGroup = *static_cast(groupInfo.group); + if (!keepAllPSWeights_) { + auto weightsAndLabel = preferredPSweights(weights, psGroup); + label.append(weightsAndLabel.first); + weights = weightsAndLabel.second; + } else if (psGroup.isWellFormed()) { + double baseline = weights[psGroup.weightIndexFromLabel("Baseline")]; + for (size_t i = 0; i < weights.size(); i++) + weights[i] = weights[i] / baseline; + label = "PS weights (w_var / w_nominal)"; + } else + label.append( + "WARNING: Did not properly parse weight information. Verify order " + "manually."); + } else if (!groupInfo.group->isWellFormed()) + label.append( + "WARNING: Did not properly parse weight information. Verify order " + "manually."); + + if (typeCount[weightType] > 0) { + entryName.append("AltSet"); + entryName.append(std::to_string(typeCount[weightType])); + } + weightTablevec.emplace_back(weights.size(), entryName, false); + weightTablevec.back().addColumn("", weights, label, lheWeightPrecision_); + + typeCount[weightType]++; + } +} + +WeightGroupsToStore GenWeightsTableProducer::groupsToStore( + bool foundLheWeights, + edm::Handle& genWeightInfoHandle, + edm::Handle& lheWeightInfoHandle) const { + std::unordered_map storePerType; + for (size_t i = 0; i < weightgroups_.size(); i++) + storePerType[weightgroups_.at(i)] = maxGroupsPerType_.at(i); + + WeightGroupsToStore weightsToStore; + // The order LHE then GEN is useful for the unknownOnlyIfEmpy check + bool storeUnknown = unknownOnlyIfEmpty_.empty(); + auto groupsToSearch = unknownOnlyIfEmpty_; + for (auto genOrLhe : {inLHE, inGen}) { + bool isLHE = genOrLhe == inLHE; + if (isLHE && !foundLheWeights) + continue; + auto& hand = isLHE ? lheWeightInfoHandle : genWeightInfoHandle; + bool ignoreGroups = isLHE ? ignoreLheGroups_ : ignoreGenGroups_; + auto& toStorePerType = weightsToStore[genOrLhe]; + if (ignoreGroups) { + toStorePerType = hand->allWeightGroupsInfoWithIndices(); + } else { + for (auto& typeAndCount : storePerType) { + if (typeAndCount.first == gen::WeightType::kUnknownWeights && !storeUnknown) + continue; + // Since the count isn't updated, the counts are effectively independent + // between LHE and GEN + auto groupsPerType = weightDataPerType(hand, typeAndCount.first, typeAndCount.second); + // Only store unknown if at least one specified groups is empty + if (!storeUnknown && !groupsToSearch.empty()) { + auto it = std::find(std::begin(groupsToSearch), std::end(groupsToSearch), typeAndCount.first); + if (it != std::end(groupsToSearch)) { + if (groupsPerType.empty()) + storeUnknown = true; + // Remove from array to avoid repeating the check on GEN. NOTE, if + // parton shower weights are included as one of the ones to + // consider, this can cause unknown LHE weights to be stored, given + // the order of the loops + else + groupsToSearch.erase(it); + } + } + toStorePerType.insert(std::end(toStorePerType), std::begin(groupsPerType), std::end(groupsPerType)); + } + } + } + return weightsToStore; +} + +WeightGroupDataContainer GenWeightsTableProducer::weightDataPerType( + edm::Handle& weightsInfoHandle, gen::WeightType weightType, size_t maxStore) const { + WeightGroupDataContainer allgroups; + if (weightType == gen::WeightType::kPdfWeights && !pdfIds_.empty()) { + allgroups = weightsInfoHandle->pdfGroupsWithIndicesByLHAIDs(pdfIds_); + if (allgroups.size() > maxStore) + allgroups.resize(maxStore); + } else + allgroups = weightsInfoHandle->weightGroupsAndIndicesByType(weightType, maxStore); + + return allgroups; +} + +std::pair> GenWeightsTableProducer::orderedScaleWeights( + const std::vector& scaleWeights, const gen::ScaleWeightGroupInfo& scaleGroup) const { + std::vector weights; + std::string labels = "LHE scale variation weights (w_var / w_nominal); "; + if (scaleGroup.isWellFormed()) { + weights.emplace_back(scaleWeights.at(scaleGroup.muR05muF05Index())); + labels += "[0] is muR=0.5 muF=0.5; "; + weights.emplace_back(scaleWeights.at(scaleGroup.muR05muF1Index())); + labels += "[1] is muR=0.5 muF=1; "; + weights.emplace_back(scaleWeights.at(scaleGroup.muR05muF2Index())); + labels += "[2] is muR=0.5 muF=2; "; + weights.emplace_back(scaleWeights.at(scaleGroup.muR1muF05Index())); + labels += "[3] is muR=1 muF=0.5; "; + weights.emplace_back(scaleWeights.at(scaleGroup.centralIndex())); + labels += "[4] is muR=1 muF=1; "; + weights.emplace_back(scaleWeights.at(scaleGroup.muR1muF2Index())); + labels += "[5] is muR=1 muF=2; "; + weights.emplace_back(scaleWeights.at(scaleGroup.muR2muF05Index())); + labels += "[6] is muR=2 muF=0.5; "; + weights.emplace_back(scaleWeights.at(scaleGroup.muR2muF1Index())); + labels += "[7] is muR=2 muF=1; "; + weights.emplace_back(scaleWeights.at(scaleGroup.muR2muF2Index())); + labels += "[8] is muR=2 muF=2"; + } else { + weights = scaleWeights; + size_t nstore = std::min(gen::ScaleWeightGroupInfo::MIN_SCALE_VARIATIONS, weights.size()); + weights = std::vector(begin(weights), std::begin(weights) + nstore); + labels.append("WARNING: Unexpected format found. Contains first " + std::to_string(nstore) + + " elements of weights vector, unordered"); + } + + return std::make_pair(labels, weights); +} + +std::pair> GenWeightsTableProducer::preferredPSweights( + const std::vector& psWeights, const gen::PartonShowerWeightGroupInfo& pswV) const { + std::vector psTosave; + + std::string labels = "PS weights (w_var / w_nominal); "; + if (pswV.isWellFormed()) { + double baseline = psWeights.at(pswV.weightIndexFromLabel("Baseline")); + psTosave.emplace_back(psWeights.at(pswV.variationIndex(true, false, gen::PSVarType::def)) / baseline); + labels += "[0] is ISR=2 FSR=1; "; + psTosave.emplace_back(psWeights.at(pswV.variationIndex(false, false, gen::PSVarType::def)) / baseline); + labels += "[1] is ISR=1 FSR=2; "; + psTosave.emplace_back(psWeights.at(pswV.isrCombinedDownIndex(gen::PSVarType::def)) / baseline); + labels += "[2] is ISR=0.5 FSR=1; "; + psTosave.emplace_back(psWeights.at(pswV.fsrCombinedDownIndex(gen::PSVarType::def)) / baseline); + labels += "[3] is ISR=1 FSR=0.5; "; + } + return std::make_pair(labels, psTosave); +} + +void GenWeightsTableProducer::streamEndRunSummary(edm::StreamID id, + edm::Run const&, + edm::EventSetup const&, + CounterMap* runCounterMap) const { + // this takes care for mergeing all the weight sums + runCounterMap->mergeSumMap(*streamCache(id)); +} + +void GenWeightsTableProducer::globalEndRunProduce(edm::Run& iRun, + edm::EventSetup const&, + CounterMap const* runCounterMap) const { + auto out = std::make_unique(); + + for (auto x : runCounterMap->countermap) { + auto& runCounter = x.second; + std::string label = std::string("_") + x.first; + std::string doclabel = (!x.first.empty()) ? (std::string(", for model label ") + x.first) : ""; + + out->addInt("genEventCount" + label, "event count" + doclabel, runCounter.num_); + out->addFloat("genEventSumw" + label, "sum of gen weights" + doclabel, runCounter.sumw_); + out->addFloat("genEventSumw2" + label, "sum of gen (weight^2)" + doclabel, runCounter.sumw2_); + + double norm = runCounter.sumw_ ? 1.0 / runCounter.sumw_ : 1; + // Sum from map + for (auto& sumw : runCounter.weightSumMap_) { + // Normalize with genEventSumw + for (auto& val : sumw.second) + val *= norm; + out->addVFloat(sumw.first + "Sumw" + label, + "Sum of genEventWeight *" + sumw.first + "[i]/genEventSumw" + doclabel, + sumw.second); + } + } + iRun.put(std::move(out)); +} +void GenWeightsTableProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add>("lheWeights"); + desc.add>("genWeights", std::vector{{"genWeights"}}); + desc.add("genEvent", edm::InputTag("generator")) + ->setComment("tag for the GenEventInfoProduct, to get the main weight"); + desc.add("genLumiInfoHeader", edm::InputTag("generator")) + ->setComment("tag for the GenLumiInfoProduct, to get the model string"); + desc.add>("weightgroups"); + desc.add>("outputNames"); + desc.add>("maxGroupsPerType"); + desc.addOptionalUntracked>("pdfIds"); + desc.add("lheWeightPrecision", -1)->setComment("Number of bits in the mantissa for LHE weights"); + desc.add>("unknownOnlyIfEmpty") + ->setComment( + "Only store weights in an Unknown WeightGroup if one of the " + "specified groups is empty"); + desc.add("keepAllPSWeights", false) + ->setComment("True: stores all PS weights (usually 45); False: saves preferred 4"); + desc.addUntracked("ignoreLheGroups", false) + ->setComment( + "Ignore LHE groups and store the first n weights, regardless of " + "type"); + desc.addUntracked("ignoreGenGroups", false) + ->setComment( + "Ignore Gen groups and store the first n weights, regardless of " + "type"); + desc.addUntracked("nStoreUngroupedLhe", 10) + ->setComment( + "Store the first n LHE weights (only relevant if ignoreLheGroups is " + "true)"); + desc.addUntracked("nStoreUngroupedGen", 10) + ->setComment( + "Store the first n Gen weights (only relevant if ignoreGenGroups is " + "true)"); + descriptions.addDefault(desc); +} #include "FWCore/Framework/interface/MakerMacros.h" DEFINE_FWK_MODULE(GenWeightsTableProducer); diff --git a/PhysicsTools/NanoAOD/plugins/LHETablesProducer.cc b/PhysicsTools/NanoAOD/plugins/LHETablesProducer.cc index f99d453815838..1a6ab2774412a 100644 --- a/PhysicsTools/NanoAOD/plugins/LHETablesProducer.cc +++ b/PhysicsTools/NanoAOD/plugins/LHETablesProducer.cc @@ -1,23 +1,24 @@ -#include "FWCore/Framework/interface/global/EDProducer.h" +#include +#include + +#include "DataFormats/NanoAOD/interface/FlatTable.h" #include "FWCore/Framework/interface/Event.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Framework/interface/global/EDProducer.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/Utilities/interface/transform.h" -#include "DataFormats/NanoAOD/interface/FlatTable.h" #include "SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h" #include "TLorentzVector.h" -#include -#include - class LHETablesProducer : public edm::global::EDProducer<> { public: LHETablesProducer(edm::ParameterSet const& params) : lheTag_(edm::vector_transform(params.getParameter>("lheInfo"), [this](const edm::InputTag& tag) { return mayConsume(tag); })), precision_(params.getParameter("precision")), - storeLHEParticles_(params.getParameter("storeLHEParticles")) { + storeLHEParticles_(params.getParameter("storeLHEParticles")), + storeAllLHEInfo_(params.getParameter("storeAllLHEInfo")) { produces("LHE"); if (storeLHEParticles_) produces("LHEPart"); @@ -40,7 +41,8 @@ class LHETablesProducer : public edm::global::EDProducer<> { if (storeLHEParticles_) iEvent.put(std::move(lhePartTab), "LHEPart"); } else { - if (storeLHEParticles_) { // need to store a dummy table anyway to make the framework happy + if (storeLHEParticles_) { // need to store a dummy table anyway to make + // the framework happy auto lhePartTab = std::make_unique(1, "LHEPart", true); iEvent.put(std::move(lhePartTab), "LHEPart"); } @@ -54,7 +56,9 @@ class LHETablesProducer : public edm::global::EDProducer<> { unsigned int lheNj = 0, lheNb = 0, lheNc = 0, lheNuds = 0, lheNglu = 0; double lheVpt = 0; double alphaS = 0; - + double alphaQED = 0; + double scale = 0; + int idproc = 0; const auto& hepeup = lheProd.hepeup(); const auto& pup = hepeup.PUP; int lep = -1, lepBar = -1, nu = -1, nuBar = -1; @@ -63,18 +67,34 @@ class LHETablesProducer : public edm::global::EDProducer<> { std::vector vals_phi; std::vector vals_mass; std::vector vals_pz; + std::vector vals_time; std::vector vals_pid; std::vector vals_status; std::vector vals_spin; + std::vector vals_col1; + std::vector vals_col2; + std::vector vals_mother1; + std::vector vals_mother2; alphaS = hepeup.AQCDUP; + alphaQED = hepeup.AQEDUP; + scale = hepeup.SCALUP; + idproc = hepeup.IDPRUP; for (unsigned int i = 0, n = pup.size(); i < n; ++i) { int status = hepeup.ISTUP[i]; int idabs = std::abs(hepeup.IDUP[i]); - if (status == 1 || status == -1 || (status == 2 && (idabs >= 23 && idabs <= 25))) { - TLorentzVector p4(pup[i][0], pup[i][1], pup[i][2], pup[i][3]); // x,y,z,t + if (status == 1 || status == -1 || (status == 2 && (idabs >= 23 && idabs <= 25)) || storeAllLHEInfo_) { + TLorentzVector p4(pup[i][0], pup[i][1], pup[i][2], + pup[i][3]); // x,y,z,t vals_pid.push_back(hepeup.IDUP[i]); vals_spin.push_back(hepeup.SPINUP[i]); vals_status.push_back(status); + if (storeAllLHEInfo_) { + vals_col1.push_back(hepeup.ICOLUP[i].first); + vals_col2.push_back(hepeup.ICOLUP[i].second); + vals_mother1.push_back(hepeup.MOTHUP[i].first); + vals_mother2.push_back(hepeup.MOTHUP[i].second); + vals_time.push_back(hepeup.VTIMUP[i]); + } if (status == -1) { vals_pt.push_back(0); vals_eta.push_back(0); @@ -103,9 +123,9 @@ class LHETablesProducer : public edm::global::EDProducer<> { // HT double pt = std::hypot(pup[i][0], pup[i][1]); // first entry is px, second py lheHT += pt; - int mothIdx = std::max( - hepeup.MOTHUP[i].first - 1, - 0); //first and last mother as pair; first entry has index 1 in LHE; incoming particles return motherindex 0 + int mothIdx = std::max(hepeup.MOTHUP[i].first - 1, + 0); // first and last mother as pair; first entry has index 1 in + // LHE; incoming particles return motherindex 0 int mothIdxTwo = std::max(hepeup.MOTHUP[i].second - 1, 0); int mothStatus = hepeup.ISTUP[mothIdx]; int mothStatusTwo = hepeup.ISTUP[mothIdxTwo]; @@ -143,7 +163,11 @@ class LHETablesProducer : public edm::global::EDProducer<> { out.addColumnValue("NpNLO", lheProd.npNLO(), "number of partons at NLO"); out.addColumnValue("NpLO", lheProd.npLO(), "number of partons at LO"); out.addColumnValue("AlphaS", alphaS, "Per-event alphaS"); - + if (storeAllLHEInfo_) { + out.addColumnValue("AlphaQED", alphaQED, "Per-event alphaQED"); + out.addColumnValue("Scale", scale, "Per-event scale"); + out.addColumnValue("ProcessID", idproc, "Process id (as in the card ordering)"); + } auto outPart = std::make_unique(vals_pt.size(), "LHEPart", false); outPart->addColumn("pt", vals_pt, "Pt of LHE particles", this->precision_); outPart->addColumn("eta", vals_eta, "Pseodorapidity of LHE particles", this->precision_); @@ -153,17 +177,30 @@ class LHETablesProducer : public edm::global::EDProducer<> { outPart->addColumn("pdgId", vals_pid, "PDG ID of LHE particles"); outPart->addColumn("status", vals_status, "LHE particle status; -1:incoming, 1:outgoing"); outPart->addColumn("spin", vals_spin, "Spin of LHE particles"); - + if (storeAllLHEInfo_) { + outPart->addColumn("color1", vals_col1, "First color index of LHE particles"); + outPart->addColumn("color2", vals_col2, "Second color index of LHE particles"); + outPart->addColumn("mother1", vals_mother1, "First mother index of LHE particles"); + outPart->addColumn("mother2", vals_mother2, "Second mother index of LHE particles"); + outPart->addColumn("lifetime", vals_time, "Own lifetime of LHE particles", this->precision_); + } return outPart; } static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { edm::ParameterSetDescription desc; - desc.add>("lheInfo", std::vector{{"externalLHEProducer"}, {"source"}}) + desc.add>( + "lheInfo", std::vector{{"lheWeights"}, {"externalLHEProducer"}, {"source"}}) ->setComment("tag(s) for the LHE information (LHEEventProduct)"); desc.add("precision", -1)->setComment("precision on the 4-momenta of the LHE particles"); desc.add("storeLHEParticles", false) - ->setComment("Whether we want to store the 4-momenta of the status 1 particles at LHE level"); + ->setComment( + "Whether we want to store the 4-momenta of the status 1 particles " + "at LHE level"); + desc.add("storeAllLHEInfo", false) + ->setComment( + "Whether to store the whole set of intermediate LHE particles, not " + "only the status +/-1 ones"); descriptions.add("lheInfoTable", desc); } @@ -171,6 +208,7 @@ class LHETablesProducer : public edm::global::EDProducer<> { const std::vector> lheTag_; const unsigned int precision_; const bool storeLHEParticles_; + const bool storeAllLHEInfo_; }; #include "FWCore/Framework/interface/MakerMacros.h" diff --git a/PhysicsTools/NanoAOD/plugins/NanoAODOutputModule.cc b/PhysicsTools/NanoAOD/plugins/NanoAODOutputModule.cc index 7c5ac3cb70cf2..7367c307f18a9 100644 --- a/PhysicsTools/NanoAOD/plugins/NanoAODOutputModule.cc +++ b/PhysicsTools/NanoAOD/plugins/NanoAODOutputModule.cc @@ -13,45 +13,43 @@ // system include files #include #include +#include #include "Compression.h" #include "TFile.h" #include "TObjString.h" #include "TROOT.h" #include "TTree.h" -#include // user include files -#include "FWCore/Framework/interface/one/OutputModule.h" -#include "FWCore/Framework/interface/RunForOutput.h" -#include "FWCore/Framework/interface/LuminosityBlockForOutput.h" +#include + +#include "DataFormats/NanoAOD/interface/FlatTable.h" +#include "DataFormats/NanoAOD/interface/UniqueString.h" +#include "DataFormats/Provenance/interface/BranchDescription.h" +#include "DataFormats/Provenance/interface/BranchType.h" +#include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h" #include "FWCore/Framework/interface/EventForOutput.h" -#include "FWCore/ServiceRegistry/interface/Service.h" +#include "FWCore/Framework/interface/LuminosityBlockForOutput.h" #include "FWCore/Framework/interface/MakerMacros.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Framework/interface/RunForOutput.h" +#include "FWCore/Framework/interface/one/OutputModule.h" #include "FWCore/MessageLogger/interface/JobReport.h" -#include "FWCore/Utilities/interface/GlobalIdentifier.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/Digest.h" +#include "FWCore/Utilities/interface/GlobalIdentifier.h" #include "IOPool/Provenance/interface/CommonProvenanceFiller.h" -#include "DataFormats/Provenance/interface/BranchType.h" -#include "DataFormats/Provenance/interface/BranchDescription.h" -#include "DataFormats/Provenance/interface/ProcessHistoryRegistry.h" -#include "DataFormats/NanoAOD/interface/FlatTable.h" -#include "DataFormats/NanoAOD/interface/UniqueString.h" -#include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h" -#include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h" -#include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h" #include "PhysicsTools/NanoAOD/plugins/EventStringOutputBranches.h" +#include "PhysicsTools/NanoAOD/plugins/LumiOutputBranches.h" #include "PhysicsTools/NanoAOD/plugins/SummaryTableOutputBranches.h" - -#include - +#include "PhysicsTools/NanoAOD/plugins/TableOutputBranches.h" +#include "PhysicsTools/NanoAOD/plugins/TriggerOutputBranches.h" #include "oneapi/tbb/task_arena.h" class NanoAODOutputModule : public edm::one::OutputModule<> { public: NanoAODOutputModule(edm::ParameterSet const& pset); - ~NanoAODOutputModule() override; static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); @@ -69,7 +67,7 @@ class NanoAODOutputModule : public edm::one::OutputModule<> { int m_eventsSinceFlush{0}; std::string m_compressionAlgorithm; bool m_writeProvenance; - bool m_fakeName; //crab workaround, remove after crab is fixed + bool m_fakeName; // crab workaround, remove after crab is fixed int m_autoFlush; edm::ProcessHistoryRegistry m_processHistoryRegistry; edm::JobReport::Token m_jrToken; @@ -126,6 +124,9 @@ class NanoAODOutputModule : public edm::one::OutputModule<> { } m_commonRunBranches; std::vector m_tables; + std::vector m_tableTokens; + std::vector m_tableVectorTokens; + std::vector m_runFlatTableTokens; std::vector m_triggers; bool m_triggers_areSorted = false; std::vector m_evstrings; @@ -161,10 +162,8 @@ NanoAODOutputModule::NanoAODOutputModule(edm::ParameterSet const& pset) m_autoFlush(pset.getUntrackedParameter("autoFlush", -10000000)), m_processHistoryRegistry() {} -NanoAODOutputModule::~NanoAODOutputModule() {} - void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) { - //Get data from 'e' and write it to the file + // Get data from 'e' and write it to the file edm::Service jr; jr->eventWrittenToFile(m_jrToken, iEvent.id().run(), iEvent.id().event()); @@ -179,19 +178,22 @@ void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) { float percentClusterDone = m_firstFlush / static_cast(m_autoFlush); maxMemory = static_cast(m_tree->GetTotBytes()) / percentClusterDone; } else if (m_tree->GetZipBytes() == 0) { - maxMemory = 100 * 1024 * 1024; // Degenerate case of no information in the tree; arbitrary value + maxMemory = 100 * 1024 * 1024; // Degenerate case of no information in + // the tree; arbitrary value } else { - // Estimate the memory we'll be using by scaling the current compression ratio. + // Estimate the memory we'll be using by scaling the current compression + // ratio. float cxnRatio = m_tree->GetTotBytes() / static_cast(m_tree->GetZipBytes()); maxMemory = -m_autoFlush * cxnRatio; float percentBytesDone = -m_tree->GetZipBytes() / static_cast(m_autoFlush); m_autoFlush = m_firstFlush / percentBytesDone; } - //std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() << std::endl; - //std::cout << "OptimizeBaskets: zip bytes " << m_tree->GetZipBytes() << std::endl; - //std::cout << "OptimizeBaskets: autoFlush " << m_autoFlush << std::endl; - //std::cout << "OptimizeBaskets: maxMemory " << static_cast(maxMemory) << std::endl; - //m_tree->OptimizeBaskets(static_cast(maxMemory), 1, "d"); + // std::cout << "OptimizeBaskets: total bytes " << m_tree->GetTotBytes() + // << std::endl; std::cout << "OptimizeBaskets: zip bytes " << + // m_tree->GetZipBytes() << std::endl; std::cout << "OptimizeBaskets: + // autoFlush " << m_autoFlush << std::endl; std::cout << "OptimizeBaskets: + // maxMemory " << static_cast(maxMemory) << std::endl; + // m_tree->OptimizeBaskets(static_cast(maxMemory), 1, "d"); m_tree->OptimizeBaskets(static_cast(maxMemory), 1, ""); } if (m_eventsSinceFlush == m_autoFlush) { @@ -203,11 +205,33 @@ void NanoAODOutputModule::write(edm::EventForOutput const& iEvent) { m_commonBranches.fill(iEvent.eventAuxiliary()); // fill all tables, starting from main tables and then doing extension tables + std::vector tables; + for (auto& tableToken : m_tableTokens) { + edm::Handle handle; + iEvent.getByToken(tableToken, handle); + tables.push_back(&(*handle)); + } + for (auto& tableToken : m_tableVectorTokens) { + edm::Handle> handle; + iEvent.getByToken(tableToken, handle); + for (auto const& table : *handle) { + tables.push_back(&table); + } + } + + if (m_tables.empty()) { + m_tables.resize(tables.size()); + } for (unsigned int extensions = 0; extensions <= 1; ++extensions) { - for (auto& t : m_tables) - t.fill(iEvent, *m_tree, extensions); + size_t iTable = 0; + for (auto& table : tables) { + m_tables[iTable].fill(*table, *m_tree, extensions); + ++iTable; + } } - if (!m_triggers_areSorted) { // sort triggers/flags in inverse processHistory order, to save without any special label the most recent ones + if (!m_triggers_areSorted) { // sort triggers/flags in inverse processHistory + // order, to save without any special label the + // most recent ones std::vector pnames; for (auto& p : iEvent.processHistory()) pnames.push_back(p.processName()); @@ -257,8 +281,12 @@ void NanoAODOutputModule::writeRun(edm::RunForOutput const& iRun) { t.fill(iRun, *m_runTree); for (unsigned int extensions = 0; extensions <= 1; ++extensions) { - for (auto& t : m_runFlatTables) - t.fill(iRun, *m_runTree, extensions); + size_t iRunTable = 0; + for (auto& token : m_runFlatTableTokens) { + edm::Handle handle; + iRun.getByToken(token, handle); + m_runFlatTables[iRunTable].fill(*handle, *m_runTree, extensions); + } } edm::Handle hstring; @@ -299,17 +327,14 @@ void NanoAODOutputModule::openFile(edm::FileBlock const&) { m_file->SetCompressionAlgorithm(ROOT::kZLIB); } else if (m_compressionAlgorithm == std::string("LZMA")) { m_file->SetCompressionAlgorithm(ROOT::kLZMA); - } else if (m_compressionAlgorithm == std::string("ZSTD")) { - m_file->SetCompressionAlgorithm(ROOT::kZSTD); - } else if (m_compressionAlgorithm == std::string("LZ4")) { - m_file->SetCompressionAlgorithm(ROOT::kLZ4); } else { throw cms::Exception("Configuration") << "NanoAODOutputModule configured with unknown compression algorithm '" << m_compressionAlgorithm << "'\n" - << "Allowed compression algorithms are ZLIB, LZMA, ZSTD, and LZ4\n"; + << "Allowed compression algorithms are ZLIB and LZMA\n"; } /* Setup file structure here */ m_tables.clear(); + m_tableTokens.clear(); m_triggers.clear(); m_triggers_areSorted = false; m_evstrings.clear(); @@ -317,15 +342,19 @@ void NanoAODOutputModule::openFile(edm::FileBlock const&) { m_lumiTables.clear(); m_lumiTables2.clear(); m_runFlatTables.clear(); + m_runFlatTableTokens.clear(); const auto& keeps = keptProducts(); for (const auto& keep : keeps[edm::InEvent]) { - if (keep.first->className() == "nanoaod::FlatTable") - m_tables.emplace_back(keep.first, keep.second); - else if (keep.first->className() == "edm::TriggerResults") { + if (keep.first->className() == "nanoaod::FlatTable") { + m_tableTokens.emplace_back(keep.second); + } else if (keep.first->className() == "std::vector") { + m_tableVectorTokens.emplace_back(keep.second); + } else if (keep.first->className() == "edm::TriggerResults") { m_triggers.emplace_back(keep.first, keep.second); } else if (keep.first->className() == "std::basic_string >" && keep.first->productInstanceName() == "genModel") { // friendlyClassName == "String" - m_evstrings.emplace_back(keep.first, keep.second, true); // update only at lumiBlock transitions + m_evstrings.emplace_back(keep.first, keep.second, + true); // update only at lumiBlock transitions } else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className()); } @@ -333,7 +362,7 @@ void NanoAODOutputModule::openFile(edm::FileBlock const&) { for (const auto& keep : keeps[edm::InLumi]) { if (keep.first->className() == "nanoaod::MergeableCounterTable") m_lumiTables.push_back(SummaryTableOutputBranches(keep.first, keep.second)); - else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata") + else if (keep.first->className() == "nanoaod::UniqueString") m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second); else if (keep.first->className() == "nanoaod::FlatTable") m_lumiTables2.push_back(LumiOutputBranches(keep.first, keep.second)); @@ -346,10 +375,10 @@ void NanoAODOutputModule::openFile(edm::FileBlock const&) { for (const auto& keep : keeps[edm::InRun]) { if (keep.first->className() == "nanoaod::MergeableCounterTable") m_runTables.push_back(SummaryTableOutputBranches(keep.first, keep.second)); - else if (keep.first->className() == "nanoaod::UniqueString" && keep.first->moduleLabel() == "nanoMetadata") + else if (keep.first->className() == "nanoaod::UniqueString") m_nanoMetadata.emplace_back(keep.first->productInstanceName(), keep.second); else if (keep.first->className() == "nanoaod::FlatTable") - m_runFlatTables.emplace_back(keep.first, keep.second); + m_runFlatTableTokens.emplace_back(keep.second); else throw cms::Exception("Configuration", "NanoAODOutputModule cannot handle class " + keep.first->className() + " in Run branch"); @@ -408,16 +437,19 @@ void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descr desc.addUntracked("compressionLevel", 9)->setComment("ROOT compression level of output file."); desc.addUntracked("compressionAlgorithm", "ZLIB") - ->setComment("Algorithm used to compress data in the ROOT output file, allowed values are ZLIB and LZMA"); + ->setComment( + "Algorithm used to compress data in the ROOT output file, allowed " + "values are ZLIB and LZMA"); desc.addUntracked("saveProvenance", true) ->setComment("Save process provenance information, e.g. for edmProvDump"); desc.addUntracked("fakeNameForCrab", false) ->setComment( - "Change the OutputModule name in the fwk job report to fake PoolOutputModule. This is needed to run on cran " + "Change the OutputModule name in the fwk job report to fake " + "PoolOutputModule. This is needed to run on cran " "(and publish) till crab is fixed"); desc.addUntracked("autoFlush", -10000000)->setComment("Autoflush parameter for ROOT file"); - //replace with whatever you want to get from the EDM by default + // replace with whatever you want to get from the EDM by default const std::vector keep = {"drop *", "keep nanoaodFlatTable_*Table_*_*", "keep edmTriggerResults_*_*_*", @@ -426,7 +458,7 @@ void NanoAODOutputModule::fillDescriptions(edm::ConfigurationDescriptions& descr "keep nanoaodUniqueString_nanoMetadata_*_*"}; edm::one::OutputModule<>::fillDescription(desc, keep); - //Used by Workflow management for their own meta data + // Used by Workflow management for their own meta data edm::ParameterSetDescription dataSet; dataSet.setAllowAnything(); desc.addUntracked("dataset", dataSet) diff --git a/PhysicsTools/NanoAOD/plugins/TableOutputBranches.cc b/PhysicsTools/NanoAOD/plugins/TableOutputBranches.cc index 291a6af13cda9..a26693f0bcf4f 100644 --- a/PhysicsTools/NanoAOD/plugins/TableOutputBranches.cc +++ b/PhysicsTools/NanoAOD/plugins/TableOutputBranches.cc @@ -69,15 +69,12 @@ void TableOutputBranches::branch(TTree &tree) { } } -void TableOutputBranches::fill(const edm::OccurrenceForOutput &iWhatever, TTree &tree, bool extensions) { +void TableOutputBranches::fill(const nanoaod::FlatTable &tab, TTree &tree, bool extensions) { if (m_extension != DontKnowYetIfMainOrExtension) { if (extensions != m_extension) return; // do nothing, wait to be called with the proper flag } - edm::Handle handle; - iWhatever.getByToken(m_token, handle); - const nanoaod::FlatTable &tab = *handle; m_counter = tab.size(); m_singleton = tab.singleton(); if (!m_branchesBooked) { @@ -92,7 +89,9 @@ void TableOutputBranches::fill(const edm::OccurrenceForOutput &iWhatever, TTree if (!m_singleton && m_extension == IsExtension) { if (m_counter != *reinterpret_cast(m_counterBranch->GetAddress())) { throw cms::Exception("LogicError", - "Mismatch in number of entries between extension and main table for " + tab.name()); + "Mismatch in number of entries between extension " + "and main table for " + + tab.name()); } } for (auto &pair : m_floatBranches) diff --git a/PhysicsTools/NanoAOD/plugins/TableOutputBranches.h b/PhysicsTools/NanoAOD/plugins/TableOutputBranches.h index f408e65759e38..04fe6bf67b6cd 100644 --- a/PhysicsTools/NanoAOD/plugins/TableOutputBranches.h +++ b/PhysicsTools/NanoAOD/plugins/TableOutputBranches.h @@ -1,34 +1,29 @@ #ifndef PhysicsTools_NanoAOD_TableOutputBranches_h #define PhysicsTools_NanoAOD_TableOutputBranches_h +#include + #include #include -#include -#include "FWCore/Framework/interface/OccurrenceForOutput.h" + #include "DataFormats/NanoAOD/interface/FlatTable.h" #include "DataFormats/Provenance/interface/BranchDescription.h" #include "FWCore/Utilities/interface/EDGetToken.h" class TableOutputBranches { public: - TableOutputBranches(const edm::BranchDescription *desc, const edm::EDGetToken &token) - : m_token(token), m_extension(DontKnowYetIfMainOrExtension), m_branchesBooked(false) { - if (desc->className() != "nanoaod::FlatTable") - throw cms::Exception("Configuration", "NanoAODOutputModule can only write out nanoaod::FlatTable objects"); - } - void defineBranchesFromFirstEvent(const nanoaod::FlatTable &tab); void branch(TTree &tree); /// Fill the current table, if extensions == table.extension(). - /// This parameter is used so that the fill is called first for non-extensions and then for extensions - void fill(const edm::OccurrenceForOutput &iWhatever, TTree &tree, bool extensions); + /// This parameter is used so that the fill is called first for non-extensions + /// and then for extensions + void fill(const nanoaod::FlatTable &tab, TTree &tree, bool extensions); private: - edm::EDGetToken m_token; std::string m_baseName; bool m_singleton = false; - enum { IsMain = 0, IsExtension = 1, DontKnowYetIfMainOrExtension = 2 } m_extension; + enum { IsMain = 0, IsExtension = 1, DontKnowYetIfMainOrExtension = 2 } m_extension = DontKnowYetIfMainOrExtension; std::string m_doc; UInt_t m_counter; struct NamedBranchPtr { @@ -47,7 +42,7 @@ class TableOutputBranches { std::vector m_uint8Branches; std::vector m_uint32Branches; std::vector m_doubleBranches; - bool m_branchesBooked; + bool m_branchesBooked = false; template void fillColumn(NamedBranchPtr &pair, const nanoaod::FlatTable &tab) { diff --git a/PhysicsTools/NanoAOD/python/NanoAODEDMEventContent_cff.py b/PhysicsTools/NanoAOD/python/NanoAODEDMEventContent_cff.py index d2e1f939d795e..c30f0c8238d41 100644 --- a/PhysicsTools/NanoAOD/python/NanoAODEDMEventContent_cff.py +++ b/PhysicsTools/NanoAOD/python/NanoAODEDMEventContent_cff.py @@ -3,7 +3,9 @@ NanoAODEDMEventContent = cms.PSet( outputCommands = cms.untracked.vstring( 'drop *', + "keep *_genWeightsTable_*_*", # event data "keep nanoaodFlatTable_*Table_*_*", # event data + "keep nanoaodFlatTables_*Table_*_*", # event data "keep edmTriggerResults_*_*_*", # event data "keep String_*_genModel_*", # generator model data "keep nanoaodMergeableCounterTable_*Table_*_*", # accumulated per/run or per/lumi data @@ -19,12 +21,15 @@ compressionLevel = cms.untracked.int32(9), compressionAlgorithm = cms.untracked.string("LZMA"), ) - -NanoGenOutput = NanoAODEDMEventContent.outputCommands[:] -NanoGenOutput.remove("keep edmTriggerResults_*_*_*") - NANOAODGENEventContent = cms.PSet( + outputCommands = cms.untracked.vstring( + 'drop *', + "keep *_genWeightsTable_*_*", # event data + "keep nanoaodFlatTable_*Table_*_*", # event data + "keep String_*_genModel_*", # generator model data + "keep nanoaodMergeableCounterTable_*Table_*_*", # accumulated per/run or per/lumi data + "keep nanoaodUniqueString_nanoMetadata_*_*", # basic metadata + ), compressionLevel = cms.untracked.int32(9), compressionAlgorithm = cms.untracked.string("LZMA"), - outputCommands = cms.untracked.vstring(NanoGenOutput) ) diff --git a/PhysicsTools/NanoAOD/python/genWeightsTable_cfi.py b/PhysicsTools/NanoAOD/python/genWeightsTable_cfi.py deleted file mode 100644 index 158763fc7d51d..0000000000000 --- a/PhysicsTools/NanoAOD/python/genWeightsTable_cfi.py +++ /dev/null @@ -1,30 +0,0 @@ -import FWCore.ParameterSet.Config as cms - -genWeightsTable = cms.EDProducer("GenWeightsTableProducer", - genEvent = cms.InputTag("generator"), - genLumiInfoHeader = cms.InputTag("generator"), - lheInfo = cms.VInputTag(cms.InputTag("externalLHEProducer"), cms.InputTag("source")), - preferredPDFs = cms.VPSet( # see https://lhapdf.hepforge.org/pdfsets.html - cms.PSet( name = cms.string("NNPDF31_nnlo_hessian_pdfas"), lhaid = cms.uint32(306000) ), - cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_hessian"), lhaid = cms.uint32(304400) ), - cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_mc_hessian_pdfas"), lhaid = cms.uint32(325300) ), - cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_mc"), lhaid = cms.uint32(316200) ), - cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_nf_4_mc_hessian"), lhaid = cms.uint32(325500) ), - cms.PSet( name = cms.string("NNPDF31_nnlo_as_0118_nf_4"), lhaid = cms.uint32(320900) ), - cms.PSet( name = cms.string("NNPDF30_nlo_as_0118"), lhaid = cms.uint32(260000) ), # for some 92X samples. Note that the nominal weight, 260000, is not included in the LHE ... - cms.PSet( name = cms.string("NNPDF30_lo_as_0130"), lhaid = cms.uint32(262000) ), # some MLM 80X samples have only this (e.g. /store/mc/RunIISummer16MiniAODv2/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/MINIAODSIM/PUMoriond17_80X_mcRun2_asymptotic_2016_TrancheIV_v6_ext1-v2/120000/02A210D6-F5C3-E611-B570-008CFA197BD4.root ) - cms.PSet( name = cms.string("NNPDF30_nlo_nf_4_pdfas"), lhaid = cms.uint32(292000) ), # some FXFX 80X samples have only this (e.g. WWTo1L1Nu2Q, WWTo4Q) - cms.PSet( name = cms.string("NNPDF30_nlo_nf_5_pdfas"), lhaid = cms.uint32(292200) ), # some FXFX 80X samples have only this (e.g. DYJetsToLL_Pt, WJetsToLNu_Pt, DYJetsToNuNu_Pt) - cms.PSet( name = cms.string("PDF4LHC15_nnlo_30_pdfas"), lhaid = cms.uint32(91400) ), - cms.PSet( name = cms.string("PDF4LHC15_nlo_30_pdfas"), lhaid = cms.uint32(90400) ), - cms.PSet( name = cms.string("PDF4LHC15_nlo_30"), lhaid = cms.uint32(90900) ), - ), - namedWeightIDs = cms.vstring(), - namedWeightLabels = cms.vstring(), - lheWeightPrecision = cms.int32(14), - maxPdfWeights = cms.uint32(150), - keepAllPSWeights = cms.bool(False), - debug = cms.untracked.bool(False), -) - -genWeightsTableTask = cms.Task(genWeightsTable) diff --git a/PhysicsTools/NanoAOD/python/genWeights_cff.py b/PhysicsTools/NanoAOD/python/genWeights_cff.py new file mode 100644 index 0000000000000..1d52719713ba3 --- /dev/null +++ b/PhysicsTools/NanoAOD/python/genWeights_cff.py @@ -0,0 +1,34 @@ +import FWCore.ParameterSet.Config as cms +from GeneratorInterface.Core.genWeights_cfi import genWeights +from GeneratorInterface.Core.lheWeights_cfi import lheWeights + +genWeightsNano = genWeights.clone() +genWeightsNano.weightProductLabels = ["genWeights"] + +lheWeightsNano = lheWeights.clone() +lheWeightsNano.weightProductLabels = ["lheWeights"] + +genWeightsTable = cms.EDProducer("GenWeightsTableProducer", + lheWeightPrecision = cms.int32(14), + lheWeights = cms.VInputTag(["lheWeights", "lheWeightsNano"]), + genWeights = cms.VInputTag(["genWeights", "genWeightsNano"]), + genLumiInfoHeader = cms.InputTag("generator"), + # Warning: you can use a full string, but only the first character is read. + # Note also that the capitalization is important! For example, 'parton shower' + # must be lower case and 'PDF' must be capital + weightgroups = cms.vstring(['scale', 'PDF', 'matrix element', 'unknown', 'parton shower']), + outputNames = cms.vstring(['LHEScaleWeight', 'LHEPdfWeight', 'MEParamWeight', 'UnknownWeight', 'PSWeight']), + # Max number of groups to store for each type above, -1 ==> store all found + #maxGroupsPerType = cms.vint32([-1, -1, -1, -1, -1]), + maxGroupsPerType = cms.vint32([1, 1, -1, 1, 1]), + # If empty or not specified, no criteria are applied to filter on LHAPDF IDs + #pdfIds = cms.untracked.vint32([91400, 306000, 260000]), + unknownOnlyIfEmpty = cms.vstring(['scale', 'PDF']), + keepAllPSWeights = cms.bool(False), + # ignoreGenGroups = cms.untracked.bool(True), + # nStoreUngroupedGen = cms.untracked.int32(40), + # ignoreLheGroups = cms.untracked.bool(False), + # nStoreUngroupedLhe = cms.untracked.int32(100), +) + +genWeightsTableTask = cms.Task(lheWeightsNano, genWeightsNano, genWeightsTable) diff --git a/PhysicsTools/NanoAOD/python/nano_cff.py b/PhysicsTools/NanoAOD/python/nano_cff.py index 4e9965a19b938..929e7861523f3 100644 --- a/PhysicsTools/NanoAOD/python/nano_cff.py +++ b/PhysicsTools/NanoAOD/python/nano_cff.py @@ -17,7 +17,7 @@ from PhysicsTools.NanoAOD.ttbarCategorization_cff import * from PhysicsTools.NanoAOD.genparticles_cff import * from PhysicsTools.NanoAOD.particlelevel_cff import * -from PhysicsTools.NanoAOD.genWeightsTable_cfi import * +from PhysicsTools.NanoAOD.genWeights_cff import * from PhysicsTools.NanoAOD.genVertex_cff import * from PhysicsTools.NanoAOD.vertices_cff import * from PhysicsTools.NanoAOD.met_cff import * diff --git a/PhysicsTools/NanoAOD/python/nanogen_cff.py b/PhysicsTools/NanoAOD/python/nanogen_cff.py index a25e26181df18..b22be27f27061 100644 --- a/PhysicsTools/NanoAOD/python/nanogen_cff.py +++ b/PhysicsTools/NanoAOD/python/nanogen_cff.py @@ -4,7 +4,7 @@ from PhysicsTools.NanoAOD.met_cff import metMCTable from PhysicsTools.NanoAOD.genparticles_cff import * from PhysicsTools.NanoAOD.particlelevel_cff import * -from PhysicsTools.NanoAOD.genWeightsTable_cfi import * +from PhysicsTools.NanoAOD.genWeights_cff import * from PhysicsTools.NanoAOD.genVertex_cff import * from PhysicsTools.NanoAOD.common_cff import Var,CandVars @@ -15,25 +15,25 @@ ) nanogenSequence = cms.Sequence( - nanoMetadata+ - cms.Sequence(particleLevelTask)+ - genJetTable+ - patJetPartonsNano+ - genJetFlavourAssociation+ - genJetFlavourTable+ - genJetAK8Table+ - genJetAK8FlavourAssociation+ - genJetAK8FlavourTable+ - cms.Sequence(genTauTask)+ - genTable+ - genFilterTable+ - cms.Sequence(genParticleTablesTask)+ - cms.Sequence(genVertexTablesTask)+ - tautagger+ - rivetProducerHTXS+ - cms.Sequence(particleLevelTablesTask)+ - metMCTable+ - genWeightsTable + nanoMetadata + +cms.Sequence(particleLevelTask) + +genJetTable + +patJetPartonsNano + +genJetFlavourAssociation + +genJetFlavourTable + +genJetAK8Table + +genJetAK8FlavourAssociation + +genJetAK8FlavourTable + +cms.Sequence(genTauTask) + +genTable + +genFilterTable + +cms.Sequence(genParticleTablesTask) + +cms.Sequence(genVertexTablesTask) + +tautagger + +rivetProducerHTXS + +cms.Sequence(particleLevelTablesTask) + +metMCTable + +cms.Sequence(genWeightsTableTask) ) def nanoGenCommonCustomize(process): diff --git a/PhysicsTools/NanoAOD/python/particlelevel_cff.py b/PhysicsTools/NanoAOD/python/particlelevel_cff.py index d6e001b1a5108..48876986e266e 100644 --- a/PhysicsTools/NanoAOD/python/particlelevel_cff.py +++ b/PhysicsTools/NanoAOD/python/particlelevel_cff.py @@ -3,6 +3,7 @@ from PhysicsTools.NanoAOD.simpleCandidateFlatTableProducer_cfi import simpleCandidateFlatTableProducer from PhysicsTools.NanoAOD.simpleSingletonCandidateFlatTableProducer_cfi import simpleSingletonCandidateFlatTableProducer from PhysicsTools.NanoAOD.simpleHTXSFlatTableProducer_cfi import simpleHTXSFlatTableProducer +from PhysicsTools.NanoAOD.lheInfoTable_cfi import lheInfoTable ##################### User floats producers, selectors ########################## @@ -123,11 +124,7 @@ ) ) -lheInfoTable = cms.EDProducer("LHETablesProducer", - lheInfo = cms.VInputTag(cms.InputTag("externalLHEProducer"), cms.InputTag("source")), - precision = cms.int32(14), - storeLHEParticles = cms.bool(True) - ) +lheInfoTable.storeLHEParticles = True particleLevelTask = cms.Task(mergedGenParticles,genParticles2HepMC,particleLevel,tautagger,genParticles2HepMCHiggsVtx,rivetProducerHTXS) particleLevelTablesTask = cms.Task(rivetLeptonTable,rivetPhotonTable,rivetMetTable,HTXSCategoryTable,lheInfoTable) diff --git a/PhysicsTools/NanoAOD/test/testNanoWeights.py b/PhysicsTools/NanoAOD/test/testNanoWeights.py new file mode 100644 index 0000000000000..2f288b8629c91 --- /dev/null +++ b/PhysicsTools/NanoAOD/test/testNanoWeights.py @@ -0,0 +1,29 @@ +import ROOT +import argparse + +def variableAndNumber(varName, tree): + countVar = "n"+varName + if not hasattr(tree, varName): + print("No variable %s found in file" % varName) + else: + count = getattr(tree, countVar) + var = getattr(tree, varName) + print("Found %i entries of %s in file" % (count, varName)) + branch = tree.GetBranch(varName) + print(" --> Desciption:%s" % branch.GetTitle()) + +parser = argparse.ArgumentParser() +parser.add_argument('inputFile', type=str, help='NanoAOD file to process') +parser.add_argument('--scan', action='store_true', help='Scan the weight values') +args = parser.parse_args() + +rtfile = ROOT.TFile(args.inputFile) +tree = rtfile.Get("Events") +tree.GetEntry(0) +variables = ["LHEScaleWeight", "LHEPdfWeight", "MEParamWeight", "UnknownWeight", "PSWeight", ] + +for varName in variables: + variableAndNumber(varName, tree) + +if args.scan: + tree.Scan(":".join(variables)) diff --git a/PhysicsTools/PatAlgos/python/slimming/MicroEventContent_cff.py b/PhysicsTools/PatAlgos/python/slimming/MicroEventContent_cff.py index f19ae24f46cec..23c2bec0ca025 100644 --- a/PhysicsTools/PatAlgos/python/slimming/MicroEventContent_cff.py +++ b/PhysicsTools/PatAlgos/python/slimming/MicroEventContent_cff.py @@ -111,7 +111,7 @@ 'keep recoGenParticles_prunedGenParticles_*_*', 'keep *_packedPFCandidateToGenAssociation_*_*', 'keep *_lostTracksToGenAssociation_*_*', - 'keep LHEEventProduct_*_*_*', + 'keep LHEEventProduct_lheWeights_*_*', 'keep GenFilterInfo_*_*_*', 'keep GenLumiInfoHeader_generator_*_*', 'keep GenLumiInfoProduct_*_*_*', @@ -122,9 +122,12 @@ 'keep *_slimmedGenJetsAK8__*', 'keep *_slimmedGenJetsAK8SoftDropSubJets__*', 'keep *_genMetTrue_*_*', - # RUN 'keep LHERunInfoProduct_*_*_*', 'keep GenRunInfoProduct_*_*_*', + 'keep GenWeightProduct_lheWeights_*_*', + 'keep GenWeightProduct_genWeights_*_*', + 'keep GenWeightInfoProduct_lheWeights_*_*', + 'keep GenWeightInfoProduct_genWeights_*_*', 'keep *_genParticles_xyz0_*', 'keep *_genParticles_t0_*', ) diff --git a/SimDataFormats/GeneratorProducts/BuildFile.xml b/SimDataFormats/GeneratorProducts/BuildFile.xml index 65d414e27c34f..9d22a58964427 100644 --- a/SimDataFormats/GeneratorProducts/BuildFile.xml +++ b/SimDataFormats/GeneratorProducts/BuildFile.xml @@ -4,6 +4,7 @@ + diff --git a/SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h b/SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h index 680af315176b1..396bce07619a2 100644 --- a/SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h +++ b/SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h @@ -1,8 +1,8 @@ #ifndef SimDataFormats_GeneratorProducts_GenEventInfoProduct_h #define SimDataFormats_GeneratorProducts_GenEventInfoProduct_h -#include #include +#include #include "SimDataFormats/GeneratorProducts/interface/PdfInfo.h" @@ -31,6 +31,7 @@ class GenEventInfoProduct { std::vector &weights() { return weights_; } const std::vector &weights() const { return weights_; } + void clearWeights() { weights_.clear(); } double weight() const { return weights_.empty() ? 1.0 : weights_[0]; } diff --git a/SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h b/SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h new file mode 100644 index 0000000000000..b89b87672ccb4 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h @@ -0,0 +1,74 @@ +#ifndef SimDataFormats_GeneratorProducts_GenWeightInfoProduct_h +#define SimDataFormats_GeneratorProducts_GenWeightInfoProduct_h + +#include +#include +#include +#include +#include +#include + +#include "SimDataFormats/GeneratorProducts/interface/LesHouches.h" +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" + +namespace gen { + struct WeightGroupData { + size_t index; + const gen::WeightGroupInfo* group; + }; + + typedef std::vector> WeightGroupInfoContainer; +} // namespace gen + +class GenWeightInfoProduct { +public: + GenWeightInfoProduct() {} + GenWeightInfoProduct(gen::WeightGroupInfoContainer& weightGroups); + GenWeightInfoProduct(std::unique_ptr other) { copy(*other); } + GenWeightInfoProduct(const GenWeightInfoProduct& other) { copy(other); } + GenWeightInfoProduct& operator=(const GenWeightInfoProduct& other) { + copy(other); + return *this; + } + void copy(const GenWeightInfoProduct& other) { + for (auto& ptr : other.weightGroupsInfo_) { + std::unique_ptr cloneptr(ptr->clone()); + weightGroupsInfo_.emplace_back(std::move(cloneptr)); + } + unassociatedIdx_ = other.unassociatedIdx_; + } + ~GenWeightInfoProduct() {} + + const gen::WeightGroupInfoContainer& allWeightGroupsInfo() const; + const std::vector allWeightGroupsInfoWithIndices() const; + gen::WeightGroupData containingWeightGroupInfo(int index, size_t startSearch = 0) const; + const gen::WeightGroupInfo* orderedWeightGroupInfo(int index) const; + std::vector weightGroupsByType(gen::WeightType type) const; + std::vector weightGroupIndicesByType(gen::WeightType type) const; + std::vector weightGroupsAndIndicesByType(gen::WeightType type, int maxStore = -1) const; + std::optional pdfGroupWithIndexByLHAID(int lhaid) const; + std::vector pdfGroupsWithIndicesByLHAIDs(const std::vector& lhaids) const; + void addWeightGroupInfo(std::unique_ptr info); + const int numberOfGroups() const { return weightGroupsInfo_.size(); } + // If there are unassociated weights, the number of filled groups will be less + // than the number of groups, because the unassociated group can't be filled. + // Likewise the number of weights in the GenWeightInfoProduct product will be + // less than the number of weights in the event + const int numberOfFilledGroups() const { + return std::accumulate(weightGroupsInfo_.begin(), weightGroupsInfo_.end(), 0, [](auto sum, auto& entry) { + return sum + (entry->nIdsContained() > 0 ? 1 : 0); + }); + } + const int numberOfWeights() const { + return std::accumulate(weightGroupsInfo_.begin(), weightGroupsInfo_.end(), 0, [](auto sum, auto& entry) { + return sum + entry->nIdsContained(); + }); + } + const int unassociatedIdx() const { return unassociatedIdx_; } + +private: + gen::WeightGroupInfoContainer weightGroupsInfo_; + int unassociatedIdx_ = -1; +}; + +#endif // GeneratorWeightInfo_LHEInterface_GenWeightInfoProduct_h diff --git a/SimDataFormats/GeneratorProducts/interface/GenWeightProduct.h b/SimDataFormats/GeneratorProducts/interface/GenWeightProduct.h new file mode 100644 index 0000000000000..cd7b8fdaa89ff --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/GenWeightProduct.h @@ -0,0 +1,56 @@ +#ifndef SimDataFormats_GeneratorProducts_GenWeightProduct_h +#define SimDataFormats_GeneratorProducts_GenWeightProduct_h + +#include +#include +#include +#include + +#include "FWCore/Utilities/interface/Exception.h" +#include "SimDataFormats/GeneratorProducts/interface/LesHouches.h" +#include "SimDataFormats/GeneratorProducts/interface/WeightsInfo.h" + +typedef std::vector> WeightsContainer; + +class GenWeightProduct { +public: + GenWeightProduct() { + weightsVector_ = {}; + centralWeight_ = 1.; + } + GenWeightProduct(double w0) { + weightsVector_ = {}; + centralWeight_ = w0; + } + GenWeightProduct& operator=(GenWeightProduct&& other) { + weightsVector_ = std::move(other.weightsVector_); + centralWeight_ = other.centralWeight_; + return *this; + } + ~GenWeightProduct() {} + + void setNumWeightSets(int num) { weightsVector_.resize(num); } + void addWeightSet() { weightsVector_.push_back({}); } + void addWeight(double weight, int setEntry, int weightNum) { + if (weightsVector_.empty() && setEntry == 0) + addWeightSet(); + int maxSets = static_cast(weightsVector_.size()); + if (maxSets <= setEntry) + throw cms::Exception("GenWeightProduct") + << "WeightGroup index " << setEntry << " is exceeds the number of WeightGroups expected (max " << maxSets + << " )"; + auto& weights = weightsVector_[setEntry]; + if (static_cast(weights.size()) <= weightNum) { + weights.resize(weightNum + 1); + } + weights[weightNum] = weight / centralWeight_; + } + const WeightsContainer& weights() const { return weightsVector_; } + double centralWeight() const { return centralWeight_; } + +private: + WeightsContainer weightsVector_; + double centralWeight_; +}; + +#endif // GeneratorEvent_LHEInterface_GenWeightProduct_h diff --git a/SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h b/SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h index b2b8e1358b08f..b20ca257b0649 100644 --- a/SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h +++ b/SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h @@ -2,8 +2,8 @@ #define SimDataFormats_GeneratorProducts_LHEEventProduct_h #include -#include #include +#include #include "SimDataFormats/GeneratorProducts/interface/LesHouches.h" #include "SimDataFormats/GeneratorProducts/interface/PdfInfo.h" @@ -22,6 +22,17 @@ class LHEEventProduct { LHEEventProduct(const lhef::HEPEUP &hepeup, const double originalXWGTUP) : hepeup_(hepeup), originalXWGTUP_(originalXWGTUP) {} LHEEventProduct(LHEEventProduct &&other) = default; + LHEEventProduct(const LHEEventProduct &other) { + hepeup_ = other.hepeup_; + comments_ = other.comments_; + if (other.pdf_) + pdf_ = std::make_unique(*other.pdf_); + weights_ = other.weights_; + originalXWGTUP_ = other.originalXWGTUP_; + scales_ = other.scales_; + npLO_ = other.npLO_; + npNLO_ = other.npNLO_; + } LHEEventProduct &operator=(LHEEventProduct &&other) = default; @@ -29,6 +40,7 @@ class LHEEventProduct { void setPDF(const PDF &pdf) { pdf_ = std::make_unique(pdf); } void addWeight(const WGT &wgt) { weights_.push_back(wgt); } + void clearWeights() { weights_.clear(); } void addComment(const std::string &line) { comments_.push_back(line); } double originalXWGTUP() const { return originalXWGTUP_; } @@ -105,9 +117,12 @@ class LHEEventProduct { std::unique_ptr pdf_; std::vector weights_; double originalXWGTUP_; - std::vector scales_; //scale value used to exclude EWK-produced partons from matching - int npLO_; //number of partons for LO process (used to steer matching/merging) - int npNLO_; //number of partons for NLO process (used to steer matching/merging) + std::vector scales_; // scale value used to exclude EWK-produced + // partons from matching + int npLO_; // number of partons for LO process (used to steer + // matching/merging) + int npNLO_; // number of partons for NLO process (used to steer + // matching/merging) }; #endif // GeneratorEvent_LHEInterface_LHEEventProduct_h diff --git a/SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h b/SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h new file mode 100644 index 0000000000000..f2bddd327839e --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h @@ -0,0 +1,21 @@ +#ifndef SimDataFormats_GeneratorProducts_MEParamWeightGroupInfo_h +#define SimDataFormats_GeneratorProducts_MEParamWeightGroupInfo_h + +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" + +namespace gen { + class MEParamWeightGroupInfo : public WeightGroupInfo { + public: + MEParamWeightGroupInfo() : WeightGroupInfo() { weightType_ = WeightType::kMEParamWeights; } + MEParamWeightGroupInfo(std::string header, std::string name) : WeightGroupInfo(header, name) { + weightType_ = WeightType::kMEParamWeights; + } + MEParamWeightGroupInfo(std::string header) : MEParamWeightGroupInfo(header, header) {} + ~MEParamWeightGroupInfo() override {} + MEParamWeightGroupInfo(const MEParamWeightGroupInfo& other) : WeightGroupInfo(other) { copy(other); } + void copy(const MEParamWeightGroupInfo& other); + MEParamWeightGroupInfo* clone() const override; + }; +} // namespace gen + +#endif // SimDataFormats_GeneratorProducts_MEParamWeightGroupInfo_h diff --git a/SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h b/SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h new file mode 100644 index 0000000000000..fbfe0a0f1de8c --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h @@ -0,0 +1,63 @@ +#ifndef SimDataFormats_GeneratorProducts_PartonShowerWeightGroupInfo_h +#define SimDataFormats_GeneratorProducts_PartonShowerWeightGroupInfo_h + +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" + +namespace gen { + enum class PSVarType { muR, cNS, con, def, red, alphaS, LAST }; + enum class PSSplittingType { combined, g2gg, x2xg, g2qq, q2qg }; + + class PartonShowerWeightGroupInfo : public WeightGroupInfo { + public: + PartonShowerWeightGroupInfo() : PartonShowerWeightGroupInfo("") {} + PartonShowerWeightGroupInfo(std::string header, std::string name); + PartonShowerWeightGroupInfo(std::string header) : PartonShowerWeightGroupInfo(header, header) {} + PartonShowerWeightGroupInfo(const PartonShowerWeightGroupInfo &other) : WeightGroupInfo(other) { copy(other); } + ~PartonShowerWeightGroupInfo() override {} + void copy(const PartonShowerWeightGroupInfo &other); + PartonShowerWeightGroupInfo *clone() const override; + void setNameIsPythiaSyntax(bool isPythiaSyntax) { nameIsPythiaSyntax_ = isPythiaSyntax; } + bool nameIsPythiaSyntax() const { return nameIsPythiaSyntax_; } + int variationIndex(bool isISR, bool isUp, PSVarType variationType, PSSplittingType splittingType) const; + std::string variationName(bool isISR, bool isUp, PSVarType variationType, PSSplittingType splittingType) const; + int variationIndex(bool isISR, bool isUp, PSVarType variationType) const; + int isrCombinedUpIndex(PSVarType variationType) const { return variationIndex(true, true, variationType); } + int isrCombinedDownIndex(PSVarType variationType) const { return variationIndex(true, false, variationType); } + int fsrCombinedUpIndex(PSVarType variationType) const { return variationIndex(false, true, variationType); } + int fsrCombinedDownIndex(PSVarType variationType) const { return variationIndex(false, false, variationType); } + static void setGuessPSWeightIdx(bool guessPSWeightIdx) { guessPSWeightIdx_ = guessPSWeightIdx; } + int psWeightIdxGuess(const std::string &varName) const; + void printVariables() const; + + private: + bool nameIsPythiaSyntax_ = false; + static inline bool guessPSWeightIdx_ = false; + + const std::vector expectedOrderPythiaSyntax_ = { + "fsr:murfac=0.707", "fsr:murfac=1.414", "fsr:murfac=0.5", "fsr:murfac=2.0", + "fsr:murfac=0.25", "fsr:murfac=4.0", "fsr:g2gg:murfac=0.5", "fsr:g2gg:murfac=2.0", + "fsr:g2qq:murfac=0.5", "fsr:g2qq:murfac=2.0", "fsr:q2qg:murfac=0.5", "fsr:q2qg:murfac=2.0", + "fsr:x2xg:murfac=0.5", "fsr:x2xg:murfac=2.0", "fsr:g2gg:cns=-2.0", "fsr:g2gg:cns=2.0", + "fsr:g2qq:cns=-2.0", "fsr:g2qq:cns=2.0", "fsr:q2qg:cns=-2.0", "fsr:q2qg:cns=2.0", + "fsr:x2xg:cns=-2.0", "fsr:x2xg:cns=2.0", "isr:murfac=0.707", "isr:murfac=1.414", + "isr:murfac=0.5", "isr:murfac=2.0", "isr:murfac=0.25", "isr:murfac=4.0", + "isr:g2gg:murfac=0.5", "isr:g2gg:murfac=2.0", "isr:g2qq:murfac=0.5", "isr:g2qq:murfac=2.0", + "isr:q2qg:murfac=0.5", "isr:q2qg:murfac=2.0", "isr:x2xg:murfac=0.5", "isr:x2xg:murfac=2.0", + "isr:g2gg:cns=-2.0", "isr:g2gg:cns=2.0", "isr:g2qq:cns=-2.0", "isr:g2qq:cns=2.0", + "isr:q2qg:cns=-2.0", "isr:q2qg:cns=2.0", "isr:x2xg:cns=-2.0", "isr:x2xg:cns=2.0", + }; + const std::vector expectedOrder_ = { + "isrRedHi", "fsrRedHi", "isrRedLo", "fsrRedLo", "isrDefHi", + "fsrDefHi", "isrDefLo", "fsrDefLo", "isrConHi", "fsrConHi", + "isrConLo", "fsrConLo", "fsr_G2GG_muR_dn", "fsr_G2GG_muR_up", "fsr_G2QQ_muR_dn", + "fsr_G2QQ_muR_up", "fsr_Q2QG_muR_dn", "fsr_Q2QG_muR_up", "fsr_X2XG_muR_dn", "fsr_X2XG_muR_up", + "fsr_G2GG_cNS_dn", "fsr_G2GG_cNS_up", "fsr_G2QQ_cNS_dn", "fsr_G2QQ_cNS_up", "fsr_Q2QG_cNS_dn", + "fsr_Q2QG_cNS_up", "fsr_X2XG_cNS_dn", "fsr_X2XG_cNS_up", "isr_G2GG_muR_dn", "isr_G2GG_muR_up", + "isr_G2QQ_muR_dn", "isr_G2QQ_muR_up", "isr_Q2QG_muR_dn", "isr_Q2QG_muR_up", "isr_X2XG_muR_dn", + "isr_X2XG_muR_up", "isr_G2GG_cNS_dn", "isr_G2GG_cNS_up", "isr_G2QQ_cNS_dn", "isr_G2QQ_cNS_up", + "isr_Q2QG_cNS_dn", "isr_Q2QG_cNS_up", "isr_X2XG_cNS_dn", "isr_X2XG_cNS_up", + }; + }; +} // namespace gen + +#endif diff --git a/SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h b/SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h new file mode 100644 index 0000000000000..48bc4fe8671ee --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h @@ -0,0 +1,63 @@ +#ifndef SimDataFormats_GeneratorProducts_PdfWeightGroupInfo_h +#define SimDataFormats_GeneratorProducts_PdfWeightGroupInfo_h + +#include +#include +#include + +#include "LHAPDF/LHAPDF.h" +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" + +namespace gen { + enum PdfUncertaintyType { + kHessianUnc, + kMonteCarloUnc, + kVariationSet, + kUnknownUnc, + }; + + class PdfWeightGroupInfo : public WeightGroupInfo { + private: + PdfUncertaintyType uncertaintyType_; + bool hasAlphasVars_; + int alphasUpIndex_; + int alphasDownIndex_; + int parentLhapdfId_ = -1; + size_t parentLhapdfSize_ = -1; + std::string parentLhapdfError_; + std::vector lhaids_; + int parentLhapdfId(int lhaid) const { return lhaid - LHAPDF::lookupPDF(lhaid).second; } + + public: + PdfWeightGroupInfo() : WeightGroupInfo() { weightType_ = WeightType::kPdfWeights; } + PdfWeightGroupInfo(std::string header, std::string name) : WeightGroupInfo(header, name) { + weightType_ = WeightType::kPdfWeights; + } + PdfWeightGroupInfo(std::string header) : WeightGroupInfo(header) { weightType_ = WeightType::kPdfWeights; } + PdfWeightGroupInfo(const PdfWeightGroupInfo& other) : WeightGroupInfo(other) { copy(other); } + ~PdfWeightGroupInfo() override {} + void copy(const PdfWeightGroupInfo& other); + PdfWeightGroupInfo* clone() const override; + + void setUncertaintyType(PdfUncertaintyType uncertaintyType) { uncertaintyType_ = uncertaintyType; } + void setHasAlphasVariations(bool hasAlphasVars) { hasAlphasVars_ = hasAlphasVars; } + void setAlphasUpIndex(int alphasUpIndex) { alphasUpIndex_ = alphasUpIndex; } + void setAlphasDownIndex(int alphasDownIndex) { alphasDownIndex_ = alphasDownIndex; } + PdfUncertaintyType uncertaintyType() const { return uncertaintyType_; } + bool hasAlphasVariations() const { return hasAlphasVars_; } + void addLhaid(int lhaid); + std::vector& lhaIds() { return lhaids_; } + + bool isIdInParentSet(int lhaid) const { return parentLhapdfId_ == parentLhapdfId(lhaid); } + int parentLhapdfId() const { return parentLhapdfId_; } + void setParentLhapdfInfo(int lhaid); + + // need to remove + bool containsLhapdfId(int lhaid) const { return isIdInParentSet(lhaid); } + + int alphasUpIndex() const { return alphasUpIndex_; } + int alphasDownIndex() const { return alphasDownIndex_; } + }; +} // namespace gen + +#endif // SimDataFormats_GeneratorProducts_PdfWeightGroupInfo_h diff --git a/SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h b/SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h new file mode 100644 index 0000000000000..8d78f2e8a457e --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h @@ -0,0 +1,94 @@ +#ifndef SimDataFormats_GeneratorProducts_ScaleWeightGroupInfo_h +#define SimDataFormats_GeneratorProducts_ScaleWeightGroupInfo_h + +#include +#include + +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" + +namespace gen { + class ScaleWeightGroupInfo : public WeightGroupInfo { + private: + bool isFunctionalFormVar_; + std::vector muIndices_; + bool containsCentral_ = false; + int lhaid_ = -1; + bool weightIsCorrupt_ = false; + // Dyn_scale + std::vector dynNames_; + std::vector> dynVec_; + + inline int indexFromMus(int muR, int muF) const { return 3 * muR + muF; } + inline bool isValidValue(float mu) const { return mu == 0.5 || mu == 1.0 || mu == 2.0; } + + enum scaleIndices { + muR0p5_muF0p5_idx = 0, + muR0p5_muF1_idx = 1, + muR0p5_muF2_idx = 2, + muR1_muF0p5_idx = 3, + Central_idx = 4, + muR1_muF2_idx = 5, + muR2_muF0p5_idx = 6, + muR2_muF1_idx = 7, + muR2_muF2_idx = 8 + }; + + public: + static const unsigned int MIN_SCALE_VARIATIONS = 9; + ScaleWeightGroupInfo() : ScaleWeightGroupInfo("") {} + ScaleWeightGroupInfo(std::string header, std::string name) + : WeightGroupInfo(header, name), muIndices_(MIN_SCALE_VARIATIONS, -1), dynVec_(MIN_SCALE_VARIATIONS) { + weightType_ = WeightType::kScaleWeights; + isFunctionalFormVar_ = false; + } + ScaleWeightGroupInfo(std::string header) : ScaleWeightGroupInfo(header, header) {} + ScaleWeightGroupInfo(const ScaleWeightGroupInfo& other) : WeightGroupInfo(other) { copy(other); } + ~ScaleWeightGroupInfo() override {} + void copy(const ScaleWeightGroupInfo& other); + ScaleWeightGroupInfo* clone() const override; + bool containsCentralWeight() const { return containsCentral_; } + void addContainedId(int globalIndex, std::string id, std::string label, float muR, float muF); + void setWeightIsCorrupt() { + isWellFormed_ = false; + weightIsCorrupt_ = true; + } + + void setMuRMuFIndex(int globalIndex, std::string id, float muR, float muF); + void setDyn(int globalIndex, std::string id, float muR, float muF, size_t dynNum, std::string_view dynName); + int lhaid() { return lhaid_; } + void setLhaid(int lhaid) { lhaid_ = lhaid; } + // Is a variation of the functional form of the dynamic scale + bool isFunctionalFormVariation(); + void setIsFunctionalFormVariation(bool functionalVar) { isFunctionalFormVar_ = functionalVar; } + int centralIndex() const { return muIndices_.at(Central_idx); } + int muR1muF2Index() const { return muIndices_.at(muR1_muF2_idx); } + int muR1muF05Index() const { return muIndices_.at(muR1_muF0p5_idx); } + int muR2muF05Index() const { return muIndices_.at(muR2_muF0p5_idx); } + int muR2muF1Index() const { return muIndices_.at(muR2_muF1_idx); } + int muR2muF2Index() const { return muIndices_.at(muR2_muF2_idx); } + int muR05muF05Index() const { return muIndices_.at(muR0p5_muF0p5_idx); } + int muR05muF1Index() const { return muIndices_.at(muR0p5_muF1_idx); } + int muR05muF2Index() const { return muIndices_.at(muR0p5_muF2_idx); } + + // dynweight version + size_t centralIndex(std::string_view dynName) const { return scaleIndex(Central_idx, dynName); } + size_t muR1muF2Index(std::string_view dynName) const { return scaleIndex(muR1_muF2_idx, dynName); } + size_t muR1muF05Index(std::string_view dynName) const { return scaleIndex(muR1_muF0p5_idx, dynName); } + size_t muR2muF05Index(std::string_view dynName) const { return scaleIndex(muR2_muF0p5_idx, dynName); } + size_t muR2muF1Index(std::string_view dynName) const { return scaleIndex(muR2_muF1_idx, dynName); } + size_t muR2muF2Index(std::string_view dynName) const { return scaleIndex(muR2_muF2_idx, dynName); } + size_t muR05muF05Index(std::string_view dynName) const { return scaleIndex(muR0p5_muF0p5_idx, dynName); } + size_t muR05muF1Index(std::string_view dynName) const { return scaleIndex(muR0p5_muF1_idx, dynName); } + size_t muR05muF2Index(std::string_view dynName) const { return scaleIndex(muR0p5_muF2_idx, dynName); } + + size_t scaleIndex(float muR, float muF, size_t dynNum) const; + size_t scaleIndex(float muR, float muF, std::string_view dynName) const; + size_t scaleIndex(int index, std::string_view dynName) const; + size_t scaleIndex(float muR, float muF) const; + + size_t scaleIndex(int index, size_t dynNum) const { return dynVec_.at(index).at(dynNum); } + std::vector dynNames() const; + }; +} // namespace gen + +#endif diff --git a/SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h b/SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h new file mode 100644 index 0000000000000..21a20b891e160 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h @@ -0,0 +1,23 @@ +#ifndef SimDataFormats_GeneratorProducts_UnknownWeightGroupInfo_h +#define SimDataFormats_GeneratorProducts_UnknownWeightGroupInfo_h + +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" + +namespace gen { + class UnknownWeightGroupInfo : public WeightGroupInfo { + public: + UnknownWeightGroupInfo() : WeightGroupInfo() { weightType_ = WeightType::kUnknownWeights; } + UnknownWeightGroupInfo(std::string header, std::string name) : WeightGroupInfo(header, name) { + weightType_ = WeightType::kUnknownWeights; + isWellFormed_ = false; + } + UnknownWeightGroupInfo(std::string header) : WeightGroupInfo(header) { + weightType_ = WeightType::kUnknownWeights; + isWellFormed_ = false; + } + ~UnknownWeightGroupInfo() override {} + UnknownWeightGroupInfo* clone() const override; + }; +} // namespace gen + +#endif // SimDataFormats_GeneratorProducts_UnknownWeightGroupInfo_h diff --git a/SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h b/SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h new file mode 100644 index 0000000000000..e71d1d0f374a4 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h @@ -0,0 +1,104 @@ +#ifndef SimDataFormats_GeneratorProducts_WeightGroupInfo_h +#define SimDataFormats_GeneratorProducts_WeightGroupInfo_h + +/** \class PdfInfo + * + */ +#include +#include +#include +#include +#include + +namespace gen { + struct WeightMetaInfo { + size_t globalIndex; + size_t localIndex; + std::string id; + std::string label; + + bool operator==(const WeightMetaInfo& other) { + return (other.globalIndex == globalIndex && other.localIndex == localIndex && other.id == id && + other.label == label); + } + }; + + enum class WeightType : char { + kPdfWeights = 'P', + kScaleWeights = 's', + kMEParamWeights = 'm', + kPartonShowerWeights = 'p', + kUnknownWeights = 'u', + }; + + const std::array allWeightTypes = {{ + WeightType::kPdfWeights, + WeightType::kScaleWeights, + WeightType::kMEParamWeights, + WeightType::kPartonShowerWeights, + WeightType::kUnknownWeights, + }}; + + class WeightGroupInfo { + public: + WeightGroupInfo() : WeightGroupInfo("") {} + WeightGroupInfo(std::string header, std::string name) + : isWellFormed_(false), headerEntry_(header), name_(name), firstId_(-1), lastId_(-1) {} + WeightGroupInfo(std::string header) + : isWellFormed_(false), headerEntry_(header), name_(header), firstId_(-1), lastId_(-1) {} + WeightGroupInfo(const WeightGroupInfo& other) { copy(other); } + WeightGroupInfo& operator=(const WeightGroupInfo& other) { + copy(other); + return *this; + } + virtual ~WeightGroupInfo(){}; + void copy(const WeightGroupInfo& other); + virtual WeightGroupInfo* clone() const; + const WeightMetaInfo& weightMetaInfo(int weightEntry) const; + const WeightMetaInfo& weightMetaInfoByGlobalIndex(std::string& wgtId, int weightEntry) const; + const WeightMetaInfo& weightMetaInfoByGlobalIndex(int weightEntry) const; + int weightVectorEntry(std::string& wgtId) const; + bool containsWeight(std::string& wgtId, int weightEntry) const; + bool containsWeight(int weightEntry) const; + int weightVectorEntry(std::string& wgtId, int weightEntry) const; + void addContainedId(int weightEntry, std::string id, std::string label); + bool indexInRange(int index) const; + + void setName(std::string name) { name_ = name; } + void setDescription(std::string description) { description_ = description; } + void appendDescription(std::string description) { description_ += description; } + void setHeaderEntry(std::string header) { headerEntry_ = header; } + void setWeightType(WeightType type) { weightType_ = type; } + void setFirstId(int firstId) { firstId_ = firstId; } + void setLastId(int lastId) { lastId_ = lastId; } + // Call before doing lots of searches by label + void cacheWeightIndicesByLabel(); + + std::string name() const { return name_; } + std::string description() const { return description_; } + std::string headerEntry() const { return headerEntry_; } + WeightType weightType() const { return weightType_; } + std::vector idsContained() const { return idsContained_; } + size_t nIdsContained() const { return idsContained_.size(); } + int firstId() const { return firstId_; } + int lastId() const { return lastId_; } + // Store whether the group was fully parsed succesfully + void setIsWellFormed(bool wellFormed) { isWellFormed_ = wellFormed; } + bool isWellFormed() const { return isWellFormed_; } + int weightIndexFromLabel(std::string weightLabel) const; + std::vector weightLabels() const; + + protected: + bool isWellFormed_ = false; + std::string headerEntry_; + std::string name_; + std::string description_; + WeightType weightType_; + std::vector idsContained_; + int firstId_; + int lastId_; + std::unordered_map weightLabelsToIndices_; + }; +} // namespace gen + +#endif // SimDataFormats_GeneratorProducts_WeightGroupInfo_h diff --git a/SimDataFormats/GeneratorProducts/src/GenWeightInfoProduct.cc b/SimDataFormats/GeneratorProducts/src/GenWeightInfoProduct.cc new file mode 100644 index 0000000000000..9d6362d9c0c62 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/src/GenWeightInfoProduct.cc @@ -0,0 +1,131 @@ +#include "SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h" + +#include +#include + +#include "FWCore/Utilities/interface/Exception.h" +#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h" + +GenWeightInfoProduct::GenWeightInfoProduct(std::vector>& weightGroups) { + // Could just do a std::move on the vector, but copying is safer if the user + // expects the vector to still be usable + for (auto& ptr : weightGroups) { + std::unique_ptr cloneptr(ptr->clone()); + weightGroupsInfo_.emplace_back(std::move(cloneptr)); + } + auto it = std::find_if(std::begin(weightGroupsInfo_), std::end(weightGroupsInfo_), [](auto& entry) { + return entry->name() == "unassociated"; + }); + if (it != std::end(weightGroupsInfo_)) { + unassociatedIdx_ = std::distance(std::begin(weightGroupsInfo_), it); + } else + unassociatedIdx_ = -1; +} + +const std::vector>& GenWeightInfoProduct::allWeightGroupsInfo() const { + return weightGroupsInfo_; +} + +const std::vector GenWeightInfoProduct::allWeightGroupsInfoWithIndices() const { + std::vector groupInfo; + for (size_t i = 0; i < weightGroupsInfo_.size(); i++) + groupInfo.push_back({i, weightGroupsInfo_[i].get()}); + return groupInfo; +} + +gen::WeightGroupData GenWeightInfoProduct::containingWeightGroupInfo(int index, size_t startSearch) const { + // When filling the weights, most likely to find the weight matches the + // previous group or the one after + if (startSearch < weightGroupsInfo_.size() && weightGroupsInfo_[startSearch]->containsWeight(index)) + return {startSearch, weightGroupsInfo_[startSearch].get()}; + else if (startSearch + 1 < weightGroupsInfo_.size() && weightGroupsInfo_[startSearch + 1]->containsWeight(index)) + return {startSearch + 1, weightGroupsInfo_[startSearch + 1].get()}; + + auto it = std::find_if(std::begin(weightGroupsInfo_), std::end(weightGroupsInfo_), [index](auto& entry) { + return entry->containsWeight(index); + }); + if (it != std::end(weightGroupsInfo_)) + return {static_cast(std::distance(std::begin(weightGroupsInfo_), it)), it->get()}; + + throw cms::Exception("GenWeightInfoProduct") << "No weight group found containing weight index " << index + << " in the " << weightGroupsInfo_.size() << " groups."; +} + +const gen::WeightGroupInfo* GenWeightInfoProduct::orderedWeightGroupInfo(int weightGroupIndex) const { + if (weightGroupIndex >= static_cast(weightGroupsInfo_.size())) + throw cms::Exception("GenWeightInfoProduct") << "Weight index requested is outside the range of weights in the " + "product"; + return weightGroupsInfo_[weightGroupIndex].get(); +} + +std::vector GenWeightInfoProduct::weightGroupsAndIndicesByType(gen::WeightType type, + int maxStore) const { + std::vector matchingGroups; + size_t toStore = maxStore <= 0 ? weightGroupsInfo_.size() : std::min(maxStore, weightGroupsInfo_.size()); + for (size_t i = 0; i < weightGroupsInfo_.size(); i++) { + const gen::WeightGroupInfo* group = weightGroupsInfo_[i].get(); + if (group->weightType() == type) { + matchingGroups.push_back({i, group}); + if (matchingGroups.size() == toStore) + break; + } + } + return matchingGroups; +} + +std::vector GenWeightInfoProduct::weightGroupsByType(gen::WeightType type) const { + std::vector matchingGroups; + for (size_t i = 0; i < weightGroupsInfo_.size(); i++) { + const gen::WeightGroupInfo* group = weightGroupsInfo_[i].get(); + if (group->weightType() == type) + matchingGroups.push_back({i, group}); + } + return matchingGroups; +} + +std::optional GenWeightInfoProduct::pdfGroupWithIndexByLHAID(int lhaid) const { + std::vector pdfGroups = weightGroupsAndIndicesByType(gen::WeightType::kPdfWeights); + + auto matchingPdfSet = std::find_if(pdfGroups.begin(), pdfGroups.end(), [lhaid](auto& data) { + const auto* pdfGroup = static_cast(data.group); + return pdfGroup->containsLhapdfId(lhaid); + }); + + return matchingPdfSet == pdfGroups.end() + ? std::nullopt + : std::optional({matchingPdfSet->index, matchingPdfSet->group}); +} + +std::vector GenWeightInfoProduct::pdfGroupsWithIndicesByLHAIDs( + const std::vector& lhaids) const { + auto pdfGroups = weightGroupsAndIndicesByType(gen::WeightType::kPdfWeights); + std::vector groups; + + for (auto lhaid : lhaids) { + auto matchingPdfSet = std::find_if(pdfGroups.begin(), pdfGroups.end(), [lhaid](gen::WeightGroupData& data) { + const auto* pdfGroup = static_cast(data.group); + return pdfGroup->containsLhapdfId(lhaid); + }); + if (matchingPdfSet != pdfGroups.end()) { + pdfGroups.push_back({matchingPdfSet->index, matchingPdfSet->group}); + } + } + + return pdfGroups; +} + +std::vector GenWeightInfoProduct::weightGroupIndicesByType(gen::WeightType type) const { + std::vector matchingGroupIndices; + for (size_t i = 0; i < weightGroupsInfo_.size(); i++) { + if (weightGroupsInfo_[i]->weightType() == type) + matchingGroupIndices.push_back(i); + } + return matchingGroupIndices; +} + +void GenWeightInfoProduct::addWeightGroupInfo(std::unique_ptr info) { + if (info->name() == "unassociated") { + unassociatedIdx_ = weightGroupsInfo_.size(); + } + weightGroupsInfo_.push_back(std::move(info)); +} diff --git a/SimDataFormats/GeneratorProducts/src/MEParamWeightGroupInfo.cc b/SimDataFormats/GeneratorProducts/src/MEParamWeightGroupInfo.cc new file mode 100644 index 0000000000000..81d16402bc2a8 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/src/MEParamWeightGroupInfo.cc @@ -0,0 +1,8 @@ +#include "SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h" + +// To be expanded with more specific behaviours in the future +namespace gen { + void MEParamWeightGroupInfo::copy(const MEParamWeightGroupInfo& other) { WeightGroupInfo::copy(other); } + + MEParamWeightGroupInfo* MEParamWeightGroupInfo::clone() const { return new MEParamWeightGroupInfo(*this); } +} // namespace gen diff --git a/SimDataFormats/GeneratorProducts/src/PartonShowerWeightGroupInfo.cc b/SimDataFormats/GeneratorProducts/src/PartonShowerWeightGroupInfo.cc new file mode 100644 index 0000000000000..fe7f5b7519963 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/src/PartonShowerWeightGroupInfo.cc @@ -0,0 +1,124 @@ +#include "SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h" + +#include +#include + +namespace gen { + PartonShowerWeightGroupInfo::PartonShowerWeightGroupInfo(std::string header, std::string name) + : WeightGroupInfo(header, name) { + weightType_ = WeightType::kPartonShowerWeights; + } + + void PartonShowerWeightGroupInfo::copy(const PartonShowerWeightGroupInfo& other) { + WeightGroupInfo::copy(other); + nameIsPythiaSyntax_ = other.nameIsPythiaSyntax_; + } + + PartonShowerWeightGroupInfo* PartonShowerWeightGroupInfo::clone() const { + return new PartonShowerWeightGroupInfo(*this); + } + + int PartonShowerWeightGroupInfo::variationIndex(bool isISR, bool isUp, PSVarType variationType) const { + return variationIndex(isISR, isUp, variationType, PSSplittingType::combined); + } + + int PartonShowerWeightGroupInfo::variationIndex(bool isISR, + bool isUp, + PSVarType variationType, + PSSplittingType splittingType) const { + std::string varName = variationName(isISR, isUp, variationType, splittingType); + int wgtIdx = weightIndexFromLabel(varName); + // Guess PS idx if not in label list + if (wgtIdx == -1 && guessPSWeightIdx_) + wgtIdx = psWeightIdxGuess(varName); + return wgtIdx; + } + + std::string PartonShowerWeightGroupInfo::variationName(bool isISR, + bool isUp, + PSVarType variationType, + PSSplittingType splittingType) const { + std::string label = isISR ? "isr" : "fsr"; + + // if ((variationType == PSVarType::con || variationType == PSVarType::def || + // variationType == PSVarType::red) && + // splittingType != PSSplittingType::combined) + // throw std::invalid_argument("VariationType must be muR or CNS if + // subprocess is specified"); + + if (nameIsPythiaSyntax_) { + // Splitting + if (splittingType == PSSplittingType::g2gg) + label += ":g2gg"; + else if (splittingType == PSSplittingType::g2qq) + label += ":g2qq"; + else if (splittingType == PSSplittingType::x2xg) + label += ":x2xg"; + else if (splittingType == PSSplittingType::q2qg) + label += ":q2qg"; + // type + if (variationType == PSVarType::con) + label += isUp ? ":murfac=4.0" : ":murfac=0.25"; + else if (variationType == PSVarType::def || variationType == PSVarType::muR) + label += isUp ? ":murfac=2.0" : ":murfac=0.5"; + else if (variationType == PSVarType::red) + label += isUp ? ":murfac=1.414" : ":murfac=0.707"; + else if (variationType == PSVarType::cNS) + label += isUp ? ":cns=2.0" : ":cns=-2.0"; + } else { + // Splitting + if (splittingType == PSSplittingType::g2gg) + label += "_G2GG_"; + else if (splittingType == PSSplittingType::g2qq) + label += "_G2QQ_"; + else if (splittingType == PSSplittingType::x2xg) + label += "_X2XG_"; + else if (splittingType == PSSplittingType::q2qg) + label += "_Q2QG_"; + // type + if (variationType == PSVarType::con) + label += "Con"; + else if (variationType == PSVarType::def) + label += "Def"; + else if (variationType == PSVarType::muR) + label += "muR"; + else if (variationType == PSVarType::red) + label += "Red"; + else if (variationType == PSVarType::cNS) + label += "cNS"; + // Up/Down + if (splittingType != PSSplittingType::combined) { + label += isUp ? "_up" : "_dn"; + } else + label += isUp ? "Hi" : "Lo"; + } + return label; + } + + int PartonShowerWeightGroupInfo::psWeightIdxGuess(const std::string& varName) const { + int wgtIdx; + if (nameIsPythiaSyntax_) { + auto wgtIter = std::find(expectedOrderPythiaSyntax_.begin(), expectedOrderPythiaSyntax_.end(), varName); + wgtIdx = wgtIter - expectedOrderPythiaSyntax_.begin() + 2; + } else { + auto wgtIter = std::find(expectedOrder_.begin(), expectedOrder_.end(), varName); + wgtIdx = wgtIter - expectedOrder_.begin() + 2; + } + if (wgtIdx >= static_cast(nIdsContained())) + wgtIdx = -1; + return wgtIdx; + } + + void PartonShowerWeightGroupInfo::printVariables() const { + const auto& variations = (nameIsPythiaSyntax_) ? expectedOrderPythiaSyntax_ : expectedOrder_; + for (const auto& varName : variations) { + int wgtIdx = weightIndexFromLabel(varName); + // Guess PS idx if not in label list + if (wgtIdx == -1 && guessPSWeightIdx_) + wgtIdx = psWeightIdxGuess(varName); + if (wgtIdx != -1) + std::cout << varName << " : " << wgtIdx << std::endl; + } + } + +} // namespace gen diff --git a/SimDataFormats/GeneratorProducts/src/PdfWeightGroupInfo.cc b/SimDataFormats/GeneratorProducts/src/PdfWeightGroupInfo.cc new file mode 100644 index 0000000000000..818ad16a78ef2 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/src/PdfWeightGroupInfo.cc @@ -0,0 +1,29 @@ +#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h" + +namespace gen { + void PdfWeightGroupInfo::copy(const PdfWeightGroupInfo& other) { + uncertaintyType_ = other.uncertaintyType(); + hasAlphasVars_ = other.hasAlphasVariations(); + alphasUpIndex_ = other.alphasDownIndex(); + alphasDownIndex_ = other.alphasDownIndex(); + parentLhapdfId_ = other.parentLhapdfId(); + WeightGroupInfo::copy(other); + } + + PdfWeightGroupInfo* PdfWeightGroupInfo::clone() const { return new PdfWeightGroupInfo(*this); } + void PdfWeightGroupInfo::addLhaid(int lhaid) { + lhaids_.push_back(lhaid); + if (lhaids_.size() == parentLhapdfSize_) + setIsWellFormed(true); + else + setIsWellFormed(false); + } + + void PdfWeightGroupInfo::setParentLhapdfInfo(int lhaid) { + parentLhapdfId_ = lhaid; + LHAPDF::PDFSet pdfSet(LHAPDF::lookupPDF(lhaid).first); + parentLhapdfSize_ = pdfSet.size(); + parentLhapdfError_ = pdfSet.errorType(); + } + +} // namespace gen diff --git a/SimDataFormats/GeneratorProducts/src/ScaleWeightGroupInfo.cc b/SimDataFormats/GeneratorProducts/src/ScaleWeightGroupInfo.cc new file mode 100644 index 0000000000000..0e524ee9d415d --- /dev/null +++ b/SimDataFormats/GeneratorProducts/src/ScaleWeightGroupInfo.cc @@ -0,0 +1,105 @@ +#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h" + +#include +#include + +namespace gen { + void ScaleWeightGroupInfo::copy(const ScaleWeightGroupInfo& other) { + muIndices_ = other.muIndices_; + dynVec_ = other.dynVec_; + dynNames_ = other.dynNames_; + WeightGroupInfo::copy(other); + } + + ScaleWeightGroupInfo* ScaleWeightGroupInfo::clone() const { return new ScaleWeightGroupInfo(*this); } + + void ScaleWeightGroupInfo::addContainedId(int globalIndex, std::string id, std::string label, float muR, float muF) { + int idxDiff = firstId_ - globalIndex; + if (idxDiff > 0) { + for (auto& entry : muIndices_) { + entry += idxDiff; + } + for (auto& subVec : dynVec_) { + for (auto& entry : subVec) { + entry += idxDiff; + } + } + } + WeightGroupInfo::addContainedId(globalIndex, id, label); + setMuRMuFIndex(globalIndex, id, muR, muF); + } + + void ScaleWeightGroupInfo::setMuRMuFIndex(int globalIndex, std::string id, float muR, float muF) { + auto info = weightMetaInfoByGlobalIndex(id, globalIndex); + int index = indexFromMus(muR, muF); + if (!(isValidValue(muR) && isValidValue(muF))) { + setWeightIsCorrupt(); + return; + } + if (index == Central_idx) + containsCentral_ = true; + muIndices_[index] = info.localIndex; + + for (int muidx : muIndices_) { + if (muidx == -1) + return; + } + isWellFormed_ = !weightIsCorrupt_; + } + + void ScaleWeightGroupInfo::setDyn( + int globalIndex, std::string id, float muR, float muF, size_t dynNum, std::string_view dynName) { + auto info = weightMetaInfoByGlobalIndex(id, globalIndex); + int index = indexFromMus(muR, muF); + // resize if too small + if (dynVec_.at(index).size() < dynNum + 1) { + for (auto& dynIt : dynVec_) + dynIt.resize(dynNum + 1); + dynNames_.resize(dynNum + 1); + } + + if (dynNames_.at(dynNum).empty()) + dynNames_[dynNum] = dynName; + dynVec_[index][dynNum] = info.localIndex; + } + + size_t ScaleWeightGroupInfo::scaleIndex(float muR, float muF, std::string_view dynName) const { + auto it = std::find(dynNames_.begin(), dynNames_.end(), dynName); + if (it == dynNames_.end()) + return -1; + else + return scaleIndex(muR, muF, it - dynNames_.begin()); + } + + size_t ScaleWeightGroupInfo::scaleIndex(int index, std::string_view dynName) const { + auto it = std::find(dynNames_.begin(), dynNames_.end(), dynName); + if (it == dynNames_.end()) + return -1; + else + return scaleIndex(index, it - dynNames_.begin()); + } + + size_t ScaleWeightGroupInfo::scaleIndex(float muR, float muF, size_t dynNum) const { + ; + if (!(isValidValue(muR) && isValidValue(muF)) || dynNum + 1 > dynNames_.size()) + return -1; + else + return scaleIndex(indexFromMus(muR, muF), dynNum); + } + size_t ScaleWeightGroupInfo::scaleIndex(float muR, float muF) const { + if (!(isValidValue(muR) && isValidValue(muF))) + return -1; + else + return muIndices_.at(indexFromMus(muR, muF)); + } + + std::vector ScaleWeightGroupInfo::dynNames() const { + std::vector returnVec; + for (const auto& item : dynNames_) { + if (!item.empty()) + returnVec.push_back(item); + } + return returnVec; + } + +} // namespace gen diff --git a/SimDataFormats/GeneratorProducts/src/UnknownWeightGroupInfo.cc b/SimDataFormats/GeneratorProducts/src/UnknownWeightGroupInfo.cc new file mode 100644 index 0000000000000..8c33d59eff35b --- /dev/null +++ b/SimDataFormats/GeneratorProducts/src/UnknownWeightGroupInfo.cc @@ -0,0 +1,5 @@ +#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h" + +namespace gen { + UnknownWeightGroupInfo* UnknownWeightGroupInfo::clone() const { return new UnknownWeightGroupInfo(*this); } +} // namespace gen diff --git a/SimDataFormats/GeneratorProducts/src/WeightGroupInfo.cc b/SimDataFormats/GeneratorProducts/src/WeightGroupInfo.cc new file mode 100644 index 0000000000000..b4dbb36b50778 --- /dev/null +++ b/SimDataFormats/GeneratorProducts/src/WeightGroupInfo.cc @@ -0,0 +1,129 @@ +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" + +#include +#include +#include + +#include "FWCore/Utilities/interface/Exception.h" + +namespace gen { + void WeightGroupInfo::copy(const WeightGroupInfo& other) { + isWellFormed_ = other.isWellFormed_; + headerEntry_ = other.headerEntry_; + name_ = other.name_; + description_ = other.description_; + weightType_ = other.weightType_; + idsContained_ = other.idsContained_; + firstId_ = other.firstId_; + lastId_ = other.lastId_; + } + + WeightGroupInfo* WeightGroupInfo::clone() const { + throw cms::Exception("WeightGroupInfo") << "In group " << name_ + << ": WeightGroupInfo is abstract, so it's clone() method can't be " + "implemented."; + } + + const WeightMetaInfo& WeightGroupInfo::weightMetaInfo(int weightEntry) const { + if (weightEntry < 0 || weightEntry >= static_cast(idsContained_.size())) + throw cms::Exception("WeightGroupInfo") + << "Local index " << std::to_string(weightEntry) << " is not within the range of group " << name_ + << " which has " << idsContained_.size() << " entries\n"; + return idsContained_.at(weightEntry); + } + + const WeightMetaInfo& WeightGroupInfo::weightMetaInfoByGlobalIndex(int weightEntry) const { + std::string emptyLabel = ""; + return weightMetaInfoByGlobalIndex(emptyLabel, weightEntry); + } + + const WeightMetaInfo& WeightGroupInfo::weightMetaInfoByGlobalIndex(std::string& wgtId, int weightEntry) const { + int entry = weightVectorEntry(wgtId, weightEntry); + if (entry < 0 || entry >= static_cast(idsContained_.size())) + throw cms::Exception("WeightGroupInfo") + << "Weight entry " << std::to_string(weightEntry) << " is not a member of group " << name_ + << ". \n firstID = " << std::to_string(firstId_) << " lastId = " << std::to_string(lastId_); + return idsContained_.at(entry); + } + + int WeightGroupInfo::weightVectorEntry(std::string& wgtId) const { return weightVectorEntry(wgtId, 0); } + + bool WeightGroupInfo::containsWeight(std::string& wgtId, int weightEntry) const { + return weightVectorEntry(wgtId, weightEntry) != -1; + } + + bool WeightGroupInfo::containsWeight(int weightEntry) const { + std::string id = std::to_string(weightEntry); + return weightVectorEntry(id, weightEntry) != -1; + } + + int WeightGroupInfo::weightVectorEntry(std::string& wgtId, int weightEntry) const { + // First try direct comparison assuming ordered indices + int orderedEntry = weightEntry - firstId_; + if (indexInRange(weightEntry) && orderedEntry < static_cast(idsContained_.size())) { + if (!wgtId.empty() && idsContained_.at(orderedEntry).id == wgtId) { + return orderedEntry; + } else if (static_cast(idsContained_.at(orderedEntry).globalIndex) == weightEntry) { + return orderedEntry; + } + } + // Fall back to global search on ID or global index + auto it = std::find_if(idsContained_.begin(), idsContained_.end(), [wgtId, weightEntry](const WeightMetaInfo& w) { + return wgtId.empty() ? static_cast(w.globalIndex) == weightEntry : w.id == wgtId; + }); + if (it != idsContained_.end()) + return std::distance(idsContained_.begin(), it); + return -1; + } + + void WeightGroupInfo::addContainedId(int weightEntry, std::string id, std::string label = "") { + if (id.empty()) + id = std::to_string(weightEntry); + + if (firstId_ < 0 || weightEntry < firstId_) { + firstId_ = weightEntry; + for (auto& entry : idsContained_) // Reset if indices need to be shifted + entry.localIndex++; + } + if (weightEntry > lastId_) + lastId_ = weightEntry; + + size_t localIndex = std::min(weightEntry - firstId_, static_cast(idsContained_.size())); + WeightMetaInfo info = {static_cast(weightEntry), localIndex, id, label}; + // logic to insert for all cases e.g. inserting in the middle of the vector + if (localIndex == idsContained_.size()) + idsContained_.emplace_back(info); + else + idsContained_.insert(idsContained_.begin() + localIndex, info); + } + + bool WeightGroupInfo::indexInRange(int index) const { return (index <= lastId_ && index >= firstId_); } + + void WeightGroupInfo::cacheWeightIndicesByLabel() { + for (const auto& weight : idsContained_) + weightLabelsToIndices_[weight.label] = weight.localIndex; + } + + std::vector WeightGroupInfo::weightLabels() const { + std::vector labels; + labels.reserve(idsContained_.size()); + for (const auto& weight : idsContained_) + labels.push_back(weight.label); + return labels; + } + + int WeightGroupInfo::weightIndexFromLabel(std::string weightLabel) const { + if (!weightLabelsToIndices_.empty()) { + if (weightLabelsToIndices_.find(weightLabel) != weightLabelsToIndices_.end()) + return static_cast(weightLabelsToIndices_.at(weightLabel)); + return -1; + } + + auto it = std::find_if( + idsContained_.begin(), idsContained_.end(), [weightLabel](const auto& w) { return weightLabel == w.label; }); + if (it == idsContained_.end()) + return -1; + return std::distance(idsContained_.begin(), it); + } + +} // namespace gen diff --git a/SimDataFormats/GeneratorProducts/src/classes.h b/SimDataFormats/GeneratorProducts/src/classes.h index f4ef658588213..a8cc33fd99d37 100644 --- a/SimDataFormats/GeneratorProducts/src/classes.h +++ b/SimDataFormats/GeneratorProducts/src/classes.h @@ -1,28 +1,34 @@ -#include -#include +#include + #include +#include #include +#include +#include -#include "DataFormats/Common/interface/Wrapper.h" #include "DataFormats/Common/interface/RefVector.h" - +#include "DataFormats/Common/interface/Wrapper.h" +#include "SimDataFormats/GeneratorProducts/interface/ExternalGeneratorEventInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/ExternalGeneratorLumiInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenFilterInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoHeader.h" +#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenRunInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/GenWeightProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h" #include "SimDataFormats/GeneratorProducts/interface/LHEEventProduct.h" #include "SimDataFormats/GeneratorProducts/interface/LHERunInfoProduct.h" #include "SimDataFormats/GeneratorProducts/interface/LHEXMLStringProduct.h" +#include "SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h" +#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h" -#include "SimDataFormats/GeneratorProducts/interface/HepMCProduct.h" - -#include "SimDataFormats/GeneratorProducts/interface/GenRunInfoProduct.h" -#include "SimDataFormats/GeneratorProducts/interface/GenFilterInfo.h" -#include "SimDataFormats/GeneratorProducts/interface/GenEventInfoProduct.h" -#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoProduct.h" -#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoHeader.h" -#include "SimDataFormats/GeneratorProducts/interface/ExternalGeneratorLumiInfo.h" -#include "SimDataFormats/GeneratorProducts/interface/ExternalGeneratorEventInfo.h" - -#include - -//needed for backward compatibility between HepMC 2.06.xx and 2.05.yy +// needed for backward compatibility between HepMC 2.06.xx and 2.05.yy namespace hepmc_rootio { void add_to_particles_in(HepMC::GenVertex*, HepMC::GenParticle*); void clear_particles_in(HepMC::GenVertex*); diff --git a/SimDataFormats/GeneratorProducts/src/classes_def.xml b/SimDataFormats/GeneratorProducts/src/classes_def.xml index 8713c2309ca58..4b99a2f1908fe 100644 --- a/SimDataFormats/GeneratorProducts/src/classes_def.xml +++ b/SimDataFormats/GeneratorProducts/src/classes_def.xml @@ -204,6 +204,8 @@ + + @@ -228,9 +230,29 @@ + + + + + + + - - + + + + + + + + + + + + + + + @@ -248,5 +270,7 @@ + +