diff --git a/code_generation/analysis_template.cxx b/code_generation/analysis_template.cxx index 990f2939..31b05dd6 100644 --- a/code_generation/analysis_template.cxx +++ b/code_generation/analysis_template.cxx @@ -156,73 +156,80 @@ int main(int argc, char *argv[]) { const std::string analysis = {ANALYSISTAG}; const std::string config = {CONFIGTAG}; const std::string era = {ERATAG}; - const std::string sample = {SAMPLETAG}; + const std::string sample_type = {SAMPLETAG}; const std::string commit_hash = {COMMITHASH}; bool setup_clean = {CROWN_IS_CLEAN}; const std::string analysis_commit_hash = {ANALYSIS_COMMITHASH}; bool analysis_setup_clean = {ANALYSIS_IS_CLEAN}; int scope_counter = 0; for (auto const &output : output_quantities) { - // output.first is the output file name - // output.second is the list of quantities + TFile outputfile(output.first.c_str(), "UPDATE"); - TTree quantities_meta = TTree("quantities", "quantities"); - for (auto const &quantity : output.second) { - quantities_meta.Branch(quantity.c_str(), &setup_clean); - } - quantities_meta.Write(); - TTree variations_meta = TTree("variations", "variations"); - for (auto const &variation : variations.at(output.first)) { - variations_meta.Branch(variation.c_str(), &setup_clean); - } - variations_meta.Write(); - TTree conditions_meta = TTree("conditions", "conditions"); - conditions_meta.Branch(analysis.c_str(), &setup_clean); - conditions_meta.Branch(config.c_str(), &setup_clean); - conditions_meta.Branch(era.c_str(), &setup_clean); - conditions_meta.Branch(sample.c_str(), &setup_clean); - conditions_meta.Write(); - TTree commit_meta = TTree("commit", "commit"); - commit_meta.Branch(commit_hash.c_str(), &setup_clean); - commit_meta.Fill(); - commit_meta.Write(); - TTree analysis_commit_meta = - TTree("analysis_commit", "analysis_commit"); - analysis_commit_meta.Branch(analysis_commit_hash.c_str(), - &analysis_setup_clean); - analysis_commit_meta.Fill(); - analysis_commit_meta.Write(); + + // 
----------------------------- + // Unified metadata object + // ----------------------------- + nlohmann::json j; + + j["metadata"] = { + {"analysis", analysis}, + {"config", config}, + {"era", era}, + {"sample_type", sample_type}, + {"commit", commit_hash}, + {"analysis_commit", analysis_commit_hash}, + {"is_clean", setup_clean}, + {"analysis_is_clean", analysis_setup_clean} + }; + + j["content"] = { + {"quantities", output.second}, + {"variations", variations.at(output.first)} + }; + + TObjString json(j.dump().c_str()); + outputfile.WriteObject(&json, "metadata"); + + // ----------------------------- + // Cutflow histogram + // ----------------------------- if (nevents != 0) { + TH1D cutflow; cutflow.SetName("cutflow"); cutflow.SetTitle("cutflow"); - // iterate through the cutflow vector and fill the histogram with - // the .GetPass() values - if (scope_counter >= cutReports.size()) { + + if (cutReports.size() <= scope_counter || cutReports.empty()) { Logger::get("main")->critical( - "Cutflow vector is too small, this should not happen"); + "cutReports vector is too small, this should not happen"); return 1; } + for (auto cut = cutReports[scope_counter].begin(); - cut != cutReports[scope_counter].end(); cut++) { - cutflow.SetBinContent( - std::distance(cutReports[scope_counter].begin(), cut) + 1, - cut->GetPass()); - cutflow.GetXaxis()->SetBinLabel( - std::distance(cutReports[scope_counter].begin(), cut) + 1, - cut->GetName().c_str()); + cut != cutReports[scope_counter].end(); cut++) { + + int bin = std::distance(cutReports[scope_counter].begin(), cut) + 1; + + cutflow.SetBinContent(bin, cut->GetPass()); + cutflow.GetXaxis()->SetBinLabel(bin, cut->GetName().c_str()); } - // store it in the output file + cutflow.Write(); } - outputfile.Close(); - TFile *fout = TFile::Open(output.first.c_str(), "UPDATE"); + + // ----------------------------- + // Additional shift maps + // ----------------------------- Logger::get("main")->info("Writing quantities map to {}", 
output.first); - fout->WriteObject(&shift_quantities_map.at(output.first), - "shift_quantities_map"); - fout->WriteObject(&quantities_shift_map.at(output.first), - "quantities_shift_map"); - fout->Close(); + + outputfile.WriteObject(&shift_quantities_map.at(output.first), + "shift_quantities_map"); + + outputfile.WriteObject(&quantities_shift_map.at(output.first), + "quantities_shift_map"); + + outputfile.Close(); + scope_counter++; } diff --git a/code_generation/analysis_template_friends.cxx b/code_generation/analysis_template_friends.cxx index 1b1615d8..ace2d942 100644 --- a/code_generation/analysis_template_friends.cxx +++ b/code_generation/analysis_template_friends.cxx @@ -211,73 +211,80 @@ int main(int argc, char *argv[]) { const std::string analysis = {ANALYSISTAG}; const std::string config = {CONFIGTAG}; const std::string era = {ERATAG}; - const std::string sample = {SAMPLETAG}; + const std::string sample_type = {SAMPLETAG}; const std::string commit_hash = {COMMITHASH}; bool setup_clean = {CROWN_IS_CLEAN}; const std::string analysis_commit_hash = {ANALYSIS_COMMITHASH}; bool analysis_setup_clean = {ANALYSIS_IS_CLEAN}; int scope_counter = 0; for (auto const &output : output_quantities) { - // output.first is the output file name - // output.second is the list of quantities + TFile outputfile(output.first.c_str(), "UPDATE"); - TTree quantities_meta = TTree("quantities", "quantities"); - for (auto const &quantity : output.second) { - quantities_meta.Branch(quantity.c_str(), &setup_clean); - } - quantities_meta.Write(); - TTree variations_meta = TTree("variations", "variations"); - for (auto const &variation : variations.at(output.first)) { - variations_meta.Branch(variation.c_str(), &setup_clean); - } - variations_meta.Write(); - TTree conditions_meta = TTree("conditions", "conditions"); - conditions_meta.Branch(analysis.c_str(), &setup_clean); - conditions_meta.Branch(config.c_str(), &setup_clean); - conditions_meta.Branch(era.c_str(), &setup_clean); - 
conditions_meta.Branch(sample.c_str(), &setup_clean); - conditions_meta.Write(); - TTree commit_meta = TTree("commit", "commit"); - commit_meta.Branch(commit_hash.c_str(), &setup_clean); - commit_meta.Fill(); - commit_meta.Write(); - TTree analysis_commit_meta = - TTree("analysis_commit", "analysis_commit"); - analysis_commit_meta.Branch(analysis_commit_hash.c_str(), - &analysis_setup_clean); - analysis_commit_meta.Fill(); - analysis_commit_meta.Write(); + + // ----------------------------- + // Unified metadata container + // ----------------------------- + nlohmann::json j; + + j["metadata"] = { + {"analysis", analysis}, + {"config", config}, + {"era", era}, + {"sample_type", sample_type}, + {"commit", commit_hash}, + {"analysis_commit", analysis_commit_hash}, + {"is_clean", setup_clean}, + {"analysis_is_clean", analysis_setup_clean} + }; + + j["content"] = { + {"quantities", output.second}, + {"variations", variations.at(output.first)} + }; + + TObjString json(j.dump().c_str()); + outputfile.WriteObject(&json, "metadata"); + + // ----------------------------- + // Cutflow + // ----------------------------- if (nevents != 0) { + TH1D cutflow; cutflow.SetName("cutflow"); cutflow.SetTitle("cutflow"); - // iterate through the cutflow vector and fill the histogram with - // the .GetPass() values + if (cutReports.size() <= scope_counter || cutReports.empty()) { Logger::get("main")->critical( "cutReports vector is too small, this should not happen"); return 1; } + for (auto cut = cutReports[scope_counter].begin(); - cut != cutReports[scope_counter].end(); cut++) { - cutflow.SetBinContent( - std::distance(cutReports[scope_counter].begin(), cut) + 1, - cut->GetPass()); - cutflow.GetXaxis()->SetBinLabel( - std::distance(cutReports[scope_counter].begin(), cut) + 1, - cut->GetName().c_str()); + cut != cutReports[scope_counter].end(); cut++) { + + int bin = std::distance(cutReports[scope_counter].begin(), cut) + 1; + + cutflow.SetBinContent(bin, cut->GetPass()); + 
cutflow.GetXaxis()->SetBinLabel(bin, cut->GetName().c_str()); } - // store it in the output file + cutflow.Write(); } - outputfile.Close(); - TFile *fout = TFile::Open(output.first.c_str(), "UPDATE"); + + // ----------------------------- + // Shift maps + // ----------------------------- Logger::get("main")->info("Writing quantities map to {}", output.first); - fout->WriteObject(&shift_quantities_map.at(output.first), - "shift_quantities_map"); - fout->WriteObject(&quantities_shift_map.at(output.first), - "quantities_shift_map"); - fout->Close(); + + outputfile.WriteObject(&shift_quantities_map.at(output.first), + "shift_quantities_map"); + + outputfile.WriteObject(&quantities_shift_map.at(output.first), + "quantities_shift_map"); + + outputfile.Close(); + scope_counter++; } diff --git a/code_generation/code_generation.py b/code_generation/code_generation.py index 6593831e..39d92264 100644 --- a/code_generation/code_generation.py +++ b/code_generation/code_generation.py @@ -390,12 +390,12 @@ def write_code(self, calls: str, includes: str, run_commands: str) -> None: .replace(" // {RUN_COMMANDS}", run_commands) .replace("// {MULTITHREADING}", threadcall) .replace("// {DEBUGLEVEL}", self.set_debug_flag()) - .replace("{ERATAG}", '"Era={}"'.format(self.configuration.era)) + .replace("{ERATAG}", '"{}"'.format(self.configuration.era)) .replace( - "{SAMPLETAG}", '"Samplegroup={}"'.format(self.configuration.sample) + "{SAMPLETAG}", '"{}"'.format(self.configuration.sample) ) - .replace("{ANALYSISTAG}", '"Analysis={}"'.format(self.analysis_name)) - .replace("{CONFIGTAG}", '"Config={}"'.format(self.config_name)) + .replace("{ANALYSISTAG}", '"{}"'.format(self.analysis_name)) + .replace("{CONFIGTAG}", '"{}"'.format(self.config_name)) .replace("{OUTPUT_QUANTITIES}", self.set_output_quantities()) .replace("{SHIFT_QUANTITIES_MAP}", self.set_shift_quantities_map()) .replace("{QUANTITIES_SHIFT_MAP}", self.set_quantities_shift_map()) diff --git a/code_generation/friend_trees.py 
b/code_generation/friend_trees.py index 84636249..c3f72bfe 100644 --- a/code_generation/friend_trees.py +++ b/code_generation/friend_trees.py @@ -6,7 +6,7 @@ import os from time import time from code_generation.configuration import Configuration -from typing import List, Union, Dict, Set +from typing import List, Union, Dict, Set, Tuple, Any from code_generation.exceptions import ( ConfigurationError, @@ -141,21 +141,20 @@ def _determine_requested_shifts(self, shiftset: Set[str]) -> Dict[str, List[str] def _readout_input_information( self, input_information_list: Union[List[str], List[Dict[str, List[str]]]], + metadata: Dict[str, Any] = {}, ) -> Dict[str, Dict[str, List[str]]]: def update_input_information(existing_data, new_data): - if existing_data == {}: - return new_data - else: - # otherwise we have to merge the contents, while not overwriting existing data - for scope in new_data.keys(): - if scope not in existing_data.keys(): - existing_data[scope] = {} - for shift in new_data[scope].keys(): - if shift not in existing_data[scope].keys(): - existing_data[scope][shift] = [] - for quantity in new_data[scope][shift]: - if quantity not in existing_data[scope][shift]: - existing_data[scope][shift].append(quantity) + # Merge contents, while not overwriting existing data + for scope in new_data.keys(): + if scope not in existing_data.keys(): + existing_data[scope] = {} + for shift in new_data[scope].keys(): + if shift not in existing_data[scope].keys(): + existing_data[scope][shift] = [] + for quantity in new_data[scope][shift]: + if quantity not in existing_data[scope][shift]: + # Add origin config to quantity for navigation with multifriends + existing_data[scope][shift].append((quantity, metadata["config"])) + return existing_data # first check if the input is a root file or a json file @@ -164,13 +163,11 @@ def update_input_information(existing_data, new_data): log.info(f"adding input information from {input_information}") if isinstance(input_information, str):
if input_information.endswith(".root"): - data = update_input_information( - data, self._readout_input_root_file(input_information) - ) + shift_map, metadata = self._readout_input_root_file(input_information) + data = update_input_information(data, shift_map) elif input_information.endswith(".json"): - data = update_input_information( - data, self._readout_input_json_file(input_information) - ) + shift_map, metadata = self._readout_input_json_file(input_information) + data = update_input_information(data, shift_map) else: error_message = f"\n Input information file {input_information} is not a json or root file \n" error_message += ( @@ -183,7 +180,7 @@ def update_input_information(existing_data, new_data): def _readout_input_root_file( self, input_file: str - ) -> Dict[str, Dict[str, List[str]]]: + ) -> Tuple[Dict[str, Dict[str, List[str]]], Dict[str, Any]]: """Read the shift_quantities_map from the input root file and return it as a dictionary Args: @@ -207,7 +204,7 @@ def _readout_input_root_file( if not os.path.exists(lib_path): log.error(f"Missing library: {lib_path}") # Evaluate ROOT-specific return codes - result = ROOT.gSystem.Load(lib_path) + result = ROOT.gSystem.Load(lib_path) # type: ignore if result < 0: err_type = ( "Version mismatch" @@ -217,19 +214,19 @@ def _readout_input_root_file( log.error(f"Load failed ({result}): {err_type} for {lib_path}") f = ROOT.TFile.Open(input_file) # type: ignore - name = "shift_quantities_map" - m = f.Get(name) + m = f.Get("shift_quantities_map") for shift, quantities in m: data[str(shift)] = [str(quantity) for quantity in quantities] + metadata = json.loads(f.Get("metadata").GetString().Data()) f.Close() log.debug( f"Reading quantities information took {round(time() - start,2)} seconds" ) - return {list(self.selected_scopes)[0]: data} + return {list(self.selected_scopes)[0]: data}, metadata["metadata"] def _readout_input_json_file( self, input_file: str - ) -> Dict[str, Dict[str, List[str]]]: + ) -> Tuple[Dict[str, 
Dict[str, List[str]]], Dict[str, Any]]: """Read the shift_quantities_map from the input json file and return it as a dictionary Args: @@ -240,26 +237,26 @@ def _readout_input_json_file( """ with open(input_file) as f: data = json.load(f) + quantity_data = data["quantities"] + metadata = data["metadata"] # json file structure is: {era: {sampletype: {scope: {shift: [quantities]}}} - if self.era not in data: + if self.era not in quantity_data or self.era != metadata["era"]: errorstring = ( f"Era {self.era} not found in input information file {input_file}.\n" ) - errorstring += f"Available eras are: {data.keys()}" + errorstring += f"Available eras are: {quantity_data.keys()}" raise ConfigurationError(errorstring) - if self.sample not in data[self.era].keys(): + if self.sample not in quantity_data[self.era].keys() or self.sample != metadata["sample_type"]: errorstring = f"Sampletype {self.sample} not found in input information file {input_file}.\n" - errorstring += f"Available sampletypes are: {data[self.era].keys()}" + errorstring += f"Available sampletypes are: {quantity_data[self.era].keys()}" raise ConfigurationError(errorstring) if not set(self.selected_scopes).issubset( - set(data[self.era][self.sample].keys()) + set(quantity_data[self.era][self.sample].keys()) ): errorstring = f"Scopes {self.selected_scopes} not found in input information file {input_file}.\n" - errorstring += f"Available scopes are: {data[self.era][self.sample].keys()}" + errorstring += f"Available scopes are: {quantity_data[self.era][self.sample].keys()}" raise ConfigurationError(errorstring) - else: - data = data[self.era][self.sample] - return data + return quantity_data[self.era][self.sample], metadata def optimize(self) -> None: """ @@ -375,7 +372,7 @@ def _validate_inputs(self) -> None: [x.name for x in producer.get_outputs(scope)] ) # get all available inputs - for input_quantitiy in self.input_quantities_mapping[scope][""]: + for input_quantitiy, quantity_origin in 
self.input_quantities_mapping[scope][""]: available_inputs.add(input_quantitiy) # now check if all inputs are available missing_inputs = required_inputs - available_inputs @@ -460,4 +457,4 @@ def expanded_configuration(self) -> Configuration: raise ConfigurationError(error_msg) self.config_parameters = expanded_configuration - return self + return self \ No newline at end of file diff --git a/code_generation/helpers.py b/code_generation/helpers.py index 75a5ce04..dc78568d 100644 --- a/code_generation/helpers.py +++ b/code_generation/helpers.py @@ -1,9 +1,10 @@ from __future__ import annotations # needed for type annotations in > python 3.7 +from typing import Any # File with helper functions for the CROWN code generation -def is_empty(value): +def is_empty(value: Any) -> bool: """ Check if a value is empty. @@ -13,12 +14,11 @@ def is_empty(value): Returns: bool: Whether the input value is considered 'empty' """ - # List of all values that should be considered empty despite not having a length. empty_values = [None] try: length = len(value) except TypeError: length = -1 - bool_val = value in empty_values or length == 0 - return bool_val + + return value in empty_values or length == 0 \ No newline at end of file