diff --git a/DeepSets/python/ElectronModel.py b/DeepSets/python/ElectronModel.py index b211b9f..f91d315 100644 --- a/DeepSets/python/ElectronModel.py +++ b/DeepSets/python/ElectronModel.py @@ -1,262 +1,346 @@ #!/usr/bin/env python from DisappTrksML.DeepSets.python.architecture import * import sys - -class ElectronModel(DeepSetsArchitecture): - - def __init__(self, - eta_range=0.25, phi_range=0.25, - max_hits=100, - phi_layers=[64, 64, 256], f_layers =[64, 64, 64], - track_info_indices=[4, 8, 9, 12]): - DeepSetsArchitecture.__init__(self, eta_range, phi_range, max_hits, phi_layers, f_layers, track_info_indices) - self.track_info_shape = len(track_info_indices) - - def eventSelectionTraining(self, event): - trackPasses = [] - for track in event.tracks: - if (abs(track.eta) >= 2.4 or - track.inGap or - abs(track.dRMinJet) < 0.5 or - abs(track.deltaRToClosestElectron) < 0.15 or - abs(track.deltaRToClosestMuon) < 0.15 or - abs(track.deltaRToClosestTauHad) < 0.15): - trackPasses.append(False) - else: - trackPasses.append(True) - return (True in trackPasses), trackPasses - - def convertTrackFromTree(self, event, track, class_label): - hits = [] - miniHits = [] - - for hit in event.recHits: - dEta, dPhi = imageCoordinates(track, hit) - if abs(dEta) >= self.eta_range or abs(dPhi) >= self.phi_range: - continue - detIndex = detectorIndex(hit.detType) - energy = hit.energy if detIndex != 2 else 1 - hits.append((dEta, dPhi, energy, detIndex)) - - ''' - for hit in event.miniRecHits: - dEta, dPhi = imageCoordinates(track, hit) - if abs(dEta) >= self.eta_range or abs(dPhi) >= self.phi_range: - continue - detIndex = detectorIndex(hit.detType) - energy = hit.energy if detIndex != 2 else 1 - miniHits.append((dEta, dPhi, energy, detIndex)) - ''' - if len(hits) > 0: - hits = np.reshape(hits, (len(hits), 4)) - hits = hits[hits[:, 2].argsort()] - hits = np.flip(hits, axis=0) - assert np.max(hits[:, 2]) == hits[0, 2] - - if len(miniHits) > 0: - miniHits = np.reshape(miniHits, (len(miniHits), 4)) - miniHits = miniHits[miniHits[:, 2].argsort()] - miniHits = np.flip(miniHits, axis=0) - assert np.max(miniHits[:, 2]) == miniHits[0, 2] - - sets = np.zeros(self.input_shape) - for i in range(min(len(hits), self.max_hits)): - for j in range(4): - sets[i][j] = hits[i][j] - - miniSets = np.zeros(self.input_shape) - for i in range(min(len(miniHits), self.max_hits)): - for j in range(4): - miniSets[i][j] = miniHits[i][j] - - infos = np.array([event.eventNumber, event.lumiBlockNumber, event.runNumber, - class_label, - event.nPV, - track.deltaRToClosestElectron, - track.deltaRToClosestMuon, - track.deltaRToClosestTauHad, - track.eta, - track.phi, - track.dRMinBadEcalChannel, - track.nLayersWithMeasurement, - track.nValidPixelHits]) - - values = { - 'sets' : sets, - 'miniSets' : miniSets, - 'infos' : infos, - } - - return values - - def convertMCFileToNumpy(self, fileName): - inputFile = TFile(fileName, 'read') - inputTree = inputFile.Get('trackImageProducer/tree') - - signal = [] - miniSignal = [] - signal_info = [] - background = [] - miniBackground = [] - background_info = [] - - for event in inputTree: - eventPasses, trackPasses = self.eventSelectionTraining(event) - if not eventPasses: continue - - for i, track in enumerate(event.tracks): - if not trackPasses[i]: continue - - if isGenMatched(event, track, 11): - values = self.convertTrackFromTree(event, track, 1) - signal.append(values['sets']) - miniSignal.append(values['miniSets']) - signal_info.append(values['infos']) - else: - values = self.convertTrackFromTree(event, 
track, 0) - background.append(values['sets']) - miniBackground.append(values['miniSets']) - background_info.append(values['infos']) - - outputFileName = fileName.split('/')[-1] + '.npz' - - if len(signal) != 0 or len(background) != 0: - np.savez_compressed(outputFileName, - signal=signal, - miniSignal=miniSignal, - signal_info=signal_info, - background=background, - miniBackground=miniBackground, - background_info=background_info) - - print('Wrote', outputFileName) - else: - print('No events found in file') - - inputFile.Close() - - def convertAMSBFileToNumpy(self, fileName, selection=None): - inputFile = TFile(fileName, 'read') - inputTree = inputFile.Get('trackImageProducer/tree') - - signal = [] - signal_infos = [] - - for event in inputTree: - if selection is None: sys.exit("Pick a selection to apply from ['full', 'training']") - elif selection == 'full': eventPasses, trackPasses = self.eventSelectionSignal(event) - elif selection == 'training': eventPasses, trackPasses = self.eventSelectionSignal(event) - else: sys.exit("Selection not recognized.") - - if not eventPasses: continue - - for i, track in enumerate(event.tracks): - if not trackPasses[i]: continue - - if not (isGenMatched(event, track, 1000022) or isGenMatched(event, track, 1000024)): continue - - values = self.convertTrackFromTree(event, track, 1) - signal.append(values['sets']) - signal_infos.append(values['infos']) - - outputFileName = (fileName.split('/')[-1]).split('.')[0] + '.npz' - - print(outputFileName) - - if len(signal) > 0: - np.savez_compressed(outputFileName, - signal=signal, - signal_infos=signal_infos) - print('Wrote', outputFileName) - else: - print('No events passed the selections') - - inputFile.Close() - - def convertTPFileToNumpy(self, fileName): - inputFile = TFile(fileName, 'read') - inputTree = inputFile.Get('trackImageProducer/tree') - - tracks = [] - infos = [] - - for event in inputTree: - eventPasses, trackPasses, trackPassesVeto = self.eventSelectionLeptonBackground(event, 'electrons') - if not eventPasses: continue - - for i, track in enumerate(event.tracks): - if not trackPasses[i]: continue - if not trackPassesVeto[i]: continue - - values = self.convertTrackFromTree(event, track, 1) - tracks.append(values['sets']) - infos.append(values['infos']) - - if len(tracks) != 0: - outputFileName = fileName.split('/')[-1] + '.npz' - - np.savez_compressed(outputFileName, - tracks=tracks, - infos=infos) - - print('Wrote', outputFileName) - - inputFile.Close() - else: - print('No events found in file') - - def buildModel(self): - #Rewrite - inputs = Input(shape = self.input_shape, name = "input" ) - inputs_track = Input(shape = (self.track_info_shape,), name = "input_track" ) - print("input shape", self.input_shape, "input track shape", self.track_info_shape) - phi_inputs = Input(shape = (self.input_shape[-1],)) - phi_network = Masking()(phi_inputs) - for layerSize in self.phi_layers[:-1]: - phi_network = Dense(layerSize)(phi_network) - phi_network = Activation('relu')(phi_network) - #phi_network = BatchNormalization()(phi_network) - phi_network = Dense(self.phi_layers[-1])(phi_network) - phi_network = Activation('linear')(phi_network) - unsummed_model = Model(inputs=phi_inputs, outputs=phi_network) - phi_set = TimeDistributed(unsummed_model)(inputs) - summed = Lambda(lambda x: tf.reduce_sum(x, axis=1))(phi_set) - if (self.track_info_shape == 0): - f_network = Dense(self.f_layers[0])(summed) - else: - f_network = Dense(self.f_layers[0])(concatenate([summed,inputs_track])) - f_network = 
Activation('relu')(f_network)
-		for layerSize in self.f_layers[1:]:
-			f_network = Dense(layerSize)(f_network)
-			f_network = Activation('relu')(f_network)
-		f_network = Dense(2)(f_network)
-		f_outputs = Activation('softmax',name="output_xyz")(f_network)
-		integratedmodel = Model(inputs=[inputs,inputs_track], outputs=f_outputs)
-
-		print(integratedmodel.summary())
-
-		self.model = integratedmodel
-
-	def evaluate_model(self, event, track):
-		event_converted = self.convertTrackFromTree(event, track, 1) # class_label doesn't matter
-		prediction = self.model.predict([np.reshape(event_converted['sets'], (1, self.max_hits, 4)), np.reshape(event_converted['infos'], (1,len(event_converted['infos'])))[:, self.track_info_indices]])
-		return prediction[0, 1] # p(is electron)
-
-	def evaluate_npy(self, fname, obj=['sets']):
-		data = np.load(fname, allow_pickle=True)
-
-		if(data[obj[0]].shape[0] == 0): return True, 0
-		sets = data[obj[0]][:, :self.max_hits]
-
-		x = [sets]
-
-		if(len(obj) > 1):
-			info = data[obj[1]][:, self.track_info_indices]
-			x.append(info)
-
-		return False, self.model.predict(x,)
-
-	def saveGraph(self):
-		cmsml.tensorflow.save_graph("graph.pb", self.model, variables_to_constants=True)
-		cmsml.tensorflow.save_graph("graph.pb.txt", self.model, variables_to_constants=True)
+sys.path.append('/home/rsantos/scratch0/CMSSW_12_4_11_patch3/src/DisappTrksML') # Fix this
+import datetime
+import pickle
+import os
+from MachineLearning.networkController import NetworkBase
+from typing import Union
+import keras
+import cmsml
+import tensorflow as tf
+from tensorflow.keras import optimizers, regularizers, callbacks, models
+from tensorflow.keras.callbacks import TensorBoard
+import glob
+import numpy as np
+import logging
+from DeepSets.python.generator import Generator, BalancedGenerator
+from tensorflow.keras.models import Model
+from tensorflow import reduce_sum
+from tensorflow.keras.layers import Dense, TimeDistributed, Masking, Input, Lambda, Activation, BatchNormalization, concatenate
+
+class ElectronModel(NetworkBase):
+
+    def __init__(self,
+                 eta_range:float=0.25, phi_range:float=0.25,
+                 max_hits:int=100,
+                 phi_layers:list[int] = [16, 16],
+                 f_layers:list[int] = [16, 16],
+                 track_info_indices:list[int]=[4, 8, 9, 12],
+                 log_dir:Union[str,None]=None):
+        self.eta_range = eta_range   # hit window used by convertTrackFromTree
+        self.phi_range = phi_range
+        self.track_info_shape = len(track_info_indices)
+        self.max_hits = max_hits
+        self.input_shape = (self.max_hits, 4)
+        self.track_info_indices = track_info_indices
+        self.phi_layers = phi_layers
+        self.f_layers = f_layers
+        self.log_dir = log_dir
+
+    def eventSelectionTraining(self, event):
+        trackPasses = []
+        for track in event.tracks:
+            if (abs(track.eta) >= 2.4 or
+                track.inGap or
+                abs(track.dRMinJet) < 0.5 or
+                abs(track.deltaRToClosestElectron) < 0.15 or
+                abs(track.deltaRToClosestMuon) < 0.15 or
+                abs(track.deltaRToClosestTauHad) < 0.15):
+                trackPasses.append(False)
+            else:
+                trackPasses.append(True)
+        return (True in trackPasses), trackPasses
+
+    def convertTrackFromTree(self, event, track, class_label):
+        hits = []
+        miniHits = []
+
+        for hit in event.recHits:
+            dEta, dPhi = imageCoordinates(track, hit)
+            if abs(dEta) >= self.eta_range or abs(dPhi) >= self.phi_range:
+                continue
+            detIndex = detectorIndex(hit.detType)
+            energy = hit.energy if detIndex != 2 else 1
+            hits.append((dEta, dPhi, energy, detIndex))
+
+        '''
+        for hit in event.miniRecHits:
+            dEta, dPhi = imageCoordinates(track, hit)
+            if abs(dEta) >= self.eta_range or abs(dPhi) >= self.phi_range:
+                continue
+            detIndex =
detectorIndex(hit.detType) + energy = hit.energy if detIndex != 2 else 1 + miniHits.append((dEta, dPhi, energy, detIndex)) + ''' + if len(hits) > 0: + hits = np.reshape(hits, (len(hits), 4)) + hits = hits[hits[:, 2].argsort()] + hits = np.flip(hits, axis=0) + assert np.max(hits[:, 2]) == hits[0, 2] + + if len(miniHits) > 0: + miniHits = np.reshape(miniHits, (len(miniHits), 4)) + miniHits = miniHits[miniHits[:, 2].argsort()] + miniHits = np.flip(miniHits, axis=0) + assert np.max(miniHits[:, 2]) == miniHits[0, 2] + + sets = np.zeros(self.input_shape) + for i in range(min(len(hits), self.max_hits)): + for j in range(4): + sets[i][j] = hits[i][j] + + miniSets = np.zeros(self.input_shape) + for i in range(min(len(miniHits), self.max_hits)): + for j in range(4): + miniSets[i][j] = miniHits[i][j] + + infos = np.array([event.eventNumber, event.lumiBlockNumber, event.runNumber, + class_label, + event.nPV, + track.deltaRToClosestElectron, + track.deltaRToClosestMuon, + track.deltaRToClosestTauHad, + track.eta, + track.phi, + track.dRMinBadEcalChannel, + track.nLayersWithMeasurement, + track.nValidPixelHits]) + + values = { + 'sets' : sets, + 'miniSets' : miniSets, + 'infos' : infos, + } + + return values + + def convertMCFileToNumpy(self, fileName): + inputFile = TFile(fileName, 'read') + inputTree = inputFile.Get('trackImageProducer/tree') + + signal = [] + miniSignal = [] + signal_info = [] + background = [] + miniBackground = [] + background_info = [] + + for event in inputTree: + eventPasses, trackPasses = self.eventSelectionTraining(event) + if not eventPasses: continue + + for i, track in enumerate(event.tracks): + if not trackPasses[i]: continue + + if isGenMatched(event, track, 11): + values = self.convertTrackFromTree(event, track, 1) + signal.append(values['sets']) + miniSignal.append(values['miniSets']) + signal_info.append(values['infos']) + else: + values = self.convertTrackFromTree(event, track, 0) + background.append(values['sets']) + miniBackground.append(values['miniSets']) + background_info.append(values['infos']) + + outputFileName = fileName.split('/')[-1] + '.npz' + + if len(signal) != 0 or len(background) != 0: + np.savez_compressed(outputFileName, + signal=signal, + miniSignal=miniSignal, + signal_info=signal_info, + background=background, + miniBackground=miniBackground, + background_info=background_info) + + print('Wrote', outputFileName) + else: + print('No events found in file') + + inputFile.Close() + + def get_metrics(self, input_dir:str, threshold:float = 0.5, glob_pattern="*")->list[int]: + """ + Return TP TN FP FN + """ + output = self.evaluate_directory(input_dir, glob_pattern, obj=['signal', 'signal_info']) + TP = np.count_nonzero(output[:,1] > threshold) + FN = np.count_nonzero(output[:,1] <= threshold) + + output = self.evaluate_directory(input_dir, glob_pattern, obj=['background', 'background_info']) + TN = np.count_nonzero(output[:,1] < threshold) + FP = np.count_nonzero(output[:,1] >= threshold) + + return [TP, TN, FP, FN] + + + def convertAMSBFileToNumpy(self, fileName, selection=None): + inputFile = TFile(fileName, 'read') + inputTree = inputFile.Get('trackImageProducer/tree') + + signal = [] + signal_infos = [] + + for event in inputTree: + if selection is None: sys.exit("Pick a selection to apply from ['full', 'training']") + elif selection == 'full': eventPasses, trackPasses = self.eventSelectionSignal(event) + elif selection == 'training': eventPasses, trackPasses = self.eventSelectionSignal(event) + else: sys.exit("Selection not recognized.") + + if not 
eventPasses: continue + + for i, track in enumerate(event.tracks): + if not trackPasses[i]: continue + + if not (isGenMatched(event, track, 1000022) or isGenMatched(event, track, 1000024)): continue + + values = self.convertTrackFromTree(event, track, 1) + signal.append(values['sets']) + signal_infos.append(values['infos']) + + outputFileName = (fileName.split('/')[-1]).split('.')[0] + '.npz' + + print(outputFileName) + + if len(signal) > 0: + np.savez_compressed(outputFileName, + signal=signal, + signal_infos=signal_infos) + print('Wrote', outputFileName) + else: + print('No events passed the selections') + + inputFile.Close() + + def convertTPFileToNumpy(self, fileName): + inputFile = TFile(fileName, 'read') + inputTree = inputFile.Get('trackImageProducer/tree') + + tracks = [] + infos = [] + + for event in inputTree: + eventPasses, trackPasses, trackPassesVeto = self.eventSelectionLeptonBackground(event, 'electrons') + if not eventPasses: continue + + for i, track in enumerate(event.tracks): + if not trackPasses[i]: continue + if not trackPassesVeto[i]: continue + + values = self.convertTrackFromTree(event, track, 1) + tracks.append(values['sets']) + infos.append(values['infos']) + + if len(tracks) != 0: + outputFileName = fileName.split('/')[-1] + '.npz' + + np.savez_compressed(outputFileName, + tracks=tracks, + infos=infos) + + print('Wrote', outputFileName) + + inputFile.Close() + else: + print('No events found in file') + + def build_model(self): + inputs = Input(shape = self.input_shape, name = "input" ) + inputs_track = Input(shape = (self.track_info_shape,), name = "input_track" ) + print("input shape", self.input_shape, "input track shape", self.track_info_shape) + phi_inputs = Input(shape = (self.input_shape[-1],)) + phi_network = Masking()(phi_inputs) + for layerSize in self.phi_layers[:-1]: + phi_network = Dense(layerSize)(phi_network) + phi_network = Activation('relu')(phi_network) + phi_network = Dense(self.phi_layers[-1])(phi_network) + phi_network = Activation('linear')(phi_network) + unsummed_model = Model(inputs=phi_inputs, outputs=phi_network) + phi_set = TimeDistributed(unsummed_model)(inputs) + summed = Lambda(lambda x: reduce_sum(x, axis=1))(phi_set) + if (self.track_info_shape == 0): + f_network = Dense(self.f_layers[0])(summed) + else: + f_network = Dense(self.f_layers[0])(concatenate([summed,inputs_track])) + f_network = Activation('relu')(f_network) + for layerSize in self.f_layers[1:]: + f_network = Dense(layerSize)(f_network) + f_network = Activation('relu')(f_network) + f_network = Dense(2)(f_network) + f_outputs = Activation('softmax',name="output_xyz")(f_network) + integratedmodel = Model(inputs=[inputs,inputs_track], outputs=f_outputs) + + print(integratedmodel.summary()) + + self.model = integratedmodel + + + def evaluate_model(self, fname:str, obj=['sets'])->Union[list[float],None]: + data = np.load(fname, allow_pickle=True) + + if(data[obj[0]].shape[0] == 0): return None + sets = data[obj[0]][:, :self.max_hits] + + x = [sets] + + if(len(obj) > 1): + info = data[obj[1]][:, self.track_info_indices] + x.append(info) + + return self.model.predict(x,) + + def train_model(self, data_directory:str, epochs:int = 10, monitor='val_loss', + patience_count:int=10, metrics=['accuracy', keras.metrics.Precision(), keras.metrics.Recall()], + optimizer=optimizers.Adagrad(), outdir="", val_generator_params={}, + train_generator_params={}, use_tensorboard:bool=False, tensorboard_histogram_freq:int=1)->Model: + + self.model.compile(optimizer=optimizer, 
loss='binary_crossentropy', metrics=metrics) + + if not use_tensorboard: + training_callbacks = [ + callbacks.EarlyStopping(monitor=monitor, patience=patience_count), + ] + else: + if self.log_dir: + tensorboard_callback =callbacks.TensorBoard(log_dir=self.log_dir, histogram_freq=tensorboard_histogram_freq) + logging.info(f"TensorBoard logs will be sent to {self.log_dir}") + else: + log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + tensorboard_callback= callbacks.TensorBoard(log_dir=log_dir, histogram_freq=tensorboard_histogram_freq) + logging.info(f"TensorBoard logs will be sent to {log_dir}") + training_callbacks = [ + callbacks.EarlyStopping(monitor=monitor, patience=patience_count), + tensorboard_callback + ] + + + inputFiles = glob.glob(data_directory + 'images_*.root.npz') + inputIndices = np.array([f.split('images_')[-1][:-9] for f in inputFiles]) + nFiles = len(inputIndices) + logging.info(f"Found {nFiles} inputfiles") + + print("Before generators") + train_generator = BalancedGenerator(inputIndices[:int(nFiles*0.7)], **train_generator_params) + val_generator = Generator(inputIndices[int(nFiles*0.7):], **val_generator_params) + print("After generators") + training_history = self.model.fit(train_generator, + validation_data=val_generator, + callbacks=training_callbacks, + epochs=epochs, + verbose=2) + with open(f"{outdir}trainingHistory.pickle", "wb") as f: + pickle.dump(training_history.history, f) + #self.save(f"{outdir}model.h5") + + def saveGraph(self, directory): + cmsml.tensorflow.save_graph(f"{directory}graph.pb", self.model, variables_to_constants=True) + cmsml.tensorflow.save_graph(f"{directory}graph.pb.txt", self.model, variables_to_constants=True) + + def load_model(self, model_path): + try: + self.model = models.load_model(model_path) + except Exception as e: + print("Error is: ", e) + self.model = models.load_model(model_path, custom_objects={'tf': tf}) diff --git a/DeepSets/test/train.py b/DeepSets/test/train.py index 1e7d5b0..38d7f48 100644 --- a/DeepSets/test/train.py +++ b/DeepSets/test/train.py @@ -4,90 +4,90 @@ sys.path.append("/home/ryan/scratch0/CMSSW_12_4_11_patch3/src/") print("PATH: ", sys.path) import tensorflow as tf -#import cmsml -from sklearn.model_selection import KFold - -from DisappTrksML.DeepSets.python.ElectronModel import * -print("Imported") -# from DisappTrksML.DeepSets.MuonModel import * -from DisappTrksML.DeepSets.python.generator import * -from DisappTrksML.DeepSets.python.utilities import * +from datetime import datetime +import numpy as np +import sys +from DisappTrksML.DeepSets.ElectronModel import ElectronModel +from DisappTrksML.DeepSets.generator import BalancedGenerator, Generator +#from DisappTrksML.DeepSets.utilities import * if False: - # limit CPU usage - config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads = 4, - intra_op_parallelism_threads = 4, - allow_soft_placement = True, - device_count={'CPU': 4}) - tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config)) + # limit CPU usage + config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads = 4, + intra_op_parallelism_threads = 4, + allow_soft_placement = True, + device_count={'CPU': 4}) + tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config)) ####### - -backup_suffix = "backup" +os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE" +backup_suffix = datetime.now().strftime('%Y-%m-%d_%H.%M.%S') outdir = "train_"+backup_suffix+"/" -if(len(sys.argv)>1): - input_params = 
np.load("params.npy",allow_pickle=True)[int(sys.argv[1])] - print(input_params) - outdir = str(input_params[-1]) - print("output directory", outdir) -info_indices = [4, # nPV - 8, # eta - 9, # phi - 12 # nValidPixelHits - ] +info_indices = [4, # nPV + 8, # eta + 9, # phi + 12 # nValidPixelHits + ] model_params = { - 'eta_range':0.25, - 'phi_range':0.25, - 'phi_layers':[64,64,256], - 'f_layers':[64,64,64], - 'max_hits' : 100, - 'track_info_indices' : info_indices -} + 'eta_range':0.25, + 'phi_range':0.25, + 'phi_layers':[64,64,256], + 'f_layers':[64,64,64], + 'max_hits' : 100, + 'track_info_indices' : info_indices +} val_generator_params = { - 'input_dir' : '/store/user/mcarrigan/deepSets/electronModel/', - 'batch_size' : 256, - 'max_hits' : 100, - 'info_indices' : info_indices + 'input_dir' : '/store/user/rsantos/2022/combined_DYJet/training/', + 'batch_size' : 256, + 'max_hits' : 100, + 'info_indices' : info_indices } train_generator_params = val_generator_params.copy() train_generator_params.update({ - 'shuffle': True, - 'batch_ratio': 0.5 + 'shuffle': True, + 'batch_ratio': 0.5 }) train_params = { - 'epochs': 1, - 'outdir':outdir, - 'patience_count':5 + 'epochs': 1, + 'outdir':outdir, + 'patience_count':20 } -if(not os.path.isdir(outdir)): os.mkdir(outdir) +if(len(sys.argv)>1): + input_params = np.load("params.npy",allow_pickle=True)[int(sys.argv[1])] + print(input_params) + train_params['outdir'] = str(input_params[-1]) + model_params['phi_layers'] = input_params[0] + model_params['f_layers'] = input_params[1] + + +if(not os.path.isdir(train_params["outdir"])): os.mkdir(train_params["outdir"]) arch = ElectronModel(**model_params) -arch.buildModel() +arch.build_model() -inputFiles = glob.glob(train_generator_params['input_dir']+'images_*10.root.npz') +inputFiles = glob.glob(train_generator_params['input_dir']+'*.root.npz') inputIndices = np.array([f.split('images_')[-1][:-9] for f in inputFiles]) nFiles = len(inputIndices) print(('Found', nFiles, 'input files')) file_ids = { - 'train' : inputIndices[:10], - 'validation' : inputIndices[10:] + 'train' : inputIndices[:int(nFiles*0.9)], + 'validation' : inputIndices[int(nFiles*0.9):] } train_generator = BalancedGenerator(file_ids['train'], **train_generator_params) val_generator = Generator(file_ids['validation'], **val_generator_params) -arch.fit_generator(train_generator=train_generator, - val_generator=val_generator, - **train_params) +arch.train_model(data_directory= train_generator_params['input_dir'], + epochs = 1, + patience_count = 20, + outdir = train_params['outdir'], + val_generator_params = val_generator_params, + train_generator_params = train_generator_params) #arch.saveGraph() -arch.save_trainingHistory(train_params['outdir']+'trainingHistory.pkl') -arch.plot_trainingHistory(train_params['outdir']+'trainingHistory.pkl',train_params['outdir']+'trainingHistory.png','loss') -arch.save_weights(train_params['outdir']+'model_weights.h5') -arch.save_model(train_params['outdir']+'model.h5') -arch.save_metrics(train_params['outdir']+'trainingHistory.pkl',train_params['outdir']+"metrics.pkl", train_params) +#arch.save_model(train_params['outdir']+'model.h5') diff --git a/MachineLearning/README.md b/MachineLearning/README.md new file mode 100644 index 0000000..446c5a8 --- /dev/null +++ b/MachineLearning/README.md @@ -0,0 +1,6 @@ +* Instructions to use NetworkController() + - Any network that you want to use with the NetworkController should subclass the NetworkBase class defined in networkController.py + - All functions defined within 
NetworkBase should be reimplemented inside your class with the same signatures (ie. same input and output) + - After this all functionality of NetworkController() should be available to your model. + - When writing tools for both the DeepSets model and the FakeTracks model, only assume that the methods defined in NetworkController exist. If you find you need another method, add this to NetworkController() and make sure that these methods are added to both models. +The code is well documented, so check networkController for further details on what is available for use and how to use it. An example for using the NetworkController is given in example.py. diff --git a/MachineLearning/condor_generate_example.py b/MachineLearning/condor_generate_example.py new file mode 100644 index 0000000..228e4ff --- /dev/null +++ b/MachineLearning/condor_generate_example.py @@ -0,0 +1,32 @@ +""" +An example of how you would use +generate_condor_submission to create a condor +submission script +""" +import sys +import os +import pickle +import logging +sys.path.append("/data/users/mcarrigan/home_link/.local/bin/") +#sys.path.append('/home/ryan/Documents/Research/') +import optuna +from networkController import NetworkController + +logging.basicConfig(level=logging.DEBUG) + +params = [{"trainable_params":{"phi_layers" : ["layers", 1, 3, 8, 16]}}, + {"trainable_params":{"f_layers" : ["layers", 1, 3, 8, 16]}}] + +with open("params.pkl", "wb") as pickle_file: + pickle.dump(params, pickle_file) + +NetworkController.generate_condor_submission( + argument="$(PROCESS)", + number_of_jobs=len(params), + use_gpu=True, log_dir=".", + input_files=["run_wrapper_gpu.sh", "runNN.py", + "ElectronModel.py", "networkController.py", + "generator.py", "singularity_wrapper.sh" + ]) + +os.system("condor_submit run.sub") diff --git a/MachineLearning/condor_script_generator.py b/MachineLearning/condor_script_generator.py new file mode 100644 index 0000000..45df544 --- /dev/null +++ b/MachineLearning/condor_script_generator.py @@ -0,0 +1,37 @@ +class HTCondorScriptGenerator: + def __init__(self, executable, output_file="out_$(PROCESS).txt", error_file="error_$(PROCESS).txt", log_file="log_$(PROCESS).log", log_dir="."): + self.script_lines = [] + self.script_lines.append("Executable = {}".format(executable)) + self.script_lines.append("Output = {}/{}".format(log_dir, output_file)) + self.script_lines.append("Error = {}/{}".format(log_dir, error_file)) + self.script_lines.append("Log = {}/{}".format(log_dir,log_file)) + + def add_argument(self, argument): + self.script_lines.append("Arguments = {}".format(argument)) + + def add_requirements(self, requirements): + self.script_lines.append("Requirements = {}".format(requirements)) + + def add_queue(self, num_jobs=1): + self.script_lines.append("Queue {}".format(num_jobs)) + + def add_option(self, option:str, value:str): + self.script_lines.append("{} = {}".format(option, value)) + + def generate_script(self, file_path): + with open(file_path, "w") as file: + file.write("\n".join(self.script_lines)) + +# Example usage: +if __name__ == "__main__": + # Create an instance of HTCondorScriptGenerator + condor_generator = HTCondorScriptGenerator(executable="my_executable") + + # Add arguments, requirements, and specify the number of jobs to queue + condor_generator.add_argument("arg1 arg2 arg3") + condor_generator.add_requirements('OpSysMajorVer == 7') + condor_generator.add_option("should_transfer_files", "yes") + + condor_generator.add_queue(num_jobs=5) + # Generate the HTCondor script and 
save it to a file
+    condor_generator.generate_script("my_condor_script.submit")
diff --git a/MachineLearning/networkController.py b/MachineLearning/networkController.py
new file mode 100644
index 0000000..61691f4
--- /dev/null
+++ b/MachineLearning/networkController.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""
+This module is used to control a general neural network.
+
+A very general base class is used so that tools can be shared
+without requiring a major rewrite of either the DeepSets network
+or the FakeTracks network.
+"""
+from abc import ABC, abstractmethod
+import os
+from typing import Union, Dict
+import logging
+from glob import glob
+
+import numpy as np
+import cmsml
+import tensorflow as tf
+from tensorflow.keras.models import Model, load_model
+
+import optuna
+import keras
+
+from condor_script_generator import HTCondorScriptGenerator
+
+
+class NetworkBase(ABC):
+    """
+    This abstract class requires that the following methods be defined in your network:
+    build_model() - responsible for creating the layers, nodes, and activation functions of the model
+    evaluate_model() - used to run a single data file through the trained model
+    train_model() - used to train the model on a directory of data
+    get_metrics() - used to count true/false positives and negatives on a directory of data
+
+    Your model should inherit from this class.
+    """
+
+    @abstractmethod
+    def build_model(self, *args, **kwargs):
+        """ Return a built neural network """
+        pass
+
+    @abstractmethod
+    def evaluate_model(self, fileName:str, *args, **kwargs)->Union[list[float],None]:
+        """ Return the prediction of the neural network on the input data """
+        pass
+
+    @abstractmethod
+    def train_model(self, model:Model, data_directory:str, epochs:int = 10, monitor:str='val_loss', patience_count:int=10, metrics:list=['accuracy', keras.metrics.Precision(), keras.metrics.Recall()], outdir:str="", *args, **kwargs)->Model:
+        """ Return a trained neural network """
+        pass
+
+    @abstractmethod
+    def get_metrics(self, input_directory:str, threshold:float = 0.5, glob_pattern="*", **kwargs)->list[int]:
+        """ Return a list of [TP, TN, FP, FN] """
+        pass
+
+    def evaluate_directory(self, directory_path:str, glob_pattern:Union[None, str]=None, **kwargs)->Union[np.ndarray, None]:
+        """
+        Convenience function to evaluate the model on the files in a directory that match a glob pattern
+        """
+        if glob_pattern:
+            inputFiles = glob(directory_path+glob_pattern)
+        else:
+            inputFiles = glob(directory_path+"*")
+
+        results = np.empty(1) # To avoid an unbound variable
+
+        for i, file in enumerate(inputFiles):
+            if i % 100 == 0:
+                logging.info(f"On file {i}")
+            eval_output = self.evaluate_model(file, **kwargs)
+            if eval_output is None:
+                return None
+            if i == 0:
+                results = np.array(eval_output)
+            else:
+                results = np.concatenate((results, np.array(eval_output)))
+        return results
+
+    def save_model(self, filename:str = "model.h5"):
+        self.model.save(filename)
+
+    def saveGraph(self):
+        cmsml.tensorflow.save_graph("graph.pb", self.model, variables_to_constants=True)
+        cmsml.tensorflow.save_graph("graph.pb.txt", self.model, variables_to_constants=True)
+
+class NetworkController():
+    def __init__(self, model:NetworkBase, config:Union[str,None]=None):
+
+        self.model = model
+        self.input_dir = None
+
+
+    def calculateMetrics(self, metrics:list[int])->tuple[float, float, float]:
+        """
+        Return precision, recall, and F1 score.
+
+        The input should be a list in the order [TP, TN, FP, FN].
+        """
+        if metrics[0] == 0:
+            logging.warning("No TP found!")
+            return 0, 0, 0
+        else:
+            precision = metrics[0] / (metrics[0] + metrics[2])
+            recall = metrics[0] / (metrics[0] + metrics[3])
+            f1 = 2 * (precision * recall) / (precision + recall)
+            return precision, recall, f1
+
+
+    def load_model(self, model_path:str)->Model:
+        """ Return a pretrained model that was previously stored in an h5 file """
+        return load_model(model_path)
+
+    def _objective(self, trial, trainable_params, train_parameters, build_parameters, glob_pattern, metric, threshold = 0.5):
+        """
+        This method should not be used externally. This function is used by Optuna
+        to optimize the model.
+        """
+
+        if not self.input_dir:
+            logging.debug("""You should not be calling this function outside of tune_hyperparameters.
+                          The input_dir for NetworkController needs to be set first""")
+            return
+        output_values = {}
+        for key, value in trainable_params.items():
+            if value[0] == "int":
+                output_values[key] = trial.suggest_int(str(key), value[1], value[2])
+            elif value[0] == "float":
+                output_values[key] = trial.suggest_float(str(key), value[1], value[2])
+            elif value[0] == "category":
+                output_values[key] = trial.suggest_categorical(str(key), value[1:]) # drop the leading type tag
+            elif value[0] == "layers":
+                number_of_layers = trial.suggest_int("num_layers", value[1], value[2])
+                output_values[key] = [trial.suggest_int(f"nodes_{i}", value[3], value[4], log=True) for i in
+                                      range(number_of_layers)]
+
+        self.model.build_model(**build_parameters, **output_values)
+        logging.debug(f"train_model parameters {train_parameters}")
+        self.model.train_model(self.input_dir, **train_parameters)
+        return self.calculateMetrics(self.model.get_metrics(self.input_dir, threshold, glob_pattern))[metric]
+
+    @staticmethod
+    def generate_condor_submission(argument:str, number_of_jobs:int, use_gpu:bool, log_dir:str, input_files, use_container:bool=True):
+        """
+        Generate a condor submission script. This is a static method, so it can be used without
+        instantiating a NetworkController instance.
+        """
+        if use_gpu:
+            executable = "run_wrapper_gpu.sh"
+        elif use_container:
+            executable = "run_wrapper_cpu.sh"
+        else:
+            executable = "run_wrapper_no_container.sh"
+        condor_generator = HTCondorScriptGenerator(executable, log_dir=log_dir)
+        condor_generator.add_option("Universe", "vanilla")
+        condor_generator.add_option("+IsLocalJob", "true")
+        condor_generator.add_option("Rank", "TARGET.IsLocalSlot")
+        condor_generator.add_option("request_disk", "2000MB")
+        condor_generator.add_option("request_memory", "2GB")
+        condor_generator.add_option("should_transfer_files", "Yes")
+        condor_generator.add_option("when_to_transfer_output", "ON_EXIT")
+        condor_generator.add_option("transfer_input_files", ', '.join(input_files))
+        condor_generator.add_option("getenv", "true")
+        if use_gpu:
+            condor_generator.add_option("request_cpus", "6")
+            condor_generator.add_option("+IsGPUJob", "true")
+            condor_generator.add_requirements("((Target.IsGPUSlot == True))")
+        else:
+            condor_generator.add_option("request_cpus", "2")
+            condor_generator.add_option("+isSmallJob", "true")
+        condor_generator.add_argument(argument)
+        condor_generator.add_queue(number_of_jobs)
+        condor_generator.generate_script("run.sub")
+
+    def tune_hyperparameters(self,
+                             trainable_params: Union[Dict[str, tuple[float,float]],
+                                                     Dict[str, tuple[int, int]], Dict[str, list]] = {},
+                             train_parameters = {},
+                             build_parameters = {},
+                             use_gpu:bool=True, num_trials:int= 10, timeout:int=600,
+                             input_dir:str="", glob_pattern:str="*", metric:int = 2, threshold:float = 0.5)->None:
+        """
+        Run hyperparameter tuning of your model and return Optuna's output.
+        Type checking is performed inside the objective function, so make sure you properly
+        distinguish floats from integers when filling trainable_params.
+
+        trainable_params:
+            Dictionary of parameters to be tuned. The first item of each value list must be
+            "int", "float", "category", or "layers"; this selects trial.suggest_int, trial.suggest_float,
+            trial.suggest_categorical, or a layer/node search respectively. The remaining items give the
+            range (or choices) that Optuna should sample from.
+        train_parameters:
+            Dictionary of parameters needed to train your model, as defined in NetworkBase, passed via dictionary unpacking
+        build_parameters:
+            Dictionary of parameters needed to build your model, as defined in NetworkBase, passed via dictionary unpacking
+        use_gpu:
+            Whether to use the GPU; sets the corresponding TensorFlow session options (and the matching Condor node requirements)
+        num_trials:
+            How many trials Optuna should run to find optimal values
+        timeout:
+            Timeout (in seconds) passed to Optuna
+        input_dir:
+            Directory that contains the input data used for training and validation of the model
+        glob_pattern:
+            Pattern used to match files inside input_dir for training
+        metric:
+            Index into the output of calculateMetrics (0 = precision, 1 = recall, 2 = F1)
+        threshold:
+            Threshold used to decide whether a network output counts as a positive prediction
+        """
+
+        self.input_dir = input_dir
+        if use_gpu:
+            config = tf.compat.v1.ConfigProto(log_device_placement=True)
+            tf.compat.v1.Session(config=config)
+        else:
+            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+            # Limits CPU
+            config = tf.compat.v1.ConfigProto(inter_op_parallelism_threads = 4,
+                                              intra_op_parallelism_threads = 4,
+                                              allow_soft_placement = True,
+                                              device_count={'CPU':4})
+            tf.compat.v1.keras.backend.set_session(tf.compat.v1.Session(config=config))
+
+        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
+
+        logging.debug("Setting up Optuna study")
+        study = optuna.create_study(direction="maximize")
+
+        study.optimize(lambda trials: self._objective(trials, train_parameters=train_parameters,
+                                                      build_parameters=build_parameters,
+                                                      trainable_params = trainable_params,
+                                                      glob_pattern=glob_pattern, metric=metric, threshold=threshold),
+                       n_trials=num_trials, timeout=timeout)
diff --git a/MachineLearning/runNN_example.py b/MachineLearning/runNN_example.py
new file mode 100644
index 0000000..7b3a881
--- /dev/null
+++ b/MachineLearning/runNN_example.py
@@ -0,0 +1,35 @@
+"""
+An example script of how you would use NetworkController to
+tune hyperparameters
+"""
+
+import sys
+sys.path.append("/data/users/mcarrigan/home_link/.local/")
+import pickle
+
+from networkController import NetworkController
+from DisappTrksML.DeepSets.python.ElectronModel import ElectronModel
+
+index = int(sys.argv[1])
+print("Index: ", index)
+controller = NetworkController(ElectronModel())
+
+with open("params.pkl", "rb") as pickle_file:
+    params = pickle.load(pickle_file)
+
+
+data_directory="/home/ryan/Documents/Research/Data/DeepSetsTraining/TrainDataSample/"
+
+
+controller.tune_hyperparameters(trainable_params= params[index],
+                                train_parameters={"epochs" : 10,
+                                                  "val_generator_params": {"input_dir": data_directory,
+                                                                           "info_indices": [4,8,9,12],
+                                                                           "batch_size": 256},
+                                                  "train_generator_params": {"input_dir": data_directory,
+                                                                             "info_indices":[4,8,9,12],
+                                                                             "batch_size":256},
+                                                  "use_tensorboard": True},
+                                build_parameters={},
+                                use_gpu=False,
+                                input_dir=data_directory)
diff --git a/MachineLearning/run_wrapper_cpu.sh b/MachineLearning/run_wrapper_cpu.sh
new file mode 100644
index 0000000..95cb0f7
--- /dev/null
+++ b/MachineLearning/run_wrapper_cpu.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/bash
+localMachine=$(hostname)
+echo "Running on computer $localMachine"
+echo "Running on CPU in singularity"
+execute_in_container=$1 # Command that should be executed in the container
+singularity exec -B $PWD,/store,/data disappTrksCPU.sif bash "$execute_in_container"
+rm *.sif
diff --git a/MachineLearning/run_wrapper_gpu.sh b/MachineLearning/run_wrapper_gpu.sh
new file mode 100644
index 0000000..1d3b1ba
--- /dev/null
+++ b/MachineLearning/run_wrapper_gpu.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/bash
+localMachine=$(hostname)
+
+echo "Running on computer $localMachine"
+echo "Running on GPU"
+cp /mnt/driveB/Singularity/disapp_trks.sif .
+execute_in_container=$@ # Command that should be executed in the container
+echo "Running $execute_in_container"
+singularity exec -B $PWD,/store,/data disapp_trks.sif bash $PWD/singularity_wrapper.sh $1
+rm *.sif
diff --git a/MachineLearning/run_wrapper_no_container.sh b/MachineLearning/run_wrapper_no_container.sh
new file mode 100644
index 0000000..dfc5c15
--- /dev/null
+++ b/MachineLearning/run_wrapper_no_container.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/bash
+#TODO Clean up
+localMachine=$(hostname)
+
+echo "Running on computer $localMachine"
+CMSSW_VERSION_LOCAL=CMSSW_12_4_11_patch3
+
+source /cvmfs/cms.cern.ch/cmsset_default.sh
+export SCRAM_ARCH=slc7_amd64_gcc820
+
+scramv1 project $CMSSW_VERSION_LOCAL
+
+cp fakesNN.py $CMSSW_VERSION_LOCAL/src/fakesNN.py
+cp plotMetrics.py $CMSSW_VERSION_LOCAL/src/plotMetrics.py
+cp params.npy $CMSSW_VERSION_LOCAL/src/params.npy
+cp validateData.py $CMSSW_VERSION_LOCAL/src/validateData.py
+#cp gridSearchParams.npy CMSSW_11_2_1_patch2/src/gridSearchParams.npy
+cp utilities.py $CMSSW_VERSION_LOCAL/src/utilities.py
+cp fakeClass.py $CMSSW_VERSION_LOCAL/src/fakeClass.py
+
+cd $CMSSW_VERSION_LOCAL/src/
+eval `scramv1 runtime -sh`
+
+#python3 fakesNN.py -d $1 -p gridSearchParams.npy -i $2
+if [[ $4 -gt 0 ]]
+then
+    python3 fakesNN.py -d $1 -p params.npy -i $2 -g $((1/4)) -c $6
+else
+    python3 fakesNN.py -d $1 -p params.npy -i $2 -c $6
+fi
+
+echo $((1/4))
+
+rm *.py
+rm *.npy
+
+echo $3/$1
+
+if [ -d "$3/$1" ]; then
+    mv * $3/$1/
+else
+    mkdir $3/$1/
+    mv * $3/$1/
+fi
+
+cd ../..
+ +rm -r $CMSSW_VERSION_LOCAL diff --git a/Scripts/condor_submission_creator.py b/Scripts/condor_submission_creator.py new file mode 100644 index 0000000..dd029c7 --- /dev/null +++ b/Scripts/condor_submission_creator.py @@ -0,0 +1,26 @@ +def create_submission(executable:str, njobs:int, arguments:list, log_dir:str, input_files:list, file_name:str="run.sub", disk_space:int=200, memory:int=2, cpus:int=2): + arguments_contents = ' '.join(map(str, arguments)) + input_files_contents = ' '.join(map(str, input_files)) + with open(file_name, 'w') as file: + submit_lines = f"""Universe = vanilla ++IsLocalJob = true +request_disk = {disk_space}MB +request_memory = {memory}GB +request_cpus = {cpus} +hold = False +executable = {executable} +arguments = {arguments_contents} +log = {log_dir}/log_$(PROCESS).log +output = {log_dir}/out_$(PROCESS).txt +error = {log_dir}/error_$(PROCESS).txt +should_transfer_files = Yes +when_to_transfer_output = ON_EXIT ++isSmallJob = true +transfer_input_files = {input_files_contents} +getenv = true +queue {njobs} + """ + file.write(submit_lines) + +if __name__ == "__main__": + create_submission("test.py", njobs=100, arguments=[1,2,3], log_dir="/home/ryan/", input_files=['test2.py', 'test3.py']) diff --git a/Scripts/condor_submission_creator.py~ b/Scripts/condor_submission_creator.py~ new file mode 100644 index 0000000..ee4cc14 --- /dev/null +++ b/Scripts/condor_submission_creator.py~ @@ -0,0 +1,22 @@ +import os + +def create_submission(executable:str, file_name:str="run.sub", disk_space:int=200, memory:int=2, cpus:int=2): + with open(file_name, 'w') as file: + submit_lines = f""" + Universe = vanilla + +IsLocalJob = true + request_disk = {disk_space}MB + request_memory = {memory}GB + request_cpus = {cpus} + hold = False + executable = run_wrapper.sh + arguments = {0} $(PROCESS) {2} {3} {4} {5} {6} {8} + log = {2}{0}/log_$(PROCESS).log + output = {2}{0}/out_$(PROCESS).txt + error = {2}{0}/error_$(PROCESS).txt + should_transfer_files = Yes + when_to_transfer_output = ON_EXIT + +isSmallJob = true + transfer_input_files = run_wrapper.sh, fakesNN.py, plotMetrics.py, params.npy, utilities.py, fakeClass.py, validateData.py, singularity_wrapper.sh, {5}, {7} + getenv = true + queue {1} diff --git a/fakeTracks/machineLearning/fakesNN.py b/fakeTracks/machineLearning/fakesNN.py index 649094e..0fba2a0 100644 --- a/fakeTracks/machineLearning/fakesNN.py +++ b/fakeTracks/machineLearning/fakesNN.py @@ -104,7 +104,7 @@ def cpu_settings(self): def gpu_settings(self): config=tf.compat.v1.ConfigProto(log_device_placement=True) - sess = tf.compat.v1.Session(config=config) + tf.compat.v1.Session(config=config) # suppress warnings os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # or any {'0', '1', '2'} diff --git a/fakeTracks/machineLearning/runNN.py b/fakeTracks/machineLearning/runNN.py index fc3e952..a0f9eef 100644 --- a/fakeTracks/machineLearning/runNN.py +++ b/fakeTracks/machineLearning/runNN.py @@ -1,11 +1,5 @@ -import pickle -import os,re +import os import sys -import time -from decimal import Decimal -import glob -import subprocess -from itertools import product import numpy as np import shutil
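
Reviewer note: below is a minimal, illustrative sketch of the workflow MachineLearning/README.md describes — a model subclasses NetworkBase, implements its abstract methods, and is then driven through NetworkController.tune_hyperparameters. The class TinyModel, its stubbed method bodies, and the paths are hypothetical placeholders and are not part of this patch; only the NetworkBase/NetworkController API added above is assumed.

# Sketch only: hypothetical NetworkBase subclass driven by NetworkController.
from networkController import NetworkBase, NetworkController

class TinyModel(NetworkBase):
    def build_model(self, *args, **kwargs):
        self.model = None  # build and store the Keras model here

    def evaluate_model(self, fileName, *args, **kwargs):
        return None  # return per-track predictions for one .npz file, or None if it is empty

    def train_model(self, data_directory, **kwargs):
        pass  # fit self.model on the files found under data_directory

    def get_metrics(self, input_directory, threshold=0.5, glob_pattern="*", **kwargs):
        return [0, 0, 0, 0]  # [TP, TN, FP, FN]

controller = NetworkController(TinyModel())
controller.tune_hyperparameters(
    trainable_params={"f_layers": ["layers", 1, 3, 8, 16]},  # type tag first, then ranges, as in condor_generate_example.py
    train_parameters={"epochs": 5},
    build_parameters={},
    use_gpu=False,
    input_dir="/path/to/training/data/")  # hypothetical directory of .npz training files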