diff --git a/AdaptMeDelphes.py b/AdaptMeDelphes.py
index b65082c..3167cbf 100644
--- a/AdaptMeDelphes.py
+++ b/AdaptMeDelphes.py
@@ -11,7 +11,9 @@ from sklearn.model_selection import train_test_split
 from keras.models import Model
 from keras.layers import Input
+from keras.metrics import binary_accuracy
 from Layers import *
+from weightedobjectives import binary_crossentropy_labelweights, binary_accuracy_labelweights
 
 global_loss_list={}
 
 parser = ArgumentParser()
@@ -39,6 +41,12 @@ parser.add_argument("--nepochscorr", help="number of epochs for MC correction", type=int, default=100)
 parser.add_argument("--batchsize", help="batch size", type=int, default=10000)
 parser.add_argument("--runIC", action='store_true')
+parser.add_argument("--skewfraction", help='fraction to skew data samples, >0 downsamples lights, <0 downsamples bs',
+                    type=float, default=None)
+parser.add_argument("--pretrainbtag", help='number of epochs to train the b-tagging part', type=int, default=0)
+parser.add_argument("--pretraindatamc", help='number of epochs to train the data/mc part', type=int, default=0)
+parser.add_argument("--fixweights" , action='store_true', help='fix the label-fraction weights')
+parser.add_argument("--rightweights", action='store_true', help='set the starting label-fraction weights to the right value')
 
 args = parser.parse_args()
 
@@ -52,7 +60,6 @@
         print "Using GPU: ",os.environ['CUDA_VISIBLE_DEVICES']
     except ImportError:
         pass
-
 else:
     print "LXPLUS settings"
     if args.gpu<0:
@@ -67,7 +74,6 @@
         os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
         print('running on GPU '+str(args.gpu))
 
-
     if args.gpufraction>0 and args.gpufraction<1:
         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpufraction)
         sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
@@ -79,17 +85,13 @@
 from make_samples import make_sample
 import pandas as pd
 from keras.optimizers import Adam, SGD
+from utils import save, fix_layers, open_layers, set_trainable
 
-
-def save(df, fname):
-    dname = os.path.dirname(fname)
-    if not os.path.isdir(dname):
-        os.makedirs(dname)
-    records = df.to_records(index=False)
-    records.dtype.names = [str(i) for i in records.dtype.names]
-    np.save(fname, records)
-
-
+def get_weight(model):
+    session = K.get_session()
+    sub_models = [i for i in model.layers if hasattr(i, 'layers')]
+    weight_layer = [i for m in sub_models for i in m.layers if i.name == 'weight_layer'][0]
+    return weight_layer.weights[0].eval(session=session).ravel()[0]
 
 def schedule(x):
     lr=args.lr
@@ -104,17 +106,26 @@ def schedule(x):
 
 def run_model(outdir, training_method=1):
-
     nepochstotal=args.nepochs
     nepochscorr=args.nepochscorr
     batchsize=args.batchsize
-
+    metrics_dict = {
+        'btag_discriminator' : binary_accuracy,
+        'datamc_discriminator' : [binary_accuracy_labelweights, binary_accuracy],
+    }
 
-    Inputs = Input((10,))
+    Inputs = [Input((10,)), Input((1,))]
     global_loss_list['GradientReversal']=GradientReversal()
 
-    X_traintest, isB_traintest , isMC_traintest = make_sample(args.indir, args.addsv)
-    X_all, X_test, isB_all, isB_test, isMC_all, isMC_test = train_test_split(X_traintest, isB_traintest , isMC_traintest, test_size=0.1, random_state=42)
+
+    X_traintest, isB_traintest , isMC_traintest, weight_in_traintest, exp_weight = make_sample(
+        args.indir, args.addsv, args.skewfraction
+    )
+    get_expected_weight = lambda x: exp_weight #dummy function
+
+    X_all, X_test, isB_all, isB_test, isMC_all, isMC_test, weight_in_all, weight_in_test = train_test_split(
+        X_traintest, isB_traintest , isMC_traintest, weight_in_traintest,
+        test_size=0.1, random_state=42
+    )
 
     onesarray=np.ones(isB_all.shape[0])
 
@@ -125,12 +136,12 @@ def run_model(outdir, training_method=1):
     #for compatibility
     def modelIverseGrad(Inputs,rev_grad):
         #build the models
-        feat = common_features(Inputs)
-        feat_outshape = Input(feat.get_layer(index=-1).output_shape)
+        feat = common_features(Inputs[0])
+        feat_outshape = Input(shape=(feat.get_layer(index=-1).output_shape[1:]))
         btag = btag_discriminator(feat_outshape,dropoutrate=0.03)
-        datamc = datamc_discriminator(feat_outshape,rev_grad=rev_grad,dropoutrate=0.03)
+        datamc = datamc_discriminator([feat_outshape, Inputs[1]],rev_grad=rev_grad,dropoutrate=0.03)
 
-        return Model(inputs=Inputs, outputs=btag(feat(Inputs))+datamc(feat(Inputs)),name='fullModel')
+        return Model(inputs=Inputs, outputs=btag(feat(Inputs[0]))+datamc([feat(Inputs[0]), Inputs[1]]),name='fullModel')
 
     model = modelIverseGrad(Inputs,rev_grad=args.lmb)
 
@@ -138,29 +149,31 @@ def modelIverseGrad(Inputs,rev_grad):
     if(training_method == 'MC_training'):
         model.compile(
-            loss=['binary_crossentropy']*4,
-            optimizer=optimizer,
-            loss_weights=[1., 0., 0., 0.],
-            weighted_metrics=['accuracy']
-        )
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
+            optimizer=optimizer,
+            loss_weights=[1.,0.,0.,0.],
+            weighted_metrics = metrics_dict
+        )
         history = model.fit(
-            X_all,
-            [isB_all, isB_all, isMC_all, isMC_all],
+            [X_all, weight_in_all],
+            [isB_all, isB_all, isMC_all, isMC_all],
             batch_size=batchsize, epochs=nepochstotal, verbose=2, validation_split=0.2,
-            sample_weight=[isMC_all.ravel(), 1-isMC_all.ravel(),
-                           1+0.5*isB_all.ravel(), 1-0.5*isB_all.ravel()],
-        )
-
-
+            sample_weight = [
+                isMC_all.ravel(),
+                1-isMC_all.ravel(),
+                1+0.5*isB_all.ravel(),
+                1-0.5*isB_all.ravel()],
+
+        )
     elif(training_method == 'data_training'):
         model.compile(
-            loss=['binary_crossentropy']*4,
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
             optimizer=optimizer,
             loss_weights=[0.,1.,0.,0.],
-            weighted_metrics=['accuracy']
+            weighted_metrics = metrics_dict
         )
         history = model.fit(
-            X_all,
+            [X_all, weight_in_all],
             [isB_all, isB_all, isMC_all, isMC_all],
             batch_size=batchsize, epochs=nepochstotal, verbose=2, validation_split=0.2,
             sample_weight = [
@@ -174,62 +187,142 @@ def modelIverseGrad(Inputs,rev_grad):
     from block_models import mc_correction
 
-    da_epochs = nepochstotal
+    da_epochs = nepochstotal - args.pretrainbtag - args.pretraindatamc
     if training_method == 'corrected_domain_adaptation':
         da_epochs=nepochstotal-nepochscorr
 
     #build the models
-    feat = common_features(Inputs)
+    feat = common_features(Inputs[0])
     feat_outshape = Input(shape=(feat.get_layer(index=-1).output_shape[1:]))
     btag = btag_discriminator(feat_outshape,dropoutrate=0.03)
-    datamc = datamc_discriminator(feat_outshape,rev_grad=args.lmb,dropoutrate=0.03)
-
-
-    modelallopen = Model(inputs=Inputs, outputs=btag(feat(Inputs))+datamc(feat(Inputs)),name='fullModel')
+    datamc = datamc_discriminator([feat_outshape, Inputs[1]],rev_grad=args.lmb,dropoutrate=0.03)
+    #fix the data/mc discriminator, leave weights trainable
+    datamc = set_trainable(datamc, 'ada_*', False)
+
+    if args.rightweights:
+        weight_layer = [i for i in datamc.layers if i.name == 'weight_layer'][0]
+        sess.run(
+            weight_layer.weights[0].assign(
+                np.array([[[exp_weight]]])
+            )
+        )
 
-    #print(feat.summary())
-    #print(btag.summary())
-    #print(datamc.summary())
+    if args.fixweights:
+        datamc = set_trainable(datamc, 'weight_*', False)
+
+    modelallopen = Model(inputs=Inputs,
+                         outputs=btag(feat(Inputs[0]))+datamc([feat(Inputs[0]), Inputs[1]]),name='fullModel')
+
+    #pre-train b-tagger
+    hist_prebtag = {}
+    if args.pretrainbtag:
+        print 'pre-training the b-tag classifier'
+        datamc = set_trainable(datamc, 'weight_*', False)
+        modelallopen.compile(
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
+            optimizer = Adam(lr=args.lr),
+            loss_weights = [1.,0.,0.,0.],
+            weighted_metrics = metrics_dict
+        )
+
+        hist_prebtag = modelallopen.fit(
+            [X_all, weight_in_all],
+            [isB_all, isB_all, isMC_all, isMC_all],
+            batch_size=batchsize, epochs=args.pretrainbtag,
+            verbose=2, validation_split=0.2,
+            sample_weight = [
+                isMC_all.ravel(),
+                1-isMC_all.ravel(),
+                onesarray,
+                onesarray],
+        ).history
+        hist_prebtag['weight'] = [get_weight(modelallopen)]*len(hist_prebtag['loss'])
+        hist_prebtag['real_weight'] = [get_expected_weight(modelallopen)]*len(hist_prebtag['loss'])
+        datamc = set_trainable(datamc, 'weight_*', not args.fixweights)
 
     modelallopen.compile(
-        loss = ['binary_crossentropy']*4,
+        loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
         optimizer = optimizer,
         loss_weights = [1.,0.,args.weight,0.],
-        weighted_metrics=['accuracy']
+        weighted_metrics = metrics_dict
     )
-
     feat.trainable = False
     btag.trainable = False
+    #set data/mc discriminator as trainable, fix weights
+    datamc = set_trainable(datamc, 'ada_*', True)
+    datamc = set_trainable(datamc, 'weight_*', False)
 
-    modelfixedbtag = Model(inputs=Inputs, outputs=btag(feat(Inputs))+datamc(feat(Inputs)),name='fullModel2')
+    modelfixedbtag = Model(inputs=Inputs, outputs=btag(feat(Inputs[0]))+datamc([feat(Inputs[0]), Inputs[1]]),name='fullModel2')
+
+    #pre-train data/mc
+    hist_predatamc = {}
+    if args.pretraindatamc:
+        print 'pre-training the data/MC classifier'
+
+        modelfixedbtag.compile(
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
+            optimizer = Adam(lr=args.lr),
+            loss_weights = [0.,0.,args.weight,0.],
+            weighted_metrics = metrics_dict
+        )
+        hist_predatamc = modelfixedbtag.fit(
+            [X_all, weight_in_all],
+            [isB_all, isB_all, isMC_all, isMC_all],
+            batch_size=batchsize, epochs=args.pretraindatamc,
+            verbose=2, validation_split=0.2,
+            sample_weight = [
+                isMC_all.ravel(),
+                1-isMC_all.ravel(),
+                onesarray,
+                onesarray],
+        ).history
+        hist_predatamc['weight'] = [get_weight(modelfixedbtag)]*len(hist_predatamc['loss'])
+        hist_predatamc['real_weight'] = [get_expected_weight(modelfixedbtag)]*len(hist_predatamc['loss'])
 
     modelfixedbtag.compile(
-        loss = ['binary_crossentropy']*4,
+        loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
         optimizer = optimizer,
         loss_weights = [1.,0.,args.weight,0.],
-        weighted_metrics=['accuracy']
-    )
-
+        weighted_metrics = metrics_dict
+    )
+
     from training_tools import train_models_adversarial
 
-    history=train_models_adversarial([modelallopen,modelfixedbtag],
-                                     [X_all],
-                                     [isB_all, isB_all, isMC_all, isMC_all],
-                                     da_epochs,
-                                     sample_weight=[isMC_all.ravel(),
-                                                    1-isMC_all.ravel(),
-                                                    onesarray,
-                                                    onesarray],
-                                     batch_size=batchsize,
-                                     validation_split=0.2,
-                                     nstepspermodel=None,
-                                     custombatchgenerators=None,
-                                     verbose=1)
+    history=train_models_adversarial(
+        [modelallopen,modelfixedbtag],
+        [X_all, weight_in_all],
+        [isB_all, isB_all, isMC_all, isMC_all],
+        da_epochs,
+        sample_weight=[
+            isMC_all.ravel(),
+            1-isMC_all.ravel(),
+            onesarray,
+            onesarray],
+        batch_size=batchsize,
+        validation_split=0.2,
+        nstepspermodel=None,
+        custombatchgenerators=None,
+        verbose=1,
+        history_addenda = {
+            'weight' : get_weight,
+            'real_weight' : get_expected_weight,
+        }
+    )
 
     print ('history type ', type(history))
     history=pd.DataFrame(history)
-
+    #collate histories
+    if args.pretraindatamc:
+        history = pd.concat((
+            pd.DataFrame(hist_predatamc),
+            history
+        ))
+    if args.pretrainbtag:
+        history = pd.concat((
+            pd.DataFrame(hist_prebtag),
+            history
+        ))
 
     model=modelallopen
 
     if(training_method == 'corrected_domain_adaptation'):
@@ -307,9 +400,7 @@ def generate_noise(nsamples):
         model=mcmodel
         noise=generate_noise(X_test.shape[0])
-        X_test=[X_test,isMC_test,noise,isB_all]
-
-
+        X_test=[X_test,isMC_test,noise,isB_all]
     else:
         raise ValueError('%s is an unknown run option' % training_method)
 
@@ -319,13 +410,23 @@ def generate_noise(nsamples):
     if args.nopred:
         return history
 
-    predictions = model.predict(X_test)
+    predictions = model.predict([X_test, weight_in_test])
     preds = pd.DataFrame()
     preds['prediction'] = predictions[0].ravel()
     preds['isB'] = isB_test
     preds['isMC'] = isMC_test
     save(preds, '%s/predictions.npy' %outdir)
+    from shutil import copyfile
+    import os
+
+    copyfile(
+        os.path.abspath(__file__),
+        '%s/script.py' % outdir
+    )
+
+    with open('%s/opts.txt' % outdir, 'w') as out:
+        out.write(args.__str__())
 
     return history
 
diff --git a/block_models.py b/block_models.py
index 180286d..c2eaaa3 100644
--- a/block_models.py
+++ b/block_models.py
@@ -1,5 +1,6 @@
 from keras.initializers import RandomNormal
-from keras.layers import Dense, Dropout, Multiply, Add, Concatenate
+from keras.layers import Dense, Dropout, Multiply, Add, \
+    Concatenate, Reshape, LocallyConnected1D, Flatten
 from keras.models import Model
 from Layers import GradientReversal
 
@@ -15,30 +16,44 @@ def common_features(Inputs, dropoutrate=0.03):
     X = Inputs
     X = Dense(30, activation='relu', name='common_dense_0')(X)
     X = Dropout(dropoutrate)(X)
-    X = Dense(20, activation='relu')(X)
+    X = Dense(20, activation='relu', name='common_dense_1')(X)
     X = Dropout(dropoutrate)(X)
-    X = Dense(20, activation='relu')(X)
+    X = Dense(20, activation='relu', name='common_dense_2')(X)
     X = Dropout(dropoutrate)(X)
-    X = Dense(15, activation='relu')(X)
+    X = Dense(15, activation='relu', name='common_dense_3')(X)
     X = Dropout(dropoutrate)(X)
     return Model(inputs=Inputs, outputs=X, name='common_features')
 
-def datamc_discriminator(XInputs, rev_grad=10, dropoutrate=0.03):
-    Ad = GradientReversal(hp_lambda=rev_grad, name='Ad_gradrev')(XInputs)
-    Ad = Dense(20, activation='relu')(Ad)
+def datamc_discriminator(XInputs, rev_grad=10, dropoutrate=0.03, weight_rev=10.):
+    Ad = GradientReversal(hp_lambda=rev_grad, name='ada_gradrev')(XInputs[0])
+    Ad = Dense(20, activation='relu', name='ada_dense_0')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(15, activation='relu')(Ad)
+    Ad = Dense(15, activation='relu', name='ada_dense_1')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(15, activation='relu')(Ad)
+    Ad = Dense(15, activation='relu', name='ada_dense_2')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(10, activation='relu')(Ad)
+    Ad = Dense(10, activation='relu', name='ada_dense_3')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(1, activation='sigmoid', name='Add')(Ad)
+    Ad = Dense(1, activation='sigmoid', name='ada_out')(Ad)
 
     # just for compatibility for now
     Ad1 = Dense(
         1, activation='linear', use_bias=False,
         trainable=False, name='Add1')(Ad)
+
+    #make tunable weight
+    weight = Reshape((1,1),name='weight_reshape')(XInputs[1])
+    weight = LocallyConnected1D(
+        1,1,
+        activation='linear', use_bias=False,
+        kernel_initializer='zeros',
+        name="weight_layer",
+        #kernel_regularizer=boundary_regularizer,
+    ) (weight)
+    weight= Flatten()(weight)
+    weight = GradientReversal(name='weight_reversal', hp_lambda=weight_rev)(weight)
+    Ad = Concatenate(name='ada_weight')([Ad, weight])
+
     return Model(inputs=XInputs, outputs=[Ad, Ad1], name="datamc_discriminator")
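The weight_layer construction above is the central trick of this patch, so a standalone sketch may help: a LocallyConnected1D(1, 1, use_bias=False) applied to a constant length-1 input is nothing more than a single trainable scalar shared by every event, and Concatenate then appends it to the discriminator output so the weighted objective can read it back. The names below (toy_weight_layer etc.) are illustrative only, not taken from the repository:

    import numpy as np
    from keras.layers import Input, Reshape, LocallyConnected1D, Flatten
    from keras.models import Model

    # constant input of ones, shape (batch, 1)
    ones_in = Input((1,))
    w = Reshape((1, 1))(ones_in)                      # -> (batch, 1, 1)
    w = LocallyConnected1D(1, 1, use_bias=False,      # one kernel element = one trainable scalar
                           kernel_initializer='zeros',
                           name='toy_weight_layer')(w)
    w = Flatten()(w)                                  # -> (batch, 1), same scalar for every event
    toy = Model(inputs=ones_in, outputs=w)

    print(toy.get_weights())             # a single (1, 1, 1) kernel, i.e. one scalar
    print(toy.predict(np.ones((4, 1))))  # that scalar (initially 0) broadcast over the batch
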
diff --git a/make_plots.py b/make_plots.py
index 6654a89..f2e1dee 100644
--- a/make_plots.py
+++ b/make_plots.py
@@ -4,6 +4,7 @@ matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 
 from argparse import ArgumentParser
+from pdb import set_trace
 
 parser = ArgumentParser()
 parser.add_argument('inputdir')
@@ -77,7 +78,23 @@ def makeEpochPlot(idstring,fill):
             )
     plt.plot(mc_history['val_btag_discriminator_'+idstring+'_2_mean'],label=dataonmc, c='red')
+    if fill:
+        plt.fill_between(
+            range(nepochs),
+            mc_history['val_btag_discriminator_'+idstring+'_2_mean']-mc_history['val_btag_discriminator_'+idstring+'_2_std'],
+            mc_history['val_btag_discriminator_'+idstring+'_2_mean']+mc_history['val_btag_discriminator_'+idstring+'_2_std'],
+            color='red',
+            alpha=0.3
+            )
     plt.plot(mc_history['val_btag_discriminator_'+idstring+'_1_mean'],label=mconmc, c='blueviolet',linestyle=':')
+    if fill:
+        plt.fill_between(
+            range(nepochs),
+            mc_history['val_btag_discriminator_'+idstring+'_1_mean']-mc_history['val_btag_discriminator_'+idstring+'_1_std'],
+            mc_history['val_btag_discriminator_'+idstring+'_1_mean']+mc_history['val_btag_discriminator_'+idstring+'_1_std'],
+            color='blueviolet',
+            alpha=0.3
+            )
     plt.plot(data_history['val_btag_discriminator_'+idstring+'_2_mean'],label=dataondata, c='orange')
     plt.plot(data_history['val_btag_discriminator_'+idstring+'_1_mean'],label=mcondata, c='brown',linestyle=':')
 
@@ -94,9 +111,23 @@ def makeEpochPlot(idstring,fill):
 
 makeEpochPlot('loss',True)
-makeEpochPlot('weighted_acc',False)
-
+makeEpochPlot('weighted_binary_accuracy',False)
 
+#plot weights
+plt.clf()
+plt.plot(da_history.index, da_history.weight_mean, label='fitted weight', color='blue')
+plt.fill_between(
+    da_history.index,
+    da_history.weight_mean - da_history.weight_std,
+    da_history.weight_mean + da_history.weight_std,
+    color='blue', alpha=0.3
+    )
+plt.plot([da_history.index.min(), da_history.index.max()], [da_history.real_weight_mean, da_history.real_weight_mean], label='best value', ls='--')
+plt.xlabel('epoch')
+plt.ylabel('weight')
+#plt.legend(loc='best')
+plt.grid(True)
+plt.savefig('%s/weights%s.png' % (args.inputdir, args.postfix))
 
 from sklearn.metrics import roc_curve, roc_auc_score
 from scipy.interpolate import InterpolatedUnivariateSpline
diff --git a/make_samples.py b/make_samples.py
index 3dc741a..dc69287 100644
--- a/make_samples.py
+++ b/make_samples.py
@@ -1,9 +1,9 @@
 import numpy as np
+np.random.seed(1234567)
 from glob import glob
+from pdb import set_trace
 
-
-def make_sample(input_dir, add_svs):
-
+def make_sample(input_dir, add_svs, keep_probability=None): #default value tuned for current sample
     x_file = glob('%s/*features_0.npy' % input_dir)[0]
     y_file = glob('%s/*truth_1.npy' % input_dir)[0]
     X_all = np.load(x_file)
@@ -20,16 +20,15 @@ def make_sample(input_dir, add_svs):
     isMC_all = isMC_all.ravel()
     print(isMC_all.shape)
     # select MC only
-
-    if False:
-        np.random.seed(1234)
+    if True:
         isMC_all = np.random.randint(2, size=X_all.shape[0])
         isMC_all = np.reshape(isMC_all,(isMC_all.shape[0],1))
 
-    X_ptRel = X_all[:, :1]#3
-    X_2Ds = X_all[:, 5:6]#8
+    X_ptRel = X_all[:, :3]
+    X_2Ds = X_all[:, 5:8]
+    X_3Ds = X_all[:, 10:11]
-    X_ptPro = X_all[:, 15:16]#18
+    X_ptPro = X_all[:, 15:18]
     # now we can increase the smearing
     # noise = np.random.randn(X_all.shape[0],5)*0.5
     # noise2 = np.random.randn(X_all.shape[0],5)*0.5
@@ -47,8 +46,8 @@ def addMCStretch(Xin, stretch, data=False):
         Xin = np.multiply(Xin,selected)
         return Xin
 
-    # X_2Ds=addMCStretch(X_2Ds, 5.5)
-    # X_3Ds=addMCStretch(X_3Ds, 5.5)
+    X_2Ds=addMCStretch(X_2Ds, 0.1)
+    X_3Ds=addMCStretch(X_3Ds, 0.1)
 
     # poisson_b = (np.random.rand(X_all.shape[0], 1) > 0.15) * isB_all
@@ -61,5 +60,71 @@ def addMCStretch(Xin, stretch, data=False):
     # X_2Ds = X_2Ds + noise * X_2Ds #* X_2Ds * (isMC_all<.1)
     # X_ptRel= noise #* X_3Ds * (isMC_all<.1)
     # X_ptPro= noise #* X_3Ds * (isMC_all<.1)
-    return np.concatenate(
-        [X_ptRel, X_2Ds, X_3Ds, X_ptPro], axis=1), isB_all, isMC_all
+
+    X_all = np.concatenate([X_ptRel, X_2Ds, X_3Ds, X_ptPro], axis=1)
+    #
+    # flavour weights stuff
+    #
+    ismc = (isMC_all.ravel() > 0.5)
+    isb = (isB_all.ravel() > 0.5)
+    #compute scaling to get same composition
+    cnst = float((isb & np.invert(ismc)).sum())/np.invert(ismc).sum()
+    nb = (isb & ismc).sum()
+    nl = (np.invert(isb) & ismc).sum()
+    fraction = (nb/cnst - nb)/nl
+    if fraction > 1.:
+        fraction = -1*nl/(nb/cnst - nb)
+    print ' -- DATASET --'
+    print 'Before masking:'
+    print 'MC fraction: %.3f' % (float(ismc.sum())/ismc.shape[0])
+    print 'Data B fraction: %.3f' % (float((isb & np.invert(ismc)).sum())/np.invert(ismc).sum())
+    print 'MC B fraction: %.3f' % (float((isb & ismc).sum())/ismc.sum())
+    print '\n'
+    if keep_probability is None:
+        print 'Forcing fractions to be the same'
+        keep_probability = fraction
+
+    if keep_probability > 0:
+        mask = np.invert(ismc) | \
+            (ismc & isb) | \
+            (ismc & np.invert(isb) & (np.random.rand(isMC_all.shape[0]) < keep_probability))
+    else:
+        mask = np.invert(ismc) | \
+            (ismc & np.invert(isb)) | \
+            (ismc & isb & (np.random.rand(isMC_all.shape[0]) < -1*keep_probability))
+
+    X_all = X_all[mask]
+    isB_all = isB_all[mask]
+    isMC_all = isMC_all[mask]
+    ismc = (isMC_all.ravel() > 0.5)
+    #make data and mc 50%
+    mc_frac = (float(ismc.sum())/ismc.shape[0])
+    if mc_frac > 0.5:
+        dropout = float(np.invert(ismc).sum())/float(ismc.sum())
+        mask = np.invert(ismc) | (ismc & (np.random.rand(isMC_all.shape[0]) < dropout))
+    else:
+        dropout = float(ismc.sum())/float(np.invert(ismc).sum())
+        mask = ismc | (np.invert(ismc) & (np.random.rand(isMC_all.shape[0]) < dropout))
+
+    X_all = X_all[mask]
+    isB_all = isB_all[mask]
+    isMC_all = isMC_all[mask]
+    ismc = (isMC_all.ravel() > 0.5)
+    isb = (isB_all.ravel() > 0.5)
+    print 'After masking:'
+    print 'MC fraction: %.3f' % (float(ismc.sum())/ismc.shape[0])
+    print 'Data B fraction: %.3f' % (((isb & np.invert(ismc)).sum())/float(np.invert(ismc).sum()))
+    print 'MC B fraction: %.3f' % (((isb & ismc).sum())/float(ismc.sum()))
+    print '\n'
+    input_weights = ((isb == 1) & ismc).astype(float)
+    input_weights = input_weights.reshape((input_weights.shape[0],1))
+
+    nb_mc = (isb & ismc).sum()
+    nl_mc = (ismc & np.invert(isb)).sum()
+    data_b_frac = (np.invert(ismc) & isb).sum() / float((np.invert(ismc)).sum())
+    print 'MC: #B: %d #L: %d' % (nb_mc, nl_mc)
+    exp_f = data_b_frac*nl_mc/(nb_mc*(1-data_b_frac))
+    exp_f -= 1
+    print 'Expected weight: %.3f' % exp_f
+
+    return X_all, isB_all, isMC_all, input_weights, exp_f
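The expected-weight formula printed at the end of make_sample (exp_f = data_b_frac*nl_mc/(nb_mc*(1-data_b_frac)) - 1) can be checked with a few lines of arithmetic; the counts below are made up for illustration:

    # illustrative numbers only, not taken from any real sample
    nb_mc, nl_mc = 3000, 7000        # b and light jets in MC after masking
    data_b_frac = 0.40               # b fraction observed in data

    # weight w such that nb_mc*(1+w) / (nb_mc*(1+w) + nl_mc) == data_b_frac
    exp_f = data_b_frac * nl_mc / (nb_mc * (1 - data_b_frac))
    exp_f -= 1
    print('Expected weight: %.3f' % exp_f)   # ~0.556 with these numbers

    # check: reweighted MC b fraction matches the data fraction
    reweighted = nb_mc * (1 + exp_f) / (nb_mc * (1 + exp_f) + nl_mc)
    print('MC B fraction after reweighting: %.3f' % reweighted)   # 0.400
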
diff --git a/training_tools.py b/training_tools.py
index 51144b3..18326fc 100644
--- a/training_tools.py
+++ b/training_tools.py
@@ -3,7 +3,7 @@ import numpy as np
 from sklearn.utils import shuffle
 from random import randint
-
+from pdb import set_trace
 def train_models_adversarial(modellist,
                              x,
@@ -15,7 +15,8 @@ def train_models_adversarial(modellist,
                              nstepspermodel=None,
                              custombatchgenerators=None,
                              modify_labels=None,
-                             verbose=0):
+                             verbose=0,
+                             history_addenda={}):
     '''
     modellist,x,y,sample_weight need to be lists, even if only one entry
@@ -33,12 +34,15 @@
     modify_labels: applies user defined function to the label list. Needs to be a list with same length as the model list.
    This function will be called for each batch, so keep it simple
+    history_addenda is a {string: function} dictionary; each function acts on the model and outputs a
+    float, which gets appended to the output history
     '''
 
     # create the batches
     totalsize = x[0].shape[0]
     ntrainbatches = int((1-validation_split)*totalsize/batch_size)
+    history = {i: [] for i in history_addenda}
 
     split = []
     # make split - last split will be val sample
     for i in range(ntrainbatches):
@@ -170,6 +174,9 @@ def randomise_array_lists():
             valhist.append(model.test_on_batch(x_val, y_val, sample_weight = sw_splitval))
 
+            for name, fcn in history_addenda.iteritems():
+                history[name].append(fcn(model))
+
             if verbose > 0:
                 for i in range(len(usedmetrics)):
                     print(usedmetrics[i]+": "+str(trainhist[-1][i]), end=' - ')
@@ -177,7 +184,6 @@ def randomise_array_lists():
                     print("val_"+usedmetrics[i]+": "+str(valhist[-1][i]), end=' - ')
                 print('\n')
 
-    history = {}
     # use usedmetrics to make dict out of it
     for i in range(len(usedmetrics)):
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..f9a136c
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,37 @@
+'''collection of useful functions'''
+import os
+import numpy as np
+import pandas as pd
+from fnmatch import fnmatch
+
+def save(df, fname):
+    'saves a pandas DataFrame to a numpy file'
+    dname = os.path.dirname(fname)
+    if not os.path.isdir(dname):
+        os.makedirs(dname)
+    records = df.to_records(index=False)
+    records.dtype.names = [str(i) for i in records.dtype.names]
+    np.save(fname, records)
+
+def fix_layers(my_model):
+    'sets all the layers in a model as non trainable'
+    for layer in my_model.layers:
+        layer.trainable = False
+    return my_model
+
+def open_layers(my_model):
+    'sets all the layers in a model as trainable'
+    for layer in my_model.layers:
+        layer.trainable=True
+    return my_model
+
+def set_trainable(m, patterns, value=True):
+    '''sets single layers as (non-)trainable, depending on the name.
+Shell-style wildcard patterns (fnmatch) allowed, as a single string or a list. Defaults to trainable'''
+    if isinstance(patterns, basestring):
+        patterns = [patterns]
+    for layer in m.layers:
+        name = layer.name
+        if any(fnmatch(name, i) for i in patterns):
+            layer.trainable = value
+    return m
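For reference, a minimal self-contained sketch of how set_trainable is meant to be used with fnmatch-style patterns; the toy model and its layer names below are illustrative, only the naming convention mirrors block_models.py:

    from keras.layers import Input, Dense
    from keras.models import Model
    from utils import set_trainable   # the helper added in this patch

    inp = Input((10,))
    x = Dense(5, name='ada_dense_0')(inp)
    out = Dense(1, name='weight_layer')(x)
    toy = Model(inputs=inp, outputs=out)

    toy = set_trainable(toy, 'ada_*', False)   # freezes ada_dense_0 only
    print([(l.name, l.trainable) for l in toy.layers])
    # e.g. [('input_1', True), ('ada_dense_0', False), ('weight_layer', True)]
    # note: trainability changes only take effect after the next model.compile()
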
diff --git a/weightedobjectives.py b/weightedobjectives.py
new file mode 100644
index 0000000..69007fb
--- /dev/null
+++ b/weightedobjectives.py
@@ -0,0 +1,87 @@
+from keras import backend as K
+from tensorflow import where, greater, abs, zeros_like, exp
+import tensorflow as tf
+from keras import losses
+from keras import metrics
+from pdb import set_trace
+
+def tf2np(fcn, session, *args):
+    '''tf2np - TensorFlow to NumPy
+Allows TF-based objective functions to be used with NumPy inputs and outputs, to ease debugging.
+Usage tf2np(fcn, session, *args):
+fcn - TF-based function (accepts and handles tensors)
+session - TF session
+*args - TF-based function arguments, NumPy format
+    '''
+    args = [tf.convert_to_tensor(i) for i in args]
+    return fcn(*args).eval(session=session)
+
+def binary_crossentropy_labelweights(y_true, y_pred):
+    """Modification of the binary cross-entropy to use fitted weights,
+which are passed as additional column(s) of y_pred
+    """
+    printAll=False
+
+    if printAll:
+        print('in binary_crossentropy_labelweights_Delphes')
+
+    # the prediction if it is data or MC is in the first index (see model)
+    isMCpred = y_pred[:,:1]
+
+    #the weights are in the remaining parts of the vector
+    Weightpred = y_pred[:,1:]
+
+    # the truth if it is data or MC
+    isMCtrue = y_true[:,:1]
+    # labels: B, C, UDSG - not needed here, but maybe later
+    # labels_true = y_true[:,1:]
+    if printAll:
+        print('isMCpred ', isMCpred)
+        print('Weightpred ', Weightpred)
+        print('isMCtrue ', isMCtrue)
+
+    #only apply label weight deltas to MC, for data will be 1 (+1)
+    #as a result of locally connected it will be only !=0 for one label
+    #set_trace()
+    nmc = K.sum(isMCtrue)
+    flat_mc = K.flatten(isMCtrue)
+    #compute weights for all
+    #set_trace()
+    mc_weights = K.clip(flat_mc * K.sum(Weightpred, axis=1)+1, K.epsilon(), 5)
+    #remove data and compute normalization
+    mc_weights -= (1-flat_mc)
+    mc_factor = nmc/K.sum(mc_weights)
+    mc_weights *= mc_factor
+    #add back data
+    weights = mc_weights+(1-K.flatten(isMCtrue))
+    weighted_xentr = weights*K.flatten(losses.binary_crossentropy(isMCtrue, isMCpred))
+
+    if printAll:
+        print('weights', weights)
+        print('weighted_xentr', weighted_xentr.get_shape())
+
+    return weighted_xentr
+
+
+def binary_accuracy_labelweights(y_true, y_pred):
+    '''Modification of the binary accuracy to use fitted weights,
+which are passed as additional column(s) of y_pred
+    '''
+    acc = metrics.binary_accuracy(y_true, y_pred[:,:1])
+    fitted_weights = y_pred[:,1:]
+
+    #only apply label weight deltas to MC, for data will be 1 (+1)
+    #as a result of locally connected it will be only !=0 for one label
+    #set_trace()
+    nmc = K.sum(y_true)
+    flat_mc = K.flatten(y_true)
+    #compute weights for all
+    mc_weights = K.clip(flat_mc * K.sum(fitted_weights, axis=1)+1, K.epsilon(), 5)
+    #remove data and compute normalization
+    mc_weights -= (1-flat_mc)
+    mc_factor = nmc/K.sum(mc_weights)
+    mc_weights *= mc_factor
+    #add back data
+    weights = mc_weights+(1-K.flatten(flat_mc))
+    weighted_acc = tf.cast(weights, tf.float32)*K.flatten(acc)
+    return weighted_acc
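These objectives can be exercised outside of a fit with the tf2np helper above; the toy arrays below are made up for illustration, and the sketch assumes a TF1-style Keras backend where K.get_session() is available:

    import numpy as np
    import keras.backend as K
    from weightedobjectives import tf2np, binary_crossentropy_labelweights

    # 4 events: first two MC (truth=1), last two data (truth=0)
    y_true = np.array([[1.], [1.], [0.], [0.]])
    # column 0: data/MC prediction, column 1: fitted weight delta (only non-zero for MC b jets)
    y_pred = np.array([[0.9, 0.5],
                       [0.2, 0.0],
                       [0.1, 0.0],
                       [0.8, 0.0]])

    per_event_loss = tf2np(binary_crossentropy_labelweights, K.get_session(), y_true, y_pred)
    print(per_event_loss)   # one weighted cross-entropy value per event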