diff --git a/AdaptMeDelphes.py b/AdaptMeDelphes.py
index b65082c..3167cbf 100644
--- a/AdaptMeDelphes.py
+++ b/AdaptMeDelphes.py
@@ -11,7 +11,9 @@ from sklearn.model_selection import train_test_split
 from keras.models import Model
 from keras.layers import Input
+from keras.metrics import binary_accuracy
 from Layers import *
+from weightedobjectives import binary_crossentropy_labelweights, binary_accuracy_labelweights
 
 global_loss_list={}
 
 parser = ArgumentParser()
@@ -39,6 +41,12 @@ parser.add_argument("--nepochscorr", help="number of epochs for MC correction", type=int, default=100)
 parser.add_argument("--batchsize", help="batch size", type=int, default=10000)
 parser.add_argument("--runIC", action='store_true')
+parser.add_argument("--skewfraction", help='fraction to skew data samples, >0 downsamples lights, <0 downsamples bs',
+                    type=float, default=None)
+parser.add_argument("--pretrainbtag", help='number of epochs to train the b-tagging part', type=int, default=0)
+parser.add_argument("--pretraindatamc", help='number of epochs to train the data/mc part', type=int, default=0)
+parser.add_argument("--fixweights" , action='store_true', help='fix the label-fraction weights')
+parser.add_argument("--rightweights", action='store_true', help='set the starting label-fraction weights to the right value')
 
 args = parser.parse_args()
 
@@ -52,7 +60,6 @@
         print "Using GPU: ",os.environ['CUDA_VISIBLE_DEVICES']
     except ImportError:
         pass
-
 else:
     print "LXPLUS settings"
     if args.gpu<0:
@@ -67,7 +74,6 @@
         os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
         print('running on GPU '+str(args.gpu))
 
-
     if args.gpufraction>0 and args.gpufraction<1:
         gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpufraction)
         sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
@@ -79,17 +85,13 @@
 from make_samples import make_sample
 import pandas as pd
 from keras.optimizers import Adam, SGD
+from utils import save, fix_layers, open_layers, set_trainable
 
-
-def save(df, fname):
-    dname = os.path.dirname(fname)
-    if not os.path.isdir(dname):
-        os.makedirs(dname)
-    records = df.to_records(index=False)
-    records.dtype.names = [str(i) for i in records.dtype.names]
-    np.save(fname, records)
-
-
+def get_weight(model):
+    session = K.get_session()
+    sub_models = [i for i in model.layers if hasattr(i, 'layers')]
+    weight_layer = [i for m in sub_models for i in m.layers if i.name == 'weight_layer'][0]
+    return weight_layer.weights[0].eval(session=session).ravel()[0]
 
 def schedule(x):
     lr=args.lr
@@ -104,17 +106,26 @@ def schedule(x):
 
 def run_model(outdir, training_method=1):
-
     nepochstotal=args.nepochs
     nepochscorr=args.nepochscorr
     batchsize=args.batchsize
-
+    metrics_dict = {
+        'btag_discriminator' : binary_accuracy,
+        'datamc_discriminator' : [binary_accuracy_labelweights, binary_accuracy],
+    }
 
-    Inputs = Input((10,))
+    Inputs = [Input((10,)), Input((1,))]
     global_loss_list['GradientReversal']=GradientReversal()
 
-    X_traintest, isB_traintest , isMC_traintest = make_sample(args.indir, args.addsv)
-    X_all, X_test, isB_all, isB_test, isMC_all, isMC_test = train_test_split(X_traintest, isB_traintest , isMC_traintest, test_size=0.1, random_state=42)
+
+    X_traintest, isB_traintest , isMC_traintest, weight_in_traintest, exp_weight = make_sample(
+        args.indir, args.addsv, args.skewfraction
+    )
+    get_expected_weight = lambda x: exp_weight #dummy function
+
+    X_all, X_test, isB_all, isB_test, isMC_all, isMC_test, weight_in_all, weight_in_test = train_test_split(
+        X_traintest, isB_traintest , isMC_traintest, weight_in_traintest,
+        test_size=0.1, random_state=42
+    )
 
     onesarray=np.ones(isB_all.shape[0])
 
@@ -125,12 +136,12 @@ def run_model(outdir, training_method=1):
     #for compatibility
     def modelIverseGrad(Inputs,rev_grad):
         #build the models
-        feat = common_features(Inputs)
-        feat_outshape = Input(feat.get_layer(index=-1).output_shape)
+        feat = common_features(Inputs[0])
+        feat_outshape = Input(shape=(feat.get_layer(index=-1).output_shape[1:]))
         btag = btag_discriminator(feat_outshape,dropoutrate=0.03)
-        datamc = datamc_discriminator(feat_outshape,rev_grad=rev_grad,dropoutrate=0.03)
+        datamc = datamc_discriminator([feat_outshape, Inputs[1]],rev_grad=rev_grad,dropoutrate=0.03)
 
-        return Model(inputs=Inputs, outputs=btag(feat(Inputs))+datamc(feat(Inputs)),name='fullModel')
+        return Model(inputs=Inputs, outputs=btag(feat(Inputs[0]))+datamc([feat(Inputs[0]), Inputs[1]]),name='fullModel')
 
     model = modelIverseGrad(Inputs,rev_grad=args.lmb)
 
@@ -138,29 +149,31 @@ def modelIverseGrad(Inputs,rev_grad):
     if(training_method == 'MC_training'):
         model.compile(
-            loss=['binary_crossentropy']*4,
-            optimizer=optimizer,
-            loss_weights=[1., 0., 0., 0.],
-            weighted_metrics=['accuracy']
-        )
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
+            optimizer=optimizer,
+            loss_weights=[1.,0.,0.,0.],
+            weighted_metrics = metrics_dict
+        )
         history = model.fit(
-            X_all,
-            [isB_all, isB_all, isMC_all, isMC_all],
+            [X_all, weight_in_all],
+            [isB_all, isB_all, isMC_all, isMC_all],
             batch_size=batchsize, epochs=nepochstotal, verbose=2, validation_split=0.2,
-            sample_weight=[isMC_all.ravel(), 1-isMC_all.ravel(),
-                           1+0.5*isB_all.ravel(), 1-0.5*isB_all.ravel()],
-        )
-
-
+            sample_weight = [
+                isMC_all.ravel(),
+                1-isMC_all.ravel(),
+                1+0.5*isB_all.ravel(),
+                1-0.5*isB_all.ravel()],
+
+        )
     elif(training_method == 'data_training'):
         model.compile(
-            loss=['binary_crossentropy']*4,
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
             optimizer=optimizer,
             loss_weights=[0.,1.,0.,0.],
-            weighted_metrics=['accuracy']
+            weighted_metrics = metrics_dict
         )
         history = model.fit(
-            X_all,
+            [X_all, weight_in_all],
             [isB_all, isB_all, isMC_all, isMC_all],
             batch_size=batchsize, epochs=nepochstotal, verbose=2, validation_split=0.2,
             sample_weight = [
@@ -174,62 +187,142 @@ def modelIverseGrad(Inputs,rev_grad):
     from block_models import mc_correction
 
-    da_epochs = nepochstotal
+    da_epochs = nepochstotal - args.pretrainbtag - args.pretraindatamc
     if training_method == 'corrected_domain_adaptation':
         da_epochs=nepochstotal-nepochscorr
 
     #build the models
-    feat = common_features(Inputs)
+    feat = common_features(Inputs[0])
     feat_outshape = Input(shape=(feat.get_layer(index=-1).output_shape[1:]))
     btag = btag_discriminator(feat_outshape,dropoutrate=0.03)
-    datamc = datamc_discriminator(feat_outshape,rev_grad=args.lmb,dropoutrate=0.03)
-
-
-    modelallopen = Model(inputs=Inputs, outputs=btag(feat(Inputs))+datamc(feat(Inputs)),name='fullModel')
+    datamc = datamc_discriminator([feat_outshape, Inputs[1]],rev_grad=args.lmb,dropoutrate=0.03)
+    #fix the data/mc discriminator, leave weights trainable
+    datamc = set_trainable(datamc, 'ada_*', False)
+
+    if args.rightweights:
+        weight_layer = [i for i in datamc.layers if i.name == 'weight_layer'][0]
+        sess.run(
+            weight_layer.weights[0].assign(
+                np.array([[[exp_weight]]])
+            )
+        )
 
-    #print(feat.summary())
-    #print(btag.summary())
-    #print(datamc.summary())
+    if args.fixweights:
+        datamc = set_trainable(datamc, 'weight_*', False)
+
+    modelallopen = Model(inputs=Inputs,
+                         outputs=btag(feat(Inputs[0]))+datamc([feat(Inputs[0]), Inputs[1]]),name='fullModel')
+
+    #pre-train b-tagger
+    hist_prebtag = {}
+    if args.pretrainbtag:
+        print 'pre-training the b-tag classifier'
+        datamc = set_trainable(datamc, 'weight_*', False)
+        modelallopen.compile(
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
+            optimizer = Adam(lr=args.lr),
+            loss_weights = [1.,0.,0.,0.],
+            weighted_metrics = metrics_dict
+        )
+
+        hist_prebtag = modelallopen.fit(
+            [X_all, weight_in_all],
+            [isB_all, isB_all, isMC_all, isMC_all],
+            batch_size=batchsize, epochs=args.pretrainbtag,
+            verbose=2, validation_split=0.2,
+            sample_weight = [
+                isMC_all.ravel(),
+                1-isMC_all.ravel(),
+                onesarray,
+                onesarray],
+        ).history
+        hist_prebtag['weight'] = [get_weight(modelallopen)]*len(hist_prebtag['loss'])
+        hist_prebtag['real_weight'] = [get_expected_weight(modelallopen)]*len(hist_prebtag['loss'])
+        datamc = set_trainable(datamc, 'weight_*', not args.fixweights)
 
     modelallopen.compile(
-        loss = ['binary_crossentropy']*4,
+        loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
         optimizer = optimizer,
         loss_weights = [1.,0.,args.weight,0.],
-        weighted_metrics=['accuracy']
+        weighted_metrics = metrics_dict
     )
-
     feat.trainable = False
     btag.trainable = False
+    #set data/mc discriminator as trainable, fix weights
+    datamc = set_trainable(datamc, 'ada_*', True)
+    datamc = set_trainable(datamc, 'weight_*', False)
 
-    modelfixedbtag = Model(inputs=Inputs, outputs=btag(feat(Inputs))+datamc(feat(Inputs)),name='fullModel2')
+    modelfixedbtag = Model(inputs=Inputs, outputs=btag(feat(Inputs[0]))+datamc([feat(Inputs[0]), Inputs[1]]),name='fullModel2')
+
+    #pre-train data/mc
+    hist_predatamc = {}
+    if args.pretraindatamc:
+        print 'pre-training the data/MC classifier'
+
+        modelfixedbtag.compile(
+            loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
+            optimizer = Adam(lr=args.lr),
+            loss_weights = [0.,0.,args.weight,0.],
+            weighted_metrics = metrics_dict
+        )
+        hist_predatamc = modelfixedbtag.fit(
+            [X_all, weight_in_all],
+            [isB_all, isB_all, isMC_all, isMC_all],
+            batch_size=batchsize, epochs=args.pretraindatamc,
+            verbose=2, validation_split=0.2,
+            sample_weight = [
+                isMC_all.ravel(),
+                1-isMC_all.ravel(),
+                onesarray,
+                onesarray],
+        ).history
+        hist_predatamc['weight'] = [get_weight(modelfixedbtag)]*len(hist_predatamc['loss'])
+        hist_predatamc['real_weight'] = [get_expected_weight(modelfixedbtag)]*len(hist_predatamc['loss'])
 
     modelfixedbtag.compile(
-        loss = ['binary_crossentropy']*4,
+        loss = ['binary_crossentropy']*2+[binary_crossentropy_labelweights, 'binary_crossentropy'],
         optimizer = optimizer,
         loss_weights = [1.,0.,args.weight,0.],
-        weighted_metrics=['accuracy']
-    )
-
+        weighted_metrics = metrics_dict
+    )
+
     from training_tools import train_models_adversarial
 
-    history=train_models_adversarial([modelallopen,modelfixedbtag],
-                                     [X_all],
-                                     [isB_all, isB_all, isMC_all, isMC_all],
-                                     da_epochs,
-                                     sample_weight=[isMC_all.ravel(),
-                                                    1-isMC_all.ravel(),
-                                                    onesarray,
-                                                    onesarray],
-                                     batch_size=batchsize,
-                                     validation_split=0.2,
-                                     nstepspermodel=None,
-                                     custombatchgenerators=None,
-                                     verbose=1)
+    history=train_models_adversarial(
+        [modelallopen,modelfixedbtag],
+        [X_all, weight_in_all],
+        [isB_all, isB_all, isMC_all, isMC_all],
+        da_epochs,
+        sample_weight=[
+            isMC_all.ravel(),
+            1-isMC_all.ravel(),
+            onesarray,
+            onesarray],
+        batch_size=batchsize,
+        validation_split=0.2,
+        nstepspermodel=None,
+        custombatchgenerators=None,
+        verbose=1,
+        history_addenda = {
+            'weight' : get_weight,
+            'real_weight' : get_expected_weight,
+        }
+    )
 
     print ('history type ', type(history))
     history=pd.DataFrame(history)
-
+    #collate histories
+    if args.pretraindatamc:
+        history = pd.concat((
+            pd.DataFrame(hist_predatamc),
+            history
+        ))
+    if args.pretrainbtag:
+        history = pd.concat((
+            pd.DataFrame(hist_prebtag),
+            history
+        ))
 
     model=modelallopen
 
     if(training_method == 'corrected_domain_adaptation'):
@@ -307,9 +400,7 @@ def generate_noise(nsamples):
         model=mcmodel
         noise=generate_noise(X_test.shape[0])
-        X_test=[X_test,isMC_test,noise,isB_all]
-
-
+        X_test=[X_test,isMC_test,noise,isB_all]
     else:
         raise ValueError('%s is an unknown run option' % training_method)
 
@@ -319,13 +410,23 @@ def generate_noise(nsamples):
     if args.nopred:
         return history
 
-    predictions = model.predict(X_test)
+    predictions = model.predict([X_test, weight_in_test])
     preds = pd.DataFrame()
     preds['prediction'] = predictions[0].ravel()
     preds['isB'] = isB_test
     preds['isMC'] = isMC_test
     save(preds, '%s/predictions.npy' %outdir)
+    from shutil import copyfile
+    import os
+
+    copyfile(
+        os.path.abspath(__file__),
+        '%s/script.py' % outdir
+    )
+
+    with open('%s/opts.txt' % outdir, 'w') as out:
+        out.write(args.__str__())
 
     return history
 
diff --git a/block_models.py b/block_models.py
index 180286d..c2eaaa3 100644
--- a/block_models.py
+++ b/block_models.py
@@ -1,5 +1,6 @@
 from keras.initializers import RandomNormal
-from keras.layers import Dense, Dropout, Multiply, Add, Concatenate
+from keras.layers import Dense, Dropout, Multiply, Add, \
+    Concatenate, Reshape, LocallyConnected1D, Flatten
 from keras.models import Model
 from Layers import GradientReversal
 
@@ -15,30 +16,44 @@ def common_features(Inputs, dropoutrate=0.03):
     X = Inputs
     X = Dense(30, activation='relu', name='common_dense_0')(X)
     X = Dropout(dropoutrate)(X)
-    X = Dense(20, activation='relu')(X)
+    X = Dense(20, activation='relu', name='common_dense_1')(X)
     X = Dropout(dropoutrate)(X)
-    X = Dense(20, activation='relu')(X)
+    X = Dense(20, activation='relu', name='common_dense_2')(X)
     X = Dropout(dropoutrate)(X)
-    X = Dense(15, activation='relu')(X)
+    X = Dense(15, activation='relu', name='common_dense_3')(X)
     X = Dropout(dropoutrate)(X)
     return Model(inputs=Inputs, outputs=X, name='common_features')
 
-def datamc_discriminator(XInputs, rev_grad=10, dropoutrate=0.03):
-    Ad = GradientReversal(hp_lambda=rev_grad, name='Ad_gradrev')(XInputs)
-    Ad = Dense(20, activation='relu')(Ad)
+def datamc_discriminator(XInputs, rev_grad=10, dropoutrate=0.03, weight_rev=10.):
+    Ad = GradientReversal(hp_lambda=rev_grad, name='ada_gradrev')(XInputs[0])
+    Ad = Dense(20, activation='relu', name='ada_dense_0')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(15, activation='relu')(Ad)
+    Ad = Dense(15, activation='relu', name='ada_dense_1')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(15, activation='relu')(Ad)
+    Ad = Dense(15, activation='relu', name='ada_dense_2')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(10, activation='relu')(Ad)
+    Ad = Dense(10, activation='relu', name='ada_dense_3')(Ad)
     Ad = Dropout(dropoutrate)(Ad)
-    Ad = Dense(1, activation='sigmoid', name='Add')(Ad)
+    Ad = Dense(1, activation='sigmoid', name='ada_out')(Ad)
 
     # just for compatibility for now
     Ad1 = Dense(
         1, activation='linear', use_bias=False,
         trainable=False, name='Add1')(Ad)
+
+    #make tunable weight
+    weight = Reshape((1,1),name='weight_reshape')(XInputs[1])
+    weight = LocallyConnected1D(
+        1,1,
+        activation='linear', use_bias=False,
+        kernel_initializer='zeros',
+        name="weight_layer",
+        #kernel_regularizer=boundary_regularizer,
+    ) (weight)
+    weight= Flatten()(weight)
+    weight = GradientReversal(name='weight_reversal', hp_lambda=weight_rev)(weight)
+    Ad = Concatenate(name='ada_weight')([Ad, weight])
+
     return Model(inputs=XInputs, outputs=[Ad, Ad1], name="datamc_discriminator")
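The weight_layer construction above is the central trick of this patch, so a standalone sketch may help: a LocallyConnected1D(1, 1, use_bias=False) applied to a constant length-1 input is nothing more than a single trainable scalar shared by every event, and Concatenate then appends it to the discriminator output so the weighted objective can read it back. The names below (toy_weight_layer etc.) are illustrative only, not taken from the repository:

    import numpy as np
    from keras.layers import Input, Reshape, LocallyConnected1D, Flatten
    from keras.models import Model

    # constant input of ones, shape (batch, 1)
    ones_in = Input((1,))
    w = Reshape((1, 1))(ones_in)                      # -> (batch, 1, 1)
    w = LocallyConnected1D(1, 1, use_bias=False,      # one kernel element = one trainable scalar
                           kernel_initializer='zeros',
                           name='toy_weight_layer')(w)
    w = Flatten()(w)                                  # -> (batch, 1), same scalar for every event
    toy = Model(inputs=ones_in, outputs=w)

    print(toy.get_weights())             # a single (1, 1, 1) kernel, i.e. one scalar
    print(toy.predict(np.ones((4, 1))))  # that scalar (initially 0) broadcast over the batch
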
diff --git a/make_plots.py b/make_plots.py
index 6654a89..f2e1dee 100644
--- a/make_plots.py
+++ b/make_plots.py
@@ -4,6 +4,7 @@ matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 
 from argparse import ArgumentParser
+from pdb import set_trace
 
 parser = ArgumentParser()
 parser.add_argument('inputdir')
@@ -77,7 +78,23 @@ def makeEpochPlot(idstring,fill):
             )
     plt.plot(mc_history['val_btag_discriminator_'+idstring+'_2_mean'],label=dataonmc, c='red')
+    if fill:
+        plt.fill_between(
+            range(nepochs),
+            mc_history['val_btag_discriminator_'+idstring+'_2_mean']-mc_history['val_btag_discriminator_'+idstring+'_2_std'],
+            mc_history['val_btag_discriminator_'+idstring+'_2_mean']+mc_history['val_btag_discriminator_'+idstring+'_2_std'],
+            color='red',
+            alpha=0.3
+            )
     plt.plot(mc_history['val_btag_discriminator_'+idstring+'_1_mean'],label=mconmc, c='blueviolet',linestyle=':')
+    if fill:
+        plt.fill_between(
+            range(nepochs),
+            mc_history['val_btag_discriminator_'+idstring+'_1_mean']-mc_history['val_btag_discriminator_'+idstring+'_1_std'],
+            mc_history['val_btag_discriminator_'+idstring+'_1_mean']+mc_history['val_btag_discriminator_'+idstring+'_1_std'],
+            color='blueviolet',
+            alpha=0.3
+            )
     plt.plot(data_history['val_btag_discriminator_'+idstring+'_2_mean'],label=dataondata, c='orange')
     plt.plot(data_history['val_btag_discriminator_'+idstring+'_1_mean'],label=mcondata, c='brown',linestyle=':')
 
@@ -94,9 +111,23 @@ def makeEpochPlot(idstring,fill):
 
 makeEpochPlot('loss',True)
-makeEpochPlot('weighted_acc',False)
-
+makeEpochPlot('weighted_binary_accuracy',False)
 
+#plot weights
+plt.clf()
+plt.plot(da_history.index, da_history.weight_mean, label='fitted weight', color='blue')
+plt.fill_between(
+    da_history.index,
+    da_history.weight_mean - da_history.weight_std,
+    da_history.weight_mean + da_history.weight_std,
+    color='blue', alpha=0.3
+    )
+plt.plot([da_history.index.min(), da_history.index.max()], [da_history.real_weight_mean, da_history.real_weight_mean], label='best value', ls='--')
+plt.xlabel('epoch')
+plt.ylabel('weight')
+#plt.legend(loc='best')
+plt.grid(True)
+plt.savefig('%s/weights%s.png' % (args.inputdir, args.postfix))
 
 from sklearn.metrics import roc_curve, roc_auc_score
 from scipy.interpolate import InterpolatedUnivariateSpline
diff --git a/make_samples.py b/make_samples.py
index 3dc741a..dc69287 100644
--- a/make_samples.py
+++ b/make_samples.py
@@ -1,9 +1,9 @@
 import numpy as np
+np.random.seed(1234567)
 from glob import glob
+from pdb import set_trace
 
-
-def make_sample(input_dir, add_svs):
-
+def make_sample(input_dir, add_svs, keep_probability=None): #default value tuned for current sample
     x_file = glob('%s/*features_0.npy' % input_dir)[0]
     y_file = glob('%s/*truth_1.npy' % input_dir)[0]
     X_all = np.load(x_file)
@@ -20,16 +20,15 @@ def make_sample(input_dir, add_svs):
     isMC_all = isMC_all.ravel()
     print(isMC_all.shape)
     # select MC only
-
-    if False:
-        np.random.seed(1234)
+    if True:
         isMC_all = np.random.randint(2, size=X_all.shape[0])
         isMC_all = np.reshape(isMC_all,(isMC_all.shape[0],1))
 
-    X_ptRel = X_all[:, :1]#3
-    X_2Ds = X_all[:, 5:6]#8
+    X_ptRel = X_all[:, :3]
+    X_2Ds = X_all[:, 5:8]
+    X_3Ds = X_all[:, 10:11]
-    X_ptPro = X_all[:, 15:16]#18
+    X_ptPro = X_all[:, 15:18]
     # now we can increase the smearing
     # noise = np.random.randn(X_all.shape[0],5)*0.5
     # noise2 = np.random.randn(X_all.shape[0],5)*0.5
@@ -47,8 +46,8 @@ def addMCStretch(Xin, stretch, data=False):
         Xin = np.multiply(Xin,selected)
         return Xin
 
-    # X_2Ds=addMCStretch(X_2Ds, 5.5)
-    # X_3Ds=addMCStretch(X_3Ds, 5.5)
+    X_2Ds=addMCStretch(X_2Ds, 0.1)
+    X_3Ds=addMCStretch(X_3Ds, 0.1)
 
     # poisson_b = (np.random.rand(X_all.shape[0], 1) > 0.15) * isB_all
@@ -61,5 +60,71 @@ def addMCStretch(Xin, stretch, data=False):
     # X_2Ds = X_2Ds + noise * X_2Ds #* X_2Ds * (isMC_all<.1)
     # X_ptRel= noise #* X_3Ds * (isMC_all<.1)
     # X_ptPro= noise #* X_3Ds * (isMC_all<.1)
-    return np.concatenate(
-        [X_ptRel, X_2Ds, X_3Ds, X_ptPro], axis=1), isB_all, isMC_all
+
+    X_all = np.concatenate([X_ptRel, X_2Ds, X_3Ds, X_ptPro], axis=1)
+    #
+    # flavour weights stuff
+    #
+    ismc = (isMC_all.ravel() > 0.5)
+    isb = (isB_all.ravel() > 0.5)
+    #compute scaling to get same composition
+    cnst = float((isb & np.invert(ismc)).sum())/np.invert(ismc).sum()
+    nb = (isb & ismc).sum()
+    nl = (np.invert(isb) & ismc).sum()
+    fraction = (nb/cnst - nb)/nl
+    if fraction > 1.:
+        fraction = -1*nl/(nb/cnst - nb)
+    print ' -- DATASET --'
+    print 'Before masking:'
+    print 'MC fraction: %.3f' % (float(ismc.sum())/ismc.shape[0])
+    print 'Data B fraction: %.3f' % (float((isb & np.invert(ismc)).sum())/np.invert(ismc).sum())
+    print 'MC B fraction: %.3f' % (float((isb & ismc).sum())/ismc.sum())
+    print '\n'
+    if keep_probability is None:
+        print 'Forcing fractions to be the same'
+        keep_probability = fraction
+
+    if keep_probability > 0:
+        mask = np.invert(ismc) | \
+            (ismc & isb) | \
+            (ismc & np.invert(isb) & (np.random.rand(isMC_all.shape[0]) < keep_probability))
+    else:
+        mask = np.invert(ismc) | \
+            (ismc & np.invert(isb)) | \
+            (ismc & isb & (np.random.rand(isMC_all.shape[0]) < -1*keep_probability))
+
+    X_all = X_all[mask]
+    isB_all = isB_all[mask]
+    isMC_all = isMC_all[mask]
+    ismc = (isMC_all.ravel() > 0.5)
+    #make data and mc 50%
+    mc_frac = (float(ismc.sum())/ismc.shape[0])
+    if mc_frac > 0.5:
+        dropout = float(np.invert(ismc).sum())/float(ismc.sum())
+        mask = np.invert(ismc) | (ismc & (np.random.rand(isMC_all.shape[0]) < dropout))
+    else:
+        dropout = float(ismc.sum())/float(np.invert(ismc).sum())
+        mask = ismc | (np.invert(ismc) & (np.random.rand(isMC_all.shape[0]) < dropout))
+
+    X_all = X_all[mask]
+    isB_all = isB_all[mask]
+    isMC_all = isMC_all[mask]
+    ismc = (isMC_all.ravel() > 0.5)
+    isb = (isB_all.ravel() > 0.5)
+    print 'After masking:'
+    print 'MC fraction: %.3f' % (float(ismc.sum())/ismc.shape[0])
+    print 'Data B fraction: %.3f' % (((isb & np.invert(ismc)).sum())/float(np.invert(ismc).sum()))
+    print 'MC B fraction: %.3f' % (((isb & ismc).sum())/float(ismc.sum()))
+    print '\n'
+    input_weights = ((isb == 1) & ismc).astype(float)
+    input_weights = input_weights.reshape((input_weights.shape[0],1))
+
+    nb_mc = (isb & ismc).sum()
+    nl_mc = (ismc & np.invert(isb)).sum()
+    data_b_frac = (np.invert(ismc) & isb).sum() / float((np.invert(ismc)).sum())
+    print 'MC: #B: %d #L: %d' % (nb_mc, nl_mc)
+    exp_f = data_b_frac*nl_mc/(nb_mc*(1-data_b_frac))
+    exp_f -= 1
+    print 'Expected weight: %.3f' % exp_f
+
+    return X_all, isB_all, isMC_all, input_weights, exp_f
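The expected-weight formula printed at the end of make_sample (exp_f = data_b_frac*nl_mc/(nb_mc*(1-data_b_frac)) - 1) can be checked with a few lines of arithmetic; the counts below are made up for illustration:

    # illustrative numbers only, not taken from any real sample
    nb_mc, nl_mc = 3000, 7000        # b and light jets in MC after masking
    data_b_frac = 0.40               # b fraction observed in data

    # weight w such that nb_mc*(1+w) / (nb_mc*(1+w) + nl_mc) == data_b_frac
    exp_f = data_b_frac * nl_mc / (nb_mc * (1 - data_b_frac))
    exp_f -= 1
    print('Expected weight: %.3f' % exp_f)   # ~0.556 with these numbers

    # check: reweighted MC b fraction matches the data fraction
    reweighted = nb_mc * (1 + exp_f) / (nb_mc * (1 + exp_f) + nl_mc)
    print('MC B fraction after reweighting: %.3f' % reweighted)   # 0.400
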
diff --git a/training_tools.py b/training_tools.py
index 51144b3..18326fc 100644
--- a/training_tools.py
+++ b/training_tools.py
@@ -3,7 +3,7 @@ import numpy as np
 from sklearn.utils import shuffle
 from random import randint
-
+from pdb import set_trace
 def train_models_adversarial(modellist,
                              x,
@@ -15,7 +15,8 @@ def train_models_adversarial(modellist,
                              nstepspermodel=None,
                              custombatchgenerators=None,
                              modify_labels=None,
-                             verbose=0):
+                             verbose=0,
+                             history_addenda={}):
     '''
     modellist,x,y,sample_weight need to be lists, even if only one entry
@@ -33,12 +34,15 @@
     modify_labels: applies user defined function to the label list. Needs to be a list with same length as the model list.
    This function will be called for each batch, so keep it simple
+    history_addenda is a {string: function} dictionary; each function acts on the model and outputs a
+    float, which gets appended to the output history
     '''
 
     # create the batches
     totalsize = x[0].shape[0]
     ntrainbatches = int((1-validation_split)*totalsize/batch_size)
+    history = {i: [] for i in history_addenda}
 
     split = []
     # make split - last split will be val sample
     for i in range(ntrainbatches):
@@ -170,6 +174,9 @@ def randomise_array_lists():
             valhist.append(model.test_on_batch(x_val, y_val, sample_weight = sw_splitval))
 
+            for name, fcn in history_addenda.iteritems():
+                history[name].append(fcn(model))
+
             if verbose > 0:
                 for i in range(len(usedmetrics)):
                     print(usedmetrics[i]+": "+str(trainhist[-1][i]), end=' - ')
@@ -177,7 +184,6 @@ def randomise_array_lists():
                     print("val_"+usedmetrics[i]+": "+str(valhist[-1][i]), end=' - ')
                 print('\n')
 
-    history = {}
     # use usedmetrics to make dict out of it
     for i in range(len(usedmetrics)):
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..f9a136c
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,37 @@
+'''collection of useful functions'''
+import os
+import numpy as np
+import pandas as pd
+from fnmatch import fnmatch
+
+def save(df, fname):
+    'saves a pandas DataFrame to a numpy file'
+    dname = os.path.dirname(fname)
+    if not os.path.isdir(dname):
+        os.makedirs(dname)
+    records = df.to_records(index=False)
+    records.dtype.names = [str(i) for i in records.dtype.names]
+    np.save(fname, records)
+
+def fix_layers(my_model):
+    'sets all the layers in a model as non trainable'
+    for layer in my_model.layers:
+        layer.trainable = False
+    return my_model
+
+def open_layers(my_model):
+    'sets all the layers in a model as trainable'
+    for layer in my_model.layers:
+        layer.trainable=True
+    return my_model
+
+def set_trainable(m, patterns, value=True):
+    '''sets single layers as (non-)trainable, depending on the name.
+Shell-style wildcard patterns (fnmatch) allowed, as a single string or a list. Defaults to trainable'''
+    if isinstance(patterns, basestring):
+        patterns = [patterns]
+    for layer in m.layers:
+        name = layer.name
+        if any(fnmatch(name, i) for i in patterns):
+            layer.trainable = value
+    return m
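For reference, a minimal self-contained sketch of how set_trainable is meant to be used with fnmatch-style patterns; the toy model and its layer names below are illustrative, only the naming convention mirrors block_models.py:

    from keras.layers import Input, Dense
    from keras.models import Model
    from utils import set_trainable   # the helper added in this patch

    inp = Input((10,))
    x = Dense(5, name='ada_dense_0')(inp)
    out = Dense(1, name='weight_layer')(x)
    toy = Model(inputs=inp, outputs=out)

    toy = set_trainable(toy, 'ada_*', False)   # freezes ada_dense_0 only
    print([(l.name, l.trainable) for l in toy.layers])
    # e.g. [('input_1', True), ('ada_dense_0', False), ('weight_layer', True)]
    # note: trainability changes only take effect after the next model.compile()
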
diff --git a/weightedobjectives.py b/weightedobjectives.py
new file mode 100644
index 0000000..69007fb
--- /dev/null
+++ b/weightedobjectives.py
@@ -0,0 +1,87 @@
+from keras import backend as K
+from tensorflow import where, greater, abs, zeros_like, exp
+import tensorflow as tf
+from keras import losses
+from keras import metrics
+from pdb import set_trace
+
+def tf2np(fcn, session, *args):
+    '''tf2np - TensorFlow to NumPy
+Allows TF-based objective functions to be used with NumPy inputs and outputs, to ease debugging.
+Usage tf2np(fcn, session, *args):
+fcn - TF-based function (accepts and handles tensors)
+session - TF session
+*args - TF-based function arguments, NumPy format
+    '''
+    args = [tf.convert_to_tensor(i) for i in args]
+    return fcn(*args).eval(session=session)
+
+def binary_crossentropy_labelweights(y_true, y_pred):
+    """Modification of the binary cross-entropy to use fitted weights,
+which are passed as additional column(s) of y_pred
+    """
+    printAll=False
+
+    if printAll:
+        print('in binary_crossentropy_labelweights_Delphes')
+
+    # the prediction if it is data or MC is in the first index (see model)
+    isMCpred = y_pred[:,:1]
+
+    #the weights are in the remaining parts of the vector
+    Weightpred = y_pred[:,1:]
+
+    # the truth if it is data or MC
+    isMCtrue = y_true[:,:1]
+    # labels: B, C, UDSG - not needed here, but maybe later
+    # labels_true = y_true[:,1:]
+    if printAll:
+        print('isMCpred ', isMCpred)
+        print('Weightpred ', Weightpred)
+        print('isMCtrue ', isMCtrue)
+
+    #only apply label weight deltas to MC, for data will be 1 (+1)
+    #as a result of locally connected it will be only !=0 for one label
+    #set_trace()
+    nmc = K.sum(isMCtrue)
+    flat_mc = K.flatten(isMCtrue)
+    #compute weights for all
+    #set_trace()
+    mc_weights = K.clip(flat_mc * K.sum(Weightpred, axis=1)+1, K.epsilon(), 5)
+    #remove data and compute normalization
+    mc_weights -= (1-flat_mc)
+    mc_factor = nmc/K.sum(mc_weights)
+    mc_weights *= mc_factor
+    #add back data
+    weights = mc_weights+(1-K.flatten(isMCtrue))
+    weighted_xentr = weights*K.flatten(losses.binary_crossentropy(isMCtrue, isMCpred))
+
+    if printAll:
+        print('weights', weights)
+        print('weighted_xentr', weighted_xentr.get_shape())
+
+    return weighted_xentr
+
+
+def binary_accuracy_labelweights(y_true, y_pred):
+    '''Modification of the binary accuracy to use fitted weights,
+which are passed as additional column(s) of y_pred
+    '''
+    acc = metrics.binary_accuracy(y_true, y_pred[:,:1])
+    fitted_weights = y_pred[:,1:]
+
+    #only apply label weight deltas to MC, for data will be 1 (+1)
+    #as a result of locally connected it will be only !=0 for one label
+    #set_trace()
+    nmc = K.sum(y_true)
+    flat_mc = K.flatten(y_true)
+    #compute weights for all
+    mc_weights = K.clip(flat_mc * K.sum(fitted_weights, axis=1)+1, K.epsilon(), 5)
+    #remove data and compute normalization
+    mc_weights -= (1-flat_mc)
+    mc_factor = nmc/K.sum(mc_weights)
+    mc_weights *= mc_factor
+    #add back data
+    weights = mc_weights+(1-K.flatten(flat_mc))
+    weighted_acc = tf.cast(weights, tf.float32)*K.flatten(acc)
+    return weighted_acc
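These objectives can be exercised outside of a fit with the tf2np helper above; the toy arrays below are made up for illustration, and the sketch assumes a TF1-style Keras backend where K.get_session() is available:

    import numpy as np
    import keras.backend as K
    from weightedobjectives import tf2np, binary_crossentropy_labelweights

    # 4 events: first two MC (truth=1), last two data (truth=0)
    y_true = np.array([[1.], [1.], [0.], [0.]])
    # column 0: data/MC prediction, column 1: fitted weight delta (only non-zero for MC b jets)
    y_pred = np.array([[0.9, 0.5],
                       [0.2, 0.0],
                       [0.1, 0.0],
                       [0.8, 0.0]])

    per_event_loss = tf2np(binary_crossentropy_labelweights, K.get_session(), y_true, y_pred)
    print(per_event_loss)   # one weighted cross-entropy value per event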