diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..e496c08 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,22 @@ +name: SAMADhi + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python: [3.7, 3.8, 3.9] + + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Install tox and SAMADhi + run: pip install tox + - name: Run tox + run: tox -e py diff --git a/.gitignore b/.gitignore index 93e2cbf..b65e671 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ python/__init__.py dist build eggs +.eggs parts bin var @@ -26,6 +27,7 @@ pip-log.txt .coverage .tox nosetests.xml +.pytest_cache # Translations *.mo diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a8fe8fd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,85 @@ +exclude: '(^tests/data/|^html|^data)' +repos: + +- repo: https://github.com/psf/black + rev: 21.9b0 + hooks: + - id: black + language_version: python3 + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: trailing-whitespace + exclude: '^ext/jetclasses.patch$' + - id: end-of-file-fixer + - id: mixed-line-ending + - id: check-yaml + - id: check-ast + - id: fix-byte-order-marker + #- id: check-builtin-literals + - id: check-toml + - id: debug-statements + +- repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.9.0 + hooks: + - id: python-check-blanket-noqa + - id: python-check-blanket-type-ignore + - id: python-no-log-warn + - id: python-no-eval + - id: python-use-type-annotations + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal + +- repo: https://github.com/PyCQA/isort + rev: 5.9.3 + hooks: + - id: isort + exclude: '^examples/df_nano.py$' + +- repo: https://github.com/asottile/pyupgrade + rev: v2.29.0 + hooks: + - id: pyupgrade + args: ["--py36-plus"] + +- repo: https://github.com/asottile/setup-cfg-fmt + rev: v1.18.0 + hooks: + - id: setup-cfg-fmt + +- repo: https://github.com/asottile/yesqa + rev: v1.2.3 + hooks: + - id: yesqa + exclude: docs/conf.py + additional_dependencies: &flake8_dependencies + - flake8-bugbear + - flake8-print + +- repo: https://github.com/pycqa/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + exclude: docs/conf.py + additional_dependencies: *flake8_dependencies + args: ['--ignore=E501,W503'] + +- repo: local + hooks: + - id: disallow-caps + name: Disallow improper capitalization + language: pygrep + entry: PyBind|Numpy|Cmake|CCache|Github|PyTest + exclude: .pre-commit-config.yaml + +- repo: https://github.com/mgedmin/check-manifest + rev: "0.47" + hooks: + - id: check-manifest + stages: [manual] diff --git a/.travis.yml b/.travis.yml index 6b82c6f..164870d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,4 +5,5 @@ python: - "2.7" - "3.6" - "3.7" -script: python --version +install: pip install tox-travis +script: tox diff --git a/README.md b/README.md index a4f1b78..9ae00a8 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Samādhi in Hinduism, Buddhism, Jainism, Sikhism and yogic schools is a higher l This project is to develop a database to keep track of samples used by our group for CMS data analysis, and of (groups of) analysis results. -A python interface is provided via the STORM package. 
+A python interface is provided via the [peewee](http://docs.peewee-orm.com/en/latest/) package. Setup inside a CMSSW project area: ``` @@ -21,13 +21,13 @@ source installdeps_cmssw.sh ## only on first use scram b ``` -Standalone setup on ingrid: +For standalone use the python interface can be installed with setuptools or pip, +e.g. in a [virtual environment](https://packaging.python.org/tutorials/installing-packages/#creating-virtual-environments) with +```bash +python -m venv samadhi_env +source samadhi_env/bin/activate +pip install git+https://github.com/cp3-llbb/SAMADhi.git ``` -source setup_standalone.sh ## in every new shell -``` -this will create an install tree and symlink if needed, and otherwise only set some environment variables. -The python installation used can be customized with the `--python` option (e.g. `--python=/nfs/soft/python/python-2.7.5-sl6_amd64_gcc44/bin/python` on `ingrid-ui2`), -and the install tree location can be set with the `--install` option. To start the xataface interface in a docker image: diff --git a/documentation/SAMADhi_examples.py b/documentation/SAMADhi_examples.py index 126b9cf..ec38642 100644 --- a/documentation/SAMADhi_examples.py +++ b/documentation/SAMADhi_examples.py @@ -1,61 +1,80 @@ -from cp3_llbb.SAMADhi import SAMADhi +from cp3_llbb.SAMADhi.SAMADhi import SAMADhiDB, Sample + # Example method to generate a dictionary relating PAT name and luminosity -# This version is optimized and only load the needed columns. +# This version is optimized and only load the needed columns. # We also do an implicit join between dataset and sample. -def getPATlumi(sampletype=u"mc"): # can be u"mc", u"data", u"%" - dbstore = SAMADhi.DbStore() - pattuples = dbstore.find(SAMADhi.Sample,SAMADhi.Dataset.dataset_id==SAMADhi.Sample.source_dataset_id, - (SAMADhi.Sample.sampletype==u"PAT") & (SAMADhi.Dataset.datatype.like(sampletype))) - luminosities = pattuples.values(SAMADhi.Sample.name, SAMADhi.Sample.luminosity) - dictionary = {} - for name,lumi in luminosities: - dictionary[name]=lumi - return dictionary +def getPATlumi(sampletype="mc"): # can be "mc", "data", "%" + with SAMADhiDB() as db: + return { + smp.name: smp.luminosity + for smp in Sample.select(Sample.name, Sample.luminosity).where( + (Sample.sampletype == "PAT") & (Sample.source_dataset.datatype % sampletype) + ) + } + # Example method to access a PAT based on the path and access results and dataset -def getPAT(path=u"%"): - dbstore = SAMADhi.DbStore() - pattuples = dbstore.find(SAMADhi.Sample,(SAMADhi.Sample.sampletype==u"PAT") & (SAMADhi.Sample.path.like(path))) - for pattuple in pattuples: - print pattuple - print "results obtained from that sample:" - for res in pattuple.results: - print res - print "source dataset:" - print pattuple.source_dataset +def getPAT(path="%"): + with SAMADhiDB() as db: + for pattuple in Sample.select().where((Sample.sampletype == "PAT") & (Sample.path % path)): + print(pattuple) + print("results obtained from that sample:") + for res in pattuple.results: + print(res) + print("source dataset:") + print(pattuple.source_dataset) + # Example to access the weight of an event def getWeights(dataset, run, event): - dbstore = SAMADhi.DbStore() - event = dbstore.find(SAMADhi.Event,(SAMADhi.Event.run_number==run) & (SAMADhi.Event.event_number==event) & (SAMADhi.Event.dataset_id==dataset)) - theEvent = event.one() - for w in theEvent.weights: - print "weight for process %s (version %d): %g+/-%g"%(w.process.name,w.version,w.value,w.uncertainty) + dbstore = SAMADhi.DbStore() + event = 
dbstore.find( + SAMADhi.Event, + (SAMADhi.Event.run_number == run) + & (SAMADhi.Event.event_number == event) + & (SAMADhi.Event.dataset_id == dataset), + ) + theEvent = event.one() + for w in theEvent.weights: + print( + "weight for process %s (version %d): %g+/-%g" + % (w.process.name, w.version, w.value, w.uncertainty) + ) + # Get a single event weight # Note that I think that the getWeights above will be faster than n times this method. def getWeight(dataset, run, event, process, version=None): - dbstore = SAMADhi.DbStore() - weight = dbstore.find(SAMADhi.Weight, SAMADhi.Weight.event_id==SAMADhi.Event.event_id, - (SAMADhi.Event.run_number==run) & (SAMADhi.Event.event_number==event) & (SAMADhi.Event.dataset_id==dataset) & - (SAMADhi.Weight.madweight_process==process)) - if version is None: # take the most recent - w = weight.order_by(SAMADhi.Weight.version).last() - else: - w = weight.find(SAMADhi.Weight.version==version).one() - return (w.value, w.uncertainty) + dbstore = SAMADhi.DbStore() + weight = dbstore.find( + SAMADhi.Weight, + SAMADhi.Weight.event_id == SAMADhi.Event.event_id, + (SAMADhi.Event.run_number == run) + & (SAMADhi.Event.event_number == event) + & (SAMADhi.Event.dataset_id == dataset) + & (SAMADhi.Weight.madweight_process == process), + ) + if version is None: # take the most recent + w = weight.order_by(SAMADhi.Weight.version).last() + else: + w = weight.find(SAMADhi.Weight.version == version).one() + return (w.value, w.uncertainty) + # In the example above, you need the dataset id. It can be obtained this way # It could be combined in a complex query, but typically you will get this once # and avoid doing the joined query for every event. def dataset_id(dataset=None, pat=None): - dbstore = SAMADhi.DbStore() - if dataset is None and pat is not None: - dset = dbstore.find(SAMADhi.Dataset,SAMADhi.Dataset.dataset_id==SAMADhi.Sample.source_dataset_id,SAMADhi.Sample.name==pat) - elif dataset is not None and pat is None: - dset = dbstore.find(SAMADhi.Dataset,SAMADhi.Dataset.name==dataset) - else: - return 0 - return dset.one().dataset_id - + dbstore = SAMADhi.DbStore() + if dataset is None and pat is not None: + dset = dbstore.find( + SAMADhi.Dataset, + SAMADhi.Dataset.dataset_id == SAMADhi.Sample.source_dataset_id, + SAMADhi.Sample.name == pat, + ) + elif dataset is not None and pat is None: + dset = dbstore.find(SAMADhi.Dataset, SAMADhi.Dataset.name == dataset) + else: + return 0 + return dset.one().dataset_id diff --git a/installdeps_cmssw.sh b/installdeps_cmssw.sh index 977a17f..edc08d9 100644 --- a/installdeps_cmssw.sh +++ b/installdeps_cmssw.sh @@ -39,22 +39,24 @@ if [ $? 
-ne 0 ]; then python "${pipinstall}/get-pip.py" --prefix="${pipinstall}" --no-setuptools fi export PYTHONPATH="${pipinstall}/lib/python${pymajmin}/site-packages:${PYTHONPATH}" + python -m pip install --prefix="${pipinstall}" --upgrade Cython fi ## install dependencies installpath="${CMSSW_BASE}/install/samadhidep" -echo "--> Installing MySQL-python and storm" -python -m pip install --prefix="${installpath}" --ignore-installed --upgrade --upgrade-strategy=only-if-needed MySQL-python storm +echo "--> Installing peewee and pymysql" +NO_SQLITE=1 python -m pip install --prefix="${installpath}" --ignore-installed --upgrade peewee pymysql pytest pytest-console-scripts future # root_interface toolfile toolfile="${installpath}/samadhidep.xml" cat <"${toolfile}" - + + EOF_TOOLFILE diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b5e6299 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = ["setuptools", "wheel", "setuptools_scm[toml]>=6.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] + +[tool.black] +line-length = 100 +target-version = ['py37'] diff --git a/python/SAMADhi.py b/python/SAMADhi.py index 3d8b424..62fa2fd 100644 --- a/python/SAMADhi.py +++ b/python/SAMADhi.py @@ -1,304 +1,388 @@ -try: - from storm.locals import * -except ImportError as error: - raise ImportError("Could not import storm, please make sure to install the dependencies (source installdeps_cmssw.sh inside CMSSW, or SAMADhi/install_standalone.sh otherwise): {0}".format(error)) - -#db store connection - -def DbStore(credentials='~/.samadhi'): - """create a database object and returns the db store from STORM""" - +import datetime +import os +import warnings +from contextlib import contextmanager + +from peewee import * + +""" +Object representation of the SAMADhi database tables (based on peewee) + +Example: +>>> from cp3_llbb.SAMADhi.SAMADhi import * ## import models and SAMADhiDB +>>> with SAMADhiDB(): +>>> mySamples = Sample.select().where(Sample.author == "MYUSERNAME") +""" + +__all__ = ["loadCredentials", "SAMADhiDB"] ## models added below +_models = [] ## list for binding a database + +warnings.filterwarnings( + "ignore", module="peewee", category=UserWarning, message="Unable to determine MySQL version: .*" +) +if os.getenv("CMSSW_VERSION") is not None: + """Silence some warnings if inside CMSSW""" + for warnMod in ("pysqlite2.dbapi2", "peewee"): + warnings.filterwarnings( + "ignore", + module=warnMod, + category=DeprecationWarning, + message="Converters and adapters are deprecated. Please use only supported SQLite types. Any type mapping should happen in layer above this module.", + ) + + +def loadCredentials(path="~/.samadhi"): import json, os, stat - credentials = os.path.expanduser(credentials) - if not os.path.exists(credentials): - raise IOError('Credentials file %r not found.' % credentials) + credentials = os.path.expanduser(path) + if not os.path.exists(credentials): + raise OSError("Credentials file %r not found." % credentials) # Check permission mode = stat.S_IMODE(os.stat(credentials).st_mode) - if mode != int('400', 8): - raise IOError('Credentials file has wrong permission. Please execute \'chmod 400 %s\'' % credentials) + if mode != stat.S_IRUSR: + raise OSError( + "Credentials file has wrong permission. 
Please execute 'chmod 400 %s'" % credentials + ) - with open(credentials, 'r') as f: + with open(credentials) as f: data = json.load(f) - - login = data['login'] - password = data['password'] - hostname = data['hostname'] if 'hostname' in data else 'localhost' - database = data['database'] - - db_connection_string = "mysql://%s:%s@%s/%s" % (login, password, hostname, database) - return Store(create_database(db_connection_string)) - -#definition of the DB interface classes - -class Dataset(Storm): - """Table to represent one sample from DAS - on which we run the analysis""" - __storm_table__ = "dataset" - dataset_id = Int(primary=True) - name = Unicode() - nevents = Int() - dsize = Int() - process = Unicode() - xsection = Float() - cmssw_release = Unicode() - globaltag = Unicode() - datatype = Unicode() - user_comment = Unicode() - energy = Float() - creation_time = DateTime() - samples = ReferenceSet(dataset_id,"Sample.source_dataset_id") - - def __init__(self, name, datatype): - """Initialize a dataset by name and datatype. - Other attributes may be null and should be set separately""" - self.name = name - if datatype==u"mc" or datatype==u"data": - self.datatype = datatype - else: - raise ValueError('dataset type must be mc or data') - - def replaceBy(self, dataset): - """Replace one entry, but keep the same key""" - self.name = dataset.name - self.nevents = dataset.nevents - self.dsize = dataset.dsize - self.process = dataset.process - self.xsection = dataset.xsection - self.cmssw_release = dataset.cmssw_release - self.globaltag = dataset.globaltag - self.datatype = dataset.datatype - self.user_comment = dataset.user_comment - self.energy = dataset.energy - self.creation_time = dataset.creation_time - - def __str__(self): - result = "Dataset #%s:\n"%str(self.dataset_id) - result += " name: %s\n"%str(self.name) - result += " process: %s\n"%str(self.process) - result += " cross-section: %s\n"%str(self.xsection) - result += " number of events: %s\n"%str(self.nevents) - result += " size on disk: %s\n"%str(self.dsize) - result += " CMSSW release: %s\n"%str(self.cmssw_release) - result += " global tag: %s\n"%str(self.globaltag) - result += " type (data or mc): %s\n"%str(self.datatype) - result += " center-of-mass energy: %s TeV\n"%str(self.energy) - result += " creation time (on DAS): %s\n"%str(self.creation_time) - result += " comment: %s"%str(self.user_comment) - return result - -class Sample(Storm): - """Table to represent one processed sample, - typically a PATtupe, skim, RDS, CP, etc.""" - __storm_table__ = "sample" - sample_id = Int(primary=True) - name = Unicode() - path = Unicode() - sampletype = Unicode() - nevents_processed = Int() - nevents = Int() - normalization = Float() - event_weight_sum = Float() - extras_event_weight_sum = Unicode() # MEDIUMTEXT in MySQL - luminosity = Float() - processed_lumi = Unicode() # MEDIUMTEXT in MySQL - code_version = Unicode() - user_comment = Unicode() - author = Unicode() - creation_time = DateTime() - source_dataset_id = Int() - source_sample_id = Int() - source_dataset = Reference(source_dataset_id, "Dataset.dataset_id") - source_sample = Reference(source_sample_id, "Sample.sample_id") - derived_samples = ReferenceSet(sample_id,"Sample.source_sample_id") - results = ReferenceSet(sample_id,"SampleResult.sample_id","SampleResult.result_id","Result.result_id") - files = ReferenceSet(sample_id, "File.sample_id") - - SampleTypes = [ "PAT", "SKIM", "RDS", "LHCO", "NTUPLES", "HISTOS", "OTHER" ] - - def __init__(self, name, path, sampletype, 
nevents_processed): - """Initialize a dataset by name and datatype. - Other attributes may be null and should be set separately""" - self.name = name - self.path = path - self.nevents_processed = nevents_processed - if sampletype in self.SampleTypes: - self.sampletype = sampletype + if data.get("test", False): + if "database" not in data: + raise KeyError(f"Credentials json file at {credentials} does not contain 'database'") else: - raise ValueError('sample type %s is unkwown'%sampletype) - - def replaceBy(self, sample): - """Replace one entry, but keep the same key""" - self.name = sample.name - self.path = sample.path - self.sampletype = sample.sampletype - self.nevents_processed = sample.nevents_processed - self.nevents = sample.nevents - self.normalization = sample.normalization - self.event_weight_sum = sample.event_weight_sum - self.extras_event_weight_sum = sample.extras_event_weight_sum - self.luminosity = sample.luminosity - self.code_version = sample.code_version - self.user_comment = sample.user_comment - self.source_dataset_id = sample.source_dataset_id - self.source_sample_id = sample.source_sample_id - self.author = sample.author - self.creation_time = sample.creation_time - - def removeFiles(self, store): - store.find(File, File.sample_id == self.sample_id).remove() - self.files.clear() - - - def getLuminosity(self): - """Computes the sample (effective) luminosity""" - if self.luminosity is not None: - return self.luminosity - else: - if self.source_dataset is not None: - if self.source_dataset.datatype=="mc": - # for MC, it can be computed as Nevt/xsection - if self.nevents_processed is not None and self.source_dataset.xsection is not None: - return self.nevents_processed/self.source_dataset.xsection - else: - # for DATA, it can only be obtained from the parent sample - if self.source_sample is not None: - return self.source_sample.luminosity() - # in all other cases, it is impossible to compute a number. 
- return None - - def __str__(self): - result = "Sample #%s (created on %s by %s):\n"%(str(self.sample_id),str(self.creation_time),str(self.author)) - result += " name: %s\n"%str(self.name) - result += " path: %s\n"%str(self.path) - result += " type: %s\n"%str(self.sampletype) - result += " number of processed events: %s\n"%str(self.nevents_processed) - result += " number of events: %s\n"%str(self.nevents) - result += " normalization: %s\n"%str(self.normalization) - result += " sum of event weight: %s\n"%str(self.event_weight_sum) - if self.extras_event_weight_sum: - result += " has extras sum of event weight\n" - result += " (effective) luminosity: %s\n"%str(self.luminosity) - if self.processed_lumi: - result += " has processed luminosity sections information\n" - else: - result += " does not have processed luminosity sections information\n" - result += " code version: %s\n"%str(self.code_version) - result += " comment: %s\n"%str(self.user_comment) - result += " source dataset: %s\n"%str(self.source_dataset_id) - result += " source sample: %s\n"%str(self.source_sample_id) - if self.sample_id: - result += " %d files: \n" % (self.files.count()) - front_files = [] - last_file = None - if self.files.count() > 5: - c = 0 - for f in self.files: - if c < 3: - front_files.append(f) - - if c == self.files.count() - 1: - last_file = f - c += 1 - else: - front_files = self.files + for ky in ("login", "password", "database"): + if ky not in data: + raise KeyError(f"Credentials json file at {credentials} does not contain '{ky}'") + if "hostname" not in data: + data["hostname"] = "localhost" - for f in front_files: - result += " - %s (%d entries)\n" % (str(f.lfn), f.nevents) - if last_file: - result += " - ...\n" - result += " - %s (%d entries)\n" % (str(last_file.lfn), last_file.nevents) - else: - # No way to know if some files are here - result += " no files" - - return result - -class Result(Storm): - """Table to represent one physics result, - combining several samples.""" - __storm_table__ = "result" - result_id = Int(primary=True) - path = Unicode() - description = Unicode() - author = Unicode() - creation_time = DateTime() - analysis_id = Int() - analysis = Reference(analysis_id, "Analysis.analysis_id") - elog = Unicode() - samples = ReferenceSet(result_id,"SampleResult.result_id","SampleResult.sample_id","Sample.sample_id") - - def __init__(self,path): - self.path = path - - def replaceBy(self, result): - """Replace one entry, but keep the same key""" - self.path = result.path - self.description = result.description - self.author = result.author - self.analysis_id = result.analysis_id - self.elog = result.elog - - def __str__(self): - result = "Result in %s \n created on %s by %s\n "%(str(self.path),str(self.creation_time),str(self.author)) - result += "%s"%str(self.description) - if self.analysis is not None: - result += "\n part of analysis %s"%str(self.analysis.description) - if self.elog is not None: - result += "\n more details in %s"%str(self.elog) - return result - -class SampleResult(Storm): - """Many to many relationship between samples and results.""" - __storm_table__ = "sampleresult" - __storm_primary__ = "sample_id", "result_id" - sample_id = Int() - result_id = Int() - -class File(Storm): - __storm_table__ = "file" - id = Int(primary=True) - sample_id = Int() - lfn = Unicode() # Local file name: /store/ - pfn = Unicode() # Physical file name: srm:// or root:// - event_weight_sum = Float() - extras_event_weight_sum = Unicode() # MEDIUMTEXT in MySQL - nevents = Int() - - sample = 
Reference(sample_id, "Sample.sample_id") - - def __init__(self, lfn, pfn, event_weight_sum, extras_event_weight_sum, nevents): - self.lfn = lfn - self.pfn = pfn - self.event_weight_sum = event_weight_sum - self.extras_event_weight_sum = extras_event_weight_sum - self.nevents = nevents + return data + + +database = DatabaseProxy() + +# Code generated by: +# python -m pwiz -e mysql --host=cp3.irmp.ucl.ac.be --user=llbb --password --info llbb +# Peewee version: 3.9.4 +class BaseModel(Model): + class Meta: + database = database + + +class Analysis(BaseModel): + id = AutoField(column_name="analysis_id") + cadiline = TextField(null=True) + contact = TextField(null=True) + description = TextField(null=True) + + class Meta: + table_name = "analysis" + + def __str__(self): + return ( + "{0.description}\n" "{cadi}" "{contact}" " Number of associated results: {nresults:d}" + ).format( + self, + cadi=(f" CADI line: {self.cadiline}\n" if self.cadiline else ""), + contact=(f" Contact/Promotor: {self.contact}\n" if self.contact else ""), + nresults=self.results.count(), + ) + + +class Dataset(BaseModel): + """Table to represent one sample from DAS on which we run the analysis + + When creating a Dataset, at least the name and datatype (mc or data) attributes must be specified. + """ + + cmssw_release = CharField(null=True) + creation_time = DateTimeField(null=True) + id = AutoField(column_name="dataset_id") + datatype = CharField() + dsize = BigIntegerField(null=True) + energy = FloatField(null=True) + globaltag = CharField(null=True) + name = CharField(index=True) + nevents = IntegerField(null=True) + process = CharField(null=True) + user_comment = TextField(null=True) + xsection = FloatField(null=True) + + class Meta: + table_name = "dataset" + + @classmethod + def create(cls, **kwargs): + """Initialize a dataset by name and datatype. Other attributes may be null and should be set separately""" + for rK in ("name", "datatype"): + if rK not in kwargs: + raise RuntimeError( + f"Argument '{rK}' is required to construct {self.__class__.__name__}" + ) + if kwargs["datatype"] not in ("mc", "data"): + raise ValueError("dataset type must be mc or data, not {!r}".format(kwargs["datatype"])) + return super().create(**kwargs) + + def __str__(self): + return ( + "Dataset #{0.id:d}:\n" + " name: {0.name}\n" + " process: {0.process}\n" + " cross-section: {xsection}\n" + " number of events: {nevents}\n" + " size on disk: {dsize}\n" + " CMSSW release: {0.cmssw_release}\n" + " global tag: {0.globaltag}\n" + " type (data or mc): {0.datatype}\n" + " center-of-mass energy: {energy} TeV\n" + " creation time (on DAS): {0.creation_time!s}\n" + " comment: {0.user_comment}" + ).format( + self, + nevents=(f"{self.nevents:d}" if self.nevents is not None else "None"), + dsize=(f"{self.dsize:d}" if self.dsize is not None else "None"), + xsection=(f"{self.xsection:f}" if self.xsection is not None else "None"), + energy=(f"{self.energy:f}" if self.energy is not None else "None"), + ) + + +class Sample(BaseModel): + """Table to represent one processed sample, typically a PATtupe, skim, RDS, CP, etc. + + When creating a Sample, at least the name, path, sampletype (any of Sample.SampleTypes) + and nevents_processed attributes must be specified. 
+ """ + + author = TextField(null=True) + code_version = CharField(null=True) + creation_time = DateTimeField( + constraints=[SQL("DEFAULT CURRENT_TIMESTAMP")], default=datetime.datetime.now + ) + event_weight_sum = FloatField(null=True) + extras_event_weight_sum = TextField(null=True) + luminosity = FloatField(null=True) + name = CharField(index=True) + nevents = IntegerField(null=True) + nevents_processed = IntegerField(null=True) + normalization = FloatField(constraints=[SQL("DEFAULT 1")], default=1.0) + path = CharField() + processed_lumi = TextField(null=True) + id = AutoField(column_name="sample_id") + sampletype = CharField() + source_dataset = ForeignKeyField(Dataset, null=True, backref="samples") + source_sample = ForeignKeyField("self", null=True, backref="derived_samples") + user_comment = TextField(null=True) + + class Meta: + table_name = "sample" + + @property + def results(self): + return Result.select().join(SampleResult).join(Sample).where(Sample.id == self.id) + + SampleTypes = ["PAT", "SKIM", "RDS", "LHCO", "NTUPLES", "HISTOS", "OTHER"] + + @classmethod + def create(cls, **kwargs): + for rK in ("name", "path", "sampletype", "nevents_processed"): + if rK not in kwargs: + raise RuntimeError( + f"Argument '{rK}' is required to construct {self.__class__.__name__}" + ) + if kwargs["sampletype"] not in Sample.SampleTypes: + raise ValueError( + "sample type {} is unknown (need one of {})".format( + kwargs["sampletype"], ", ".join(Sample.SampleTypes) + ) + ) + return super().create(**kwargs) + + def removeFiles(self): + File.delete().where(File.sample == self).execute() + + def getLuminosity(self): + """Computes the sample (effective) luminosity""" + if self.luminosity is not None: + return self.luminosity + else: + if self.source_dataset is not None: + if self.source_dataset.datatype == "MC": + # for MC, it can be computed as Nevt/xsection + if ( + self.nevents_processed is not None + and self.source_dataset.xsection is not None + ): + return self.nevents_processed / self.source_dataset.xsection + else: + # for DATA, it can only be obtained from the parent sample + if self.source_sample is not None: + return self.source_sample.luminosity + ## in cases not treated above it is impossible to compute a number, so return None def __str__(self): - return "%s"%(self.lfn) - -class Analysis(Storm): - __storm_table__ = "analysis" - analysis_id = Int(primary=True) - description = Unicode() - cadiline = Unicode() - contact = Unicode() - results = ReferenceSet(analysis_id, "Result.analysis_id") - - def __init__(self,description): - self.description = description - - def replaceBy(self, analysis): - self.description = analysis.description - self.cadiline = analysis.cadiline - self.contact = analysis.contact - + return ( + "Sample #{0.id:d} (created on {0.creation_time!s} by {0.author})\n" + " name: {0.name}\n" + " path: {0.path}\n" + " type: {0.sampletype}\n" + " number of processed events: {0.nevents_processed:d}\n" + " number of events: {nevents}\n" + " normalization: {0.normalization}\n" + " sum of event weights: {0.event_weight_sum}\n" + "{sumw_extras}" + " (effective) luminosity: : {0.luminosity}\n" + " {hasproclumi} processed luminosity sections information\n" + " code version: {0.code_version}\n" + " comment: {0.user_comment}\n" + "{source_dataset}" + "{source_sample}" + " {files}" + ).format( + self, + nevents=(f"{self.nevents:d}" if self.nevents is not None else "none"), + sumw_extras=( + " has extras sum of event weight\n" if self.extras_event_weight_sum else "" + ), + 
hasproclumi=("has" if self.processed_lumi else "does not have"), + source_dataset=( + f" source dataset: {self.source_dataset.id:d}\n" + if self.source_dataset is not None + else "" + ), + source_sample=( + f" source sample: {self.source_sample.id:d}\n" + if self.source_sample is not None + else "" + ), + files=( + "{:d} files: \n - {}".format( + self.files.count(), + "\n - ".join( + ( + "{0.lfn} ({nevents} entries)".format( + fl, nevents=f"{fl.nevents:d}" if fl.nevents is not None else "no" + ) + for fl in self.files + ) + if self.files.count() < 6 + else ( + ["{0.lfn} ({0.nevents:d} entries)".format(fl) for fl in self.files[:3]] + + ["...", "{0.lfn} ({0.nevents:d} entries)".format(self.files[-1])] + ) + ), + ) + if self.id + else "no files" + ), + ) + + +class File(BaseModel): + """Table to represent a file (in a sample) + + When creating a File, at least the lfn, pfn, event_weight_sum and nevents attributes must be specified. + """ + + event_weight_sum = FloatField(null=True) + extras_event_weight_sum = TextField(null=True) + id = BigAutoField() + lfn = CharField() # Local file name: /store/ + nevents = BigIntegerField(null=True) + pfn = CharField() # Physical file name: srm:// or root:// + sample = ForeignKeyField(Sample, backref="files") + + class Meta: + table_name = "file" + + @classmethod + def create(cls, **kwargs): + for rK in ("lfn", "pfn", "event_weight_sum", "nevents"): + if rK not in kwargs: + raise RuntimeError( + f"Argument '{rK}' is required to construct {self.__class__.__name__}" + ) + return super().create(**kwargs) + def __str__(self): - result = "%s\n"%self.description - if self.cadiline is not None: - result += " CADI line: %s\n"%self.cadiline - if self.contact is not None: - result += " Contact/Promotor: %s\n"%self.contact - result += " Number of associated results: %d"%self.results.count() - return result + return self.lfn + +class Result(BaseModel): + """Table to represent one physics result, combining several samples. + + When creating a Result, at least the path attribute must be specified. 
+ """ + + analysis = ForeignKeyField(Analysis, null=True, backref="results") + author = TextField(null=True) + creation_time = DateTimeField( + constraints=[SQL("DEFAULT CURRENT_TIMESTAMP")], default=datetime.datetime.now + ) + description = TextField(null=True) + elog = CharField(null=True) + path = CharField(index=True) + id = AutoField(column_name="result_id") + + class Meta: + table_name = "result" + + @property + def samples(self): + return Sample.select().join(SampleResult).join(Result).where(Result.id == self.id) + + @classmethod + def create(cls, **kwargs): + for rK in ("path",): + if rK not in kwargs: + raise RuntimeError( + f"Argument '{rK}' is required to construct {self.__class__.__name__}" + ) + return super().create(**kwargs) + + def __str__(self): + return ( + "Result in {0.path}\n" + " created on {0.creation_time!s} by {0.author}" + "{desc}" + "{elog}" + ).format( + self, + desc=(f"\n part of analysis {self.analysis.description}" if self.analysis else ""), + elog=(f"\n more details in {self.elog}" if self.elog else ""), + ) + + +class SampleResult(BaseModel): + result = ForeignKeyField(Result, column_name="result_id") + sample = ForeignKeyField(Sample, column_name="sample_id") + + class Meta: + table_name = "sampleresult" + indexes = ((("sample", "result"), True),) + primary_key = CompositeKey("result", "sample") + + +# all models, for binding in SAMADhiDB and import +_models = [Analysis, Dataset, Sample, File, Result, SampleResult] +__all__ += _models + + +@contextmanager +def SAMADhiDB(credentials="~/.samadhi"): + """create a database object and returns the db handle from peewee""" + cred = loadCredentials(path=credentials) + if cred.get("test", False): + import os.path + + dbPath = cred["database"] + if not os.path.isabs(dbPath): + dbPath = os.path.join( + os.path.abspath(os.path.dirname(os.path.expanduser(credentials))), dbPath + ) + db = SqliteDatabase(dbPath) + else: + db = MySQLDatabase( + cred["database"], user=cred["login"], password=cred["password"], host=cred["hostname"] + ) + with db.bind_ctx(_models): + yield db diff --git a/python/das_import.py b/python/das_import.py index 9e217b5..7e27885 100755 --- a/python/das_import.py +++ b/python/das_import.py @@ -1,98 +1,85 @@ -import re +import datetime import json +import re import subprocess -from .SAMADhi import Dataset, DbStore -from .userPrompt import confirm +from .utils import confirm_transaction + def do_das_query(query): """ Execute das_client for the specified query, and return parsed JSON output """ - args = ['dasgoclient', '-json', '-format', 'json', '--query', query] + args = ["dasgoclient", "-json", "-query", query] result = subprocess.check_output(args) return json.loads(result) -def fillDataset(dataset, dct): - """ - Fill an instance of Dataset with values from a dictionnary - """ - import datetime - - # definition of the conversion key -> column - conversion = { "process": u'process', - "user_comment": u'comment', - "energy": u'energy', - "nevents": u'nevents', - "cmssw_release": u'release', - "dsize": u'file_size', - "globaltag": u'globalTag', - "xsection": u'xsection' } - - for column, key in conversion.items(): - setattr(dataset, column, dct[key]) - - # special cases - #dataset.creation_time = datetime.datetime.strptime(dct[u'creation_time'], "%Y-%m-%d %H:%M:%S") - dataset.creation_time = datetime.datetime.fromtimestamp(dct[u'creation_time']) - - return dataset def query_das(dataset): """ Do a DAS request for the given dataset and return the metadata collected """ - summary_query = "summary 
dataset=%s" % dataset - metadata_query = "dataset=%s" % dataset - release_query = "release dataset=%s" % dataset - config_query = "config dataset=%s system=dbs3" % dataset + summary_query = "summary dataset=%s" % dataset + metadata_query = "dataset=%s" % dataset + release_query = "release dataset=%s" % dataset + config_query = "config dataset=%s system=dbs3" % dataset summary_results = do_das_query(summary_query) metadata_results = do_das_query(metadata_query) release_results = do_das_query(release_query) config_results = do_das_query(config_query) - if not 'nresults' in summary_results: - raise Exception("Invalid DAS response") - - if summary_results['nresults'] > 1: - raise Exception("Error: more than one result for DAS query:%d"%summary_results['nresults']) - # Grab results from DAS metadata = {} - for d in metadata_results["data"][0]["dataset"]: + for d in next( + entry for entry in metadata_results if "dbs3:dataset_info" in entry["das"]["services"] + )["dataset"]: for key, value in d.items(): metadata[key] = value - for d in summary_results["data"][0]["summary"]: + for d in summary_results[0]["summary"]: for key, value in d.items(): metadata[key] = value # Set release in global tag - metadata.update({ - u'release': unicode(release_results["data"][0]["release"][0]["name"][0]), - u'globalTag': unicode(config_results["data"][0]["config"][0]["global_tag"]) - }) + metadata.update( + { + "release": release_results[0]["release"][0]["name"][0], + "globalTag": config_results[0]["config"][0]["global_tag"], + } + ) # Last chance for the global tag - for d in config_results["data"]: - if metadata[u'globalTag']==u'UNKNOWN': - metadata[u'globalTag']=unicode(d["config"][0]["global_tag"]) - if metadata[u'globalTag']==u'UNKNOWN': - del metadata[u'globalTag'] + for d in config_results: + if metadata["globalTag"] == "UNKNOWN": + metadata["globalTag"] = d["config"][0]["global_tag"] + if metadata["globalTag"] == "UNKNOWN": + del metadata["globalTag"] return metadata -def import_cms_dataset(dataset, process=None, energy=None, xsection=1.0, comment="", prompt=False): + +def import_cms_dataset( + dataset, + process=None, + energy=None, + xsection=1.0, + comment="", + assumeDefault=False, + credentials=None, +): """ Do a DAS request for the given dataset and insert it into SAMAdhi """ + if subprocess.call(["voms-proxy-info", "--exists", "--valid", "0:5"]) != 0: + raise RuntimeError("No valid proxy found (with at least 5 minutes left)") + # Guess default sane values for unspecifed parameters if not process: - splitString = dataset.split('/', 2) + splitString = dataset.split("/", 2) if len(splitString) > 1: process = splitString[1] @@ -102,41 +89,219 @@ def import_cms_dataset(dataset, process=None, energy=None, xsection=1.0, comment energy = float(energyRe.group(1)) metadata = query_das(dataset) + metadata.update( + {"process": process, "xsection": xsection, "energy": energy, "comment": comment} + ) + if not all(ky in metadata for ky in ("name", "datatype")): + raise RuntimeError(f"Could not find all required keys (name and datatype) in {metadata!s}") + + # definition of the conversion key -> column + column_conversion = { + "process": "process", + "user_comment": "comment", + "energy": "energy", + "nevents": "nevents", + "cmssw_release": "release", + "dsize": "file_size", + "globaltag": "globalTag", + "xsection": "xsection", + } + # columns of the dataset to create (if needed) + dset_columns = {col: metadata[key] for col, key in column_conversion.items()} + dset_columns["creation_time"] = ( + 
datetime.datetime.fromtimestamp(metadata["creation_time"]) + if "creation_time" in metadata + else None + ) + + from .SAMADhi import SAMADhiDB, Dataset + + with SAMADhiDB(credentials) as db: + existing = Dataset.get_or_none(Dataset.name == metadata["name"]) + with confirm_transaction( + db, + "Insert into the database?" if existing is None else "Update this dataset?", + assumeDefault=assumeDefault, + ): + dataset, created = Dataset.get_or_create( + name=metadata["name"], datatype=metadata["datatype"], defaults=dset_columns + ) + print(dataset) + return dataset + + +def main(args=None): + import argparse + + parser = argparse.ArgumentParser(description="Import CMS datasets into SAMADhi") + parser.add_argument("dataset", help="CMS dataset") + parser.add_argument("-p", "--process", help="Process name") + parser.add_argument("--xsection", type=float, default=1.0, help="Cross-section in pb") + parser.add_argument("--energy", type=float, dest="energy", help="CoM energy, in TeV") + parser.add_argument("--comment", default="", help="User defined comment") + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + parser.add_argument( + "-y", + "--continue", + dest="assumeDefault", + action="store_true", + help="Insert or replace without prompt for confirmation", + ) + args = parser.parse_args(args=args) + + import_cms_dataset( + args.dataset, + args.process, + args.energy, + args.xsection, + args.comment, + assumeDefault=args.assumeDefault, + credentials=args.database, + ) + + +def get_nanoFile_data(fileName): + from cppyy import gbl + + f = gbl.TFile.Open(fileName) + if not f: + print(f"Warning: could not open file {fileName}") + return None, None + eventsTree = f.Get("Events") + if (not eventsTree) or (not isinstance(eventsTree, gbl.TTree)): + print(f"No tree with name 'Events' found in {fileName}") + return None, None + entries = eventsTree.GetEntries() + runs = f.Get("Runs") + if (not runs) or (not isinstance(runs, gbl.TTree)): + print(f"No tree with name 'Runs' found in {fileName}") + return entries, None + sums = dict() + runs.GetEntry(0) + for lv in runs.GetListOfLeaves(): + lvn = lv.GetName() + if lvn != "run": + if lv.GetLeafCount(): + lvcn = lv.GetLeafCount().GetName() + if lvcn in sums: + del sums[lvcn] + sums[lvn] = [lv.GetValue(i) for i in range(lv.GetLeafCount().GetValueLong64())] + else: + sums[lvn] = lv.GetValue() + for entry in range(1, runs.GetEntries()): + runs.GetEntry(entry) + for cn, vals in sums.items(): + if hasattr(vals, "__iter__"): + entryvals = getattr(runs, cn) + ## warning and workaround (these should be consistent for all NanoAODs in a sample) + if len(vals) != len(entryvals): + logger.error( + f"Runs tree: array of sums {cn} has a different length in entry {entry:d}: {len(entryvals):d} (expected {len(vals):d})" + ) + for i in range(min(len(vals), len(entryvals))): + vals[i] += entryvals[i] + else: + sums[cn] += getattr(runs, cn) + return entries, sums + + +def import_nanoAOD_sample(args=None): + import argparse + + parser = argparse.ArgumentParser( + "Add a NanoAOD sample based on the DAS path and (optionally) cross-section" + ) + parser.add_argument("path", help="DAS path") + parser.add_argument("--xsection", default=1.0, type=float, help="Cross-section value") + parser.add_argument("--energy", default=13.0, type=float, help="CoM energy, in TeV") + parser.add_argument("-p", "--process", help="Process name") + parser.add_argument("--comment", default="", help="User defined 
comment") + parser.add_argument("--datasetcomment", default="", help="User defined comment") + parser.add_argument( + "--store", required=True, help="root path of the local CMS storage (e.g. /storage/data/cms)" + ) + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + parser.add_argument( + "-y", + "--continue", + dest="assumeDefault", + action="store_true", + help="Insert or replace without prompt for confirmation", + ) + args = parser.parse_args(args=args) + + if subprocess.call(["voms-proxy-info", "--exists", "--valid", "0:5"]) != 0: + raise RuntimeError("No valid proxy found (with at least 5 minutes left)") + + parent_results = do_das_query(f"parent dataset={args.path}") + if not (len(parent_results) == 1 and len(parent_results[0]["parent"]) == 1): + raise RuntimeError("Parent dataset query result has an unexpected format") + parent_name = parent_results[0]["parent"][0]["name"] + source_dataset = import_cms_dataset( + parent_name, + process=args.process, + energy=args.energy, + xsection=args.xsection, + comment=args.datasetcomment, + assumeDefault=args.assumeDefault, + credentials=args.database, + ) + + files_results = do_das_query(f"file dataset={args.path}") + nevents = sum(fr["file"][0]["nevents"] for fr in files_results) + + from .SAMADhi import Sample, File, SAMADhiDB + import os.path + + ## Next: the add_sample part + with SAMADhiDB(credentials=args.database) as db: + existing = Sample.get_or_none(Sample.name == args.path) + with confirm_transaction( + db, + "Insert into the database?" if existing is None else f"Replace existing {existing!s}?", + assumeDefault=args.assumeDefault, + ): + sample, created = Sample.get_or_create( + name=args.path, + path=args.path, + defaults={"sampletype": "NTUPLES", "nevents_processed": nevents}, + ) + sample.nevents = nevents + sample.normalization = 1.0 + sample.source_dataset = source_dataset + sample.source_sample = None + + sample_weight_sum = 0 + for fRes in files_results: + if len(fRes["file"]) != 1: + raise RuntimeError("File result from DAS query has an unexpected format") + fileInfo = fRes["file"][0] + pfn = os.path.join(args.store, fileInfo["name"].lstrip(os.path.sep)) + entries, weight_sums = get_nanoFile_data(pfn) + # print("For debug: nevents from DAS={0:d}, from file={1:d}".format(fileInfo["nevents"], entries)) + event_weight_sum = weight_sums["genEventSumw"] + # print("All event weight sums: {0!r}".format(weight_sums)) + sample_weight_sum += event_weight_sum + File.create( + lfn=fileInfo["name"], + pfn=pfn, + event_weight_sum=event_weight_sum, + nevents=(entries if entries is not None else 0), + sample=sample, + ) ## FIXME extras_event_weight_sum + + sample.event_weight_sum = sample_weight_sum + sample.luminosity = sample.getLuminosity() ## from xsection and sum of weights + sample.comment = args.comment + sample.author = "CMS" + sample.save() - metadata.update({ - u"process": unicode(process), - u"xsection": xsection, - u"energy": energy, - u"comment": unicode(comment) - }) - - # Connect to the database - dbstore = DbStore() - - # Check if the dataset is already in the dataset - update = False - dbResult = dbstore.find(Dataset, Dataset.name == unicode(metadata['name'])) - if (dbResult.is_empty()): - dataset = Dataset(metadata['name'], metadata['datatype']) - else: - update = True - dataset = dbResult.one() - - fillDataset(dataset, metadata) - - if prompt: - if not update: - dbstore.add(dataset) - dbstore.flush() - - print dataset - prompt = 
"Insert into the database?" if not update else "Update this dataset?" - if confirm(prompt=prompt, resp=True): - dbstore.commit() - else: - dbstore.rollback() - - else: - if not update: - dbstore.add(dataset) - dbstore.commit() + print(sample) diff --git a/python/dbAnalysis.py b/python/dbAnalysis.py new file mode 100755 index 0000000..b9e0bca --- /dev/null +++ b/python/dbAnalysis.py @@ -0,0 +1,599 @@ +#!/usr/bin/env python +""" Script to do basic checks to the database and output statistics on usage and issues """ + +import argparse +import errno +import json +import os +import re +from collections import defaultdict +from contextlib import contextmanager +from datetime import datetime + +import numpy as np + +from playhouse.shortcuts import model_to_dict + +from .das_import import query_das +from .SAMADhi import Analysis, Dataset +from .SAMADhi import File as SFile +from .SAMADhi import Result, SAMADhiDB, Sample + + +@contextmanager +def openRootFile(fileName, noOp=False, mode="update"): + if noOp: + yield + else: + from cppyy import gbl + + rootfile = gbl.TFile.Open(fileName, mode) + yield + rootfile.Write() + rootfile.Close() + + +def json_serialize(obj): + if isinstance(obj, datetime): + return obj.isoformat() + elif isinstance(obj, np.int64): + return str(obj) + else: + try: + return model_to_dict(obj) + except Exception as ex: + raise TypeError(f"Object {obj!r} could not be serialized: {ex}") + + +def saveReportJSON(jReport, outFileName, outDir=".", symlinkDir=None): + outFullName = os.path.join(outDir, outFileName) + with open(outFullName, "w") as outFile: + json.dump(jReport, outFile, default=json_serialize) + if symlinkDir: + force_symlink(outFullName, os.path.join(symlinkDir, outFileName)) + + +def main(args=None): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "-p", + "--path", + type=(lambda p: os.path.abspath(os.path.expandvars(os.path.expanduser(p)))), + default=datetime.now().strftime("%y%m%d-%H:%M:%S"), + help="Destination path", + ) + parser.add_argument("-b", "--basedir", help="Directory where the website will be installed") + parser.add_argument( + "-f", + "--full", + action="store_true", + dest="DAScrosscheck", + help="Full check: compares each Dataset entry to DAS and check for consistency (slow!)", + ) + parser.add_argument( + "-d", "--dry", action="store_true", dest="dryRun", help="Dry run: do no write to disk" + ) + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + args = parser.parse_args(args=args) + if not args.dryRun: + if os.path.exists(args.path): + raise OSError(errno.EEXIST, "Existing directory", args.path) + else: + os.makedirs(args.path) + + # connect to the MySQL database using default credentials + with SAMADhiDB(credentials=args.database) as db, openRootFile( + os.path.join(args.path, "analysisReport.root"), noOp=args.dryRun, mode="UPDATE" + ): + # run each of the checks and collect data + # collect general statistics + general = collectGeneralStats() + if not args.dryRun: + saveReportJSON( + general, + "stats.json", + outDir=args.path, + symlinkDir=os.path.join(args.basedir, "data"), + ) + # check datasets + datasets = { + "DatabaseInconsistencies": ( + checkDatasets() if args.DAScrosscheck else copyInconsistencies(args.basedir) + ), + "Orphans": findOrphanDatasets(), + "IncompleteData": checkDatasetsIntegrity(), + "DatasetsStatistics": analyzeDatasetsStatistics(writeRoot=(not args.dryRun)), + } + if not args.dryRun: + 
saveReportJSON( + datasets, + "DatasetsAnalysisReport.json", + outDir=args.path, + symlinkDir=os.path.join(args.basedir, "data"), + ) + # check samples + samples = { + "MissingDirSamples": checkSamplePath(), + "DatabaseInconsistencies": checkSampleConsistency(), + "SampleStatistics": analyzeSampleStatistics(writeRoot=(not args.dryRun)), + } + if not args.dryRun: + saveReportJSON( + samples, + "SamplesAnalysisReport.json", + outDir=args.path, + symlinkDir=os.path.join(args.basedir, "data"), + ) + # now, check results + results = { + "MissingDirSamples": checkResultPath(), + "DatabaseInconsistencies": checkResultConsistency(), + "SelectedResults": selectResults(os.path.join(args.basedir, "data")), + "ResultsStatistics": analyzeResultsStatistics(writeRoot=(not args.dryRun)), + } + if not args.dryRun: + saveReportJSON( + results, + "ResultsAnalysisReport.json", + outDir=args.path, + symlinkDir=os.path.join(args.basedir, "data"), + ) + # finally, some stats about Analysis objects + analyses = {"AnalysisStatistics": analyzeAnalysisStatistics(writeRoot=(not args.dryRun))} + if not args.dryRun: + saveReportJSON( + analyses, + "AnalysisAnalysisReport.json", + outDir=args.path, + symlinkDir=os.path.join(args.basedir, "data"), + ) + + +def collectGeneralStats(): + # get number of datasets, samples, results, analyses + result = { + "nDatasets": Dataset.select(Dataset.id).count(), + "nSamples": Sample.select(Sample.id).count(), + "nResults": Result.select(Result.id).count(), + "nAnalysis": Analysis.select(Analysis.id).count(), + } + print("\nGeneral statistics:") + print("======================") + for kt, num in result.items(): + print(f"{num:d} {kt[1:].lower()}") + return result + + +def checkDatasets(): + print("\nDatasets inconsistent with DAS:") + print("==================================") + result = [] + for dataset in Dataset.select(): + # query DAS to get the same dataset, by name + try: + metadata = query_das(dataset.name) + except: + result.append([dataset, "Inconsistent with DAS"]) + print( + "{0.name} (imported on {0.creation_time!s}) -- Error getting dataset in DAS".format( + dataset + ) + ) + continue + + # perform some checks: + try: + # release name either matches or is unknown in DAS + test1 = metadata["release"] == "unknown" or dataset.cmssw_release == metadata["release"] + # datatype matches + test2 = dataset.datatype == metadata["datatype"] + # nevents matches + test3 = dataset.nevents == metadata["nevents"] + # size matches + test4 = dataset.dsize == metadata["file_size"] + except: + result.append([dataset, "Inconsistent with DAS"]) + print("{0.name} (imported on {0.creation_time!s})".format(dataset)) + else: + if not (test1 and test2 and test3 and test4): + result.append([dataset, "Inconsistent with DAS"]) + print("{0.name} (imported on {0.creation_time!s})".format(dataset)) + return result + + +def findOrphanDatasets(): + print("\nOrphan Datasets:") + print("===================") + result = [] + for dataset in Dataset.select(): + if dataset.samples.count() == 0: + result.append(dataset) + print("{0.name} (imported on {0.creation_time!s})".format(dataset)) + if len(result) == 0: + print("None") + return result + + +def checkDatasetsIntegrity(): + print("\nDatasets integrity issues:") + print("===========================") + result = [] + for dataset in Dataset.select(): + if dataset.cmssw_release is None: + result.append([dataset, "missing CMSSW release"]) + print( + "{0.name} (imported on {0.creation_time!s}): missing CMSSW release".format(dataset) + ) + elif dataset.energy is 
None: + result.append([dataset, "missing Energy"]) + print("{0.name} (imported on {0.creation_time!s}): missing Energy".format(dataset)) + elif dataset.globaltag is None: + result.append([dataset, "missing Globaltag"]) + print("{0.name} (imported on {0.creation_time!s}): missing Globaltag".format(dataset)) + if len(result) == 0: + print("None") + return result + + +def makePie(uName, data, title=None, save=False): + from cppyy import gbl + + pie = gbl.TPie(f"{uName}Pie", title if title is not None else uName, len(data)) + for idx, (val, freq) in enumerate(data.items()): + pie.SetEntryVal(idx, freq) + pie.SetEntryLabel(idx, val) + pie.SetTextAngle(0) + pie.SetRadius(0.3) + pie.SetTextColor(1) + pie.SetTextFont(62) + pie.SetTextSize(0.03) + canvas = gbl.TCanvas(uName, "", 2) + pie.Draw("r") + if save: + gbl.gPad.Write() + + +def getFreqs(model, attName, addNoneTo=None): + from peewee import fn + + freqs = { + str(getattr(val, attName)): val.count + for val in model.select( + getattr(model, attName), fn.Count(model.id).alias("count") + ).group_by(getattr(model, attName)) + } + if addNoneTo is not None and None in freqs: + freqs[addNoneTo] = freqs.get(addNoneTo, 0) + freqs[None] + del freqs[None] + return freqs + + +def th1ToChart(histo): + return [ + [histo.GetBinCenter(ib), histo.GetBinContent(ib)] for ib in range(1, histo.GetNbinsX() + 1) + ] + + +def toTH1I(name, data, N, xMin, xMax, title=None): + from cppyy import gbl + + if title is None: + title = name + h = gbl.TH1I(name, title, N, xMin, xMax) + for x in data: + h.Fill(x) + return h + + +def toGraph(x, y=None): + from cppyy import gbl + + if y is None: + y = np.array(range(len(x) + 1)) + else: + assert len(x) == len(y) + gr = gbl.TGraph(len(x)) + for i, (x, y) in enumerate(zip(x, y)): + gr.SetPoint(i, x, y) + return gr + + +def analyzeDatasetsStatistics(writeRoot=False): + stats = {} + for prop in ("cmssw_release", "globaltag", "datatype", "energy"): + nDataset_by_prop = getFreqs(Dataset, prop, addNoneTo="Unknown") + stats[prop] = [[k, v] for k, v in nDataset_by_prop.items()] + makePie( + f"dataset{prop.capitalize()}", + nDataset_by_prop, + title=f"Datasets {prop}", + save=writeRoot, + ) + + dset_time, dset_nsamples, dset_nevents, dset_dsize = zip( + *( + ( + ( + int(dset.creation_time.strftime("%s")) * 1000 + if dset.creation_time is not None + else 0 + ), + dset.samples.count(), + (dset.nevents if dset.nevents is not None else 0), + (dset.dsize if dset.dsize is not None else 0), + ) + for dset in Dataset.select().order_by(Dataset.creation_time) + ) + ) + stats["datasetsNsamples"] = th1ToChart(toTH1I("dataseets_nsamples", dset_nsamples, 10, 0, 10)) + stats["datasetsNevents"] = th1ToChart(toTH1I("dataseets_nevents", dset_nevents, 100, 0, -100)) + stats["datasetsDsize"] = th1ToChart(toTH1I("dataseets_dsize", dset_dsize, 100, 0, -100)) + stats["datasetsTimeprof"] = [[tm, i + 1] for i, tm in enumerate(dset_time)] + if writeRoot: + toGraph(np.array(dset_time) / 1000.0).Write("datasetsTimeprof_graph") + + print("\nDatasets Statistics extracted.") + print("=================================") + + return stats + + +def checkResultPath(): + # get all samples + print("\nResults with missing path:") + print("===========================") + result = [] + for res in Result.select(): + # check that the path exists, and keep track of the sample if not the case. 
+ if not os.path.exists(res.path): + print("Result #{0.id} (created on {0.creation_time} by {0.author}):".format(res)) + print(f" missing path: {res.path}") + result.append(res) + if len(result) == 0: + print("None") + return result + + +def checkSamplePath(): + print("\nSamples with missing path:") + print("===========================") + result = [] + for sample in Sample.select(): + # check that the path exists, and keep track of the sample if not the case. + vpath = getSamplePath(sample) + for path in vpath: + if not os.path.exists(path): + print( + "Sample #{0.id:d} (created on {0.creation_time!s} by {0.author}):".format( + sample + ) + ) + print(f" missing path: {path}") + print(vpath) + result.append(sample) + break + if len(result) == 0: + print("None") + return result + + +def getSamplePath(sample): + # the path should be stored in sample.path + # if it is empty, look for files in that path + if sample.path == "": + vpath = set() + regex = r".*SFN=(.*)" + for f in SFile.select().where(SFile.sample.id == sample.id): + m = re.search(regex, f.pfn) + if m: + vpath.add(os.path.dirname(m.group(1))) + return list(vpath) + else: + return [sample.path] + + +def selectResults(symlinkDir): + # look for result records pointing to a ROOT file + # eventually further filter + print("\nSelected results:") + print("===========================") + result = [] + for res in Result.select(): + path = res.path + if os.path.exists(path) and os.path.isdir(path): + files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))] + if len(files) == 1: + path = os.path.join(path, f) + res.path = path + if os.path.exists(path) and os.path.isfile(path) and path.lower().endswith(".root"): + symlink = os.path.join(symlinkDir, f"res_{res.id}.root") + relpath = "../data/res_{0}.root" % (res.id) + force_symlink(path, symlink) + result.append([res, relpath]) + print("res #{0.id} (created on {0.creation_time} by {0.author}): ".format(res)) + print(symlink) + if len(result) == 0: + print("None") + return result + + +def checkResultConsistency(): + print("\nResults with missing source:") + print("=============================") + result = [] + for res in Result.select(): + # check that the source sample exists in the database. + # normaly, this should be protected already at the level of sql rules + for sample in res.samples: + if sample is None: + print( + "Result #{0.id:d} (created on {0.creation_time!s} by {0.author}):".format(res) + ) + print("inconsistent source sample") + result.append([res, "inconsistent source sample"]) + print(res) + break + if len(result) == 0: + print("None") + return result + + +def checkSampleConsistency(): + print("\nSamples with missing source:") + print("=============================") + result = [] + for sample in Sample.select(): + # check that either the source dataset or the source sample exists in the database. 
+ # normaly, this should be protected already at the level of sql rules + sourceDataset = sample.source_dataset + sourceSample = sample.source_sample + if sample.source_dataset_id is not None and sample.source_dataset is None: + print("Sample #{0.id} (created on {0.creation_time} by {0.author}".format(sample)) + print("inconsistent source dataset") + result.append([sample, "inconsistent source dataset"]) + print(sample) + if sample.source_sample_id is not None and sample.source_sample is None: + print("Sample #{0.id} (created on {0.creation_time} by {0.author}".format(sample)) + print("inconsistent source sample") + result.append([sample, "inconsistent source sample"]) + if len(result) == 0: + print("None") + return result + + +def analyzeAnalysisStatistics(writeRoot=False): + stats = {} + nAnalyses_by_contact = getFreqs(Analysis, "contact", addNoneTo="Unknown") + stats["analysisContacts"] = [[k, v] for k, v in nAnalyses_by_contact.items()] + makePie("analysisContact", nAnalyses_by_contact, title="Analysis contacts", save=writeRoot) + nResults_by_analysis = { + ana.description: len(ana.results) for ana in Analysis.select() if len(ana.results) > 0 + } + stats["analysisResults"] = [[k, v] for k, v in nResults_by_analysis.items()] + makePie("analysisResults", nResults_by_analysis, title="Analysis results", save=writeRoot) + + # stats to collect: group distribution (from CADI line) (pie) + cadiExpr = re.compile(r".*([A-Z]{3})-\d{2}-\d{3}") + nAnalyses_by_physicsgroup = defaultdict(int) + for analysis in Analysis.select(Analysis.cadiline): + m = cadiExpr.search(analysis.cadiline) + nAnalyses_by_physicsgroup[m.group(1) if m else "NONE"] += 1 + stats["physicsGroup"] = [[k, v] for k, v in nAnalyses_by_physicsgroup.items()] + makePie("physicsGroup", nAnalyses_by_physicsgroup, title="Physics groups", save=writeRoot) + + print("\nAnalysis Statistics extracted.") + print("================================") + + return stats + + +def analyzeResultsStatistics(writeRoot=False): + stats = {} + nResults_by_author = getFreqs(Result, "author", addNoneTo="Unknown") + stats["resultsAuthors"] = [[k, v] for k, v in nResults_by_author.items()] + + res_time, res_nsamples = zip( + *( + ( + ( + int(res.creation_time.strftime("%s")) * 1000 + if res.creation_time is not None + else 0 + ), + res.samples.count(), + ) + for res in Result.select().order_by(Result.creation_time) + ) + ) + stats["resultNsamples"] = th1ToChart(toTH1I("result_nsamples", res_nsamples, 20, 0, 20)) + if writeRoot: + toGraph(np.array(res_time) / 1000.0).Write("resultsTimeprof_graph") + + print("\nResults Statistics extracted.") + print("================================") + + return stats + + +def analyzeSampleStatistics(writeRoot=False): + stats = {} + nSamples_by_author = getFreqs(Sample, "author", addNoneTo="Unknown") + stats["sampleAuthors"] = [[k, v] for k, v in nSamples_by_author.items()] + makePie("sampleAuthors", nSamples_by_author, title="Sample authors", save=writeRoot) + nSamples_by_type = getFreqs(Sample, "sampletype", addNoneTo="Unknown") + stats["sampleTypes"] = [[k, v] for k, v in nSamples_by_type.items()] + makePie("sampleTypes", nSamples_by_type, title="Sample types", save=writeRoot) + + samples_time, sample_nevents, sample_nevents_processed = zip( + *( + ( + ( + int(smp.creation_time.strftime("%s")) * 1000 + if smp.creation_time is not None + else 0 + ), + (smp.nevents if smp.nevents is not None else 0), + (smp.nevents_processed if smp.nevents is not None else 0), + ) + for smp in Sample.select( + Sample.creation_time, 
Sample.nevents, Sample.nevents_processed
+            ).order_by(Sample.creation_time)
+        )
+    )
+    stats["sampleNevents"] = th1ToChart(toTH1I("sample_nevents", sample_nevents, 100, 0, -100))
+    stats["sampleNeventsProcessed"] = th1ToChart(
+        toTH1I("sample_nevents_processed", sample_nevents_processed, 100, 0, -100)
+    )
+    stats["sampleNeventsTimeprof"] = list(
+        list(row) for row in zip(samples_time, np.cumsum(np.array(sample_nevents)))
+    )
+    stats["sampleNeventsProcessedTimeprof"] = list(
+        list(row) for row in zip(samples_time, np.cumsum(np.array(sample_nevents_processed)))
+    )
+    # same [timestamp, running count] format as the other *Timeprof entries
+    stats["samplesTimeprof"] = [[tm, i + 1] for i, tm in enumerate(samples_time)]
+    if writeRoot:
+        samples_time_s = np.array(samples_time) / 1000.0
+        toGraph(samples_time_s, np.cumsum(sample_nevents)).Write("sampleNeventsTimeprof_graph")
+        toGraph(samples_time_s, np.cumsum(sample_nevents_processed)).Write(
+            "sampleNeventsProcessedTimeprof_graph"
+        )
+        toGraph(samples_time_s).Write("samplesTimeprof_graph")
+
+    print("\nSamples Statistics extracted.")
+    print("================================")
+
+    return stats
+
+
+def force_symlink(file1, file2):
+    try:
+        os.symlink(file1, file2)
+    except OSError as e:
+        if e.errno == errno.EEXIST:
+            os.remove(file2)
+            os.symlink(file1, file2)
+
+
+def copyInconsistencies(basedir):
+    # try to read inconsistencies from previous job
+    # the file must be there and must contain the relevant data
+    try:
+        with open(os.path.join(basedir, "data", "DatasetsAnalysisReport.json")) as jfile:
+            content = json.load(jfile)
+            return content["DatabaseInconsistencies"]
+    except OSError:
+        # no file. Return an empty list.
+        # This will happen if basedir is not (properly) set or if it is new.
+        print(
+            "No previous dataset analysis report found in path. The Database inconsistencies will be empty."
+        )
+        return []
+    except KeyError:
+        # no proper key. Return an empty list.
+        # This should not happen, so print a warning.
+        print("No DatabaseInconsistencies key in the previous json file ?!")
+        return []
diff --git a/python/luminosity.py b/python/luminosity.py
new file mode 100644
index 0000000..9dc9778
--- /dev/null
+++ b/python/luminosity.py
@@ -0,0 +1,179 @@
+import argparse
+import subprocess
+from itertools import chain
+
+
+"""
+Helper functions for computing the luminosity for a set of samples
+"""
+
+
+def parse_luminosity_csv(result):
+    """Parse the CSV file produced by brilcalc, and return the total recorded luminosity in /pb"""
+    import csv
+    from io import StringIO
+
+    # subprocess returns bytes; decode before feeding the csv reader
+    if isinstance(result, bytes):
+        result = result.decode()
+    f = StringIO(result)
+
+    lumi = 0
+    reader = csv.reader(f, delimiter=",")
+    for row in reader:
+        if not row:
+            continue
+
+        if row[0][0] == "#":
+            continue
+        lumi += float(row[-1])
+
+    return lumi / 1000.0 / 1000.0
+
+
+def compute_luminosity(sample, local=False, normtag=None, username=None):
+    print("Computing luminosity for %r" % str(sample.name))
+
+    lumi = 0
+    if not local:
+        print(
+            "Running brilcalc on lxplus... 
You'll probably need to enter your lxplus password in a moment" + ) + print("") + + cmds = [ + "brilcalc", + "lumi", + "--normtag", + normtag, + "--output-style", + "csv", + "-i", + '"%s"' % str(sample.processed_lumi.replace('"', "")), + ] + cmd = ( + 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH"; ' + + " ".join(cmds) + ) + ssh_cmds = ["ssh", "%s@lxplus.cern.ch" % username, cmd] + brilcalc_result = subprocess.check_output(ssh_cmds) + + lumi = parse_luminosity_csv(brilcalc_result) + else: + print("Running brilcalc locally...") + # FIXME one day + print("Error: running brilcalc locally is not supported for the moment.") + return 0 + + print("Sample luminosity: %.3f /pb" % lumi) + print("") + + # Update luminosity in the database + sample.luminosity = lumi + + return lumi + + +def install_brilcalc(local=False, username=None): + + if local: + print("Local installation of brilcalc is not supported.") + return + + print( + "Installing brilcalc on lxplus... You'll probably need to enter your lxplus password in a moment" + ) + + cmds = ["pip", "install", '--install-option="--prefix=$HOME/.local"', "--upgrade", "brilws"] + cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH"; %s' % ( + " ".join(cmds) + ) + ssh_cmds = ["ssh", "%s@lxplus.cern.ch" % username, cmd] + subprocess.call(ssh_cmds) + + +def update_brilcalc(local=False, username=None): + + if local: + print("Local installation of brilcalc is not supported.") + return + + print( + "Updating brilcalc on lxplus... You'll probably need to enter your lxplus password in a moment" + ) + + cmds = [ + "pip", + "install", + '--install-option="--prefix=$HOME/.local"', + "--upgrade", + "--force-reinstall", + "brilws", + ] + cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH"; %s' % ( + " ".join(cmds) + ) + ssh_cmds = ["ssh", "%s@lxplus.cern.ch" % username, cmd] + subprocess.call(ssh_cmds) + + +def compute_sample_luminosity(args=None): + parser = argparse.ArgumentParser(description="Compute luminosity of a set of samples") + parser.add_argument( + "-i", "--id", type=int, nargs="+", dest="ids", help="IDs of the samples", metavar="ID" + ) + parser.add_argument( + "--name", type=str, nargs="+", dest="names", help="Names of the samples", metavar="NAME" + ) + parser.add_argument( + "--local", action="store_true", help="Run brilcalc locally instead of on lxplus" + ) + parser.add_argument( + "--bootstrap", action="store_true", help="Install brilcalc. 
Needs to be done only once" + ) + parser.add_argument("--update", action="store_true", help="Update brilcalc") + parser.add_argument( + "-n", "--username", help="Remote lxplus username (local username by default)" + ) + parser.add_argument("-t", "--normtag", help="Normtag on /afs") + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + options = parser.parse_args(args=args) + + if ( + not options.bootstrap + and not options.update + and options.ids is None + and options.names is None + ): + parser.error("You must specify at least one sample id or sample name.") + if not options.bootstrap and not options.update and not options.normtag: + parser.error("You must specify a normtag file") + if options.ids is None: + options.ids = [] + if options.names is None: + options.names = [] + if options.username is None: + import pwd, os + + options.username = pwd.getpwuid(os.getuid()).pw_name + + from .SAMADhi import Sample, SAMADhiDB + from .utils import replaceWildcards + + if options.bootstrap: + install_brilcalc(local=options.local, username=options.username) + elif options.update: + update_brilcalc(local=options.local, username=options.username) + else: + with SAMADhiDB(credentials=args.database) as db: + for sample in chain( + (Sample.get_by_id(id_) for id_ in options.ids), + chain.from_iterable( + Sample.select().where(Sample.name % replaceWildcards(name, db=db)) + for name in options.names + ), + ): + compute_luminosity( + sample, normtag=options.normtag, local=options.local, username=options.username + ) diff --git a/python/scripts.py b/python/scripts.py new file mode 100644 index 0000000..eb871c2 --- /dev/null +++ b/python/scripts.py @@ -0,0 +1,547 @@ +import argparse +import glob +import os.path +from datetime import datetime + + +""" +Simple command-line SAMADhi utilities: search, interactive shell etc. 
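+
+Illustrative usage (assuming the functions below are installed as console scripts under the
+names mentioned in their docstrings, e.g. search_SAMADhi and iSAMADhi; the entry-point
+registration itself is not part of this module, and the sample names and ids are made up):
+
+    search_SAMADhi sample --name "TTJets*"   # list samples whose name matches a pattern
+    search_SAMADhi dataset -i 1234 --long    # detailed printout of dataset #1234
+    iSAMADhi                                 # open an IPython shell connected to the database
+
+All commands read the connection settings from ~/.samadhi unless --database is given.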
+""" + + +def interactive(args=None): + """iSAMADhi: Explore (and manipulate) the SAMADhi database in an IPython shell""" + parser = argparse.ArgumentParser( + description="Explore (and manipulate) the SAMADhi database in an IPython shell" + ) + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + args = parser.parse_args(args=args) + + from .SAMADhi import _models, SAMADhiDB + + import IPython + + for md in _models: + locals()[md.__name__] = md + with SAMADhiDB(credentials=args.database) as db: + IPython.embed( + banner1=( + "Interactively exploring SAMADhi database {database}\n" + "Available models: {models}\n" + "WARNING: by default your changes *will* be committed to the database" + ).format( + database="{}({}){}".format( + db.__class__.__name__, + db.database, + ( + " at {}".format(db.connect_params["host"]) + if "host" in db.connect_params + else "" + ), + ), + models=", ".join(md.__name__ for md in _models), + ) + ) + + +def search(args=None): + """search_SAMADhi: search for datasets, samples, results, or analyses""" + parser = argparse.ArgumentParser( + description="Search for datasets, samples, results or analyses in SAMADhi" + ) + parser.add_argument( + "type", + help="Object type to search for", + choices=["dataset", "sample", "result", "analysis"], + ) + parser.add_argument("-l", "--long", action="store_true", help="detailed output") + pquery = parser.add_mutually_exclusive_group(required=True) + pquery.add_argument("-n", "--name", help="filter on name") + pquery.add_argument( + "-p", + "--path", + help="filter on path", + type=(lambda pth: os.path.abspath(os.path.expandvars(os.path.expanduser(pth)))), + ) + pquery.add_argument("-i", "--id", type=int, help="filter on id") + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + args = parser.parse_args(args=args) + # more validation + if args.type in ("dataset", "analysis") and args.path: + parser.error(f"Cannot search {args.type} by path") + elif args.type == "result" and args.name: + parser.error("Cannot search results by name") + + from . import SAMADhi + from .SAMADhi import SAMADhiDB + from .utils import replaceWildcards + + objCls = getattr(SAMADhi, args.type.capitalize()) + + with SAMADhiDB(credentials=args.database) as db: + qry = objCls.select() + if args.id: + qry = qry.where(objCls.id == args.id) + elif args.name: + qry = qry.where(objCls.name % replaceWildcards(args.name, db=db)) + elif args.path: + qry = qry.where(objCls.path % replaceWildcards(args.path, db=db)) + results = qry.order_by(objCls.id) + + if args.long: + for entry in results: + print(str(entry)) + print(86 * "-") + else: + fmtStr = "{{0.id}}\t{{0.{0}}}".format( + "name" if args.type not in ("result", "analysis") else "description" + ) + for res in results: + print(fmtStr.format(res)) + + +def update_datasets_cross_section(args=None): + parser = argparse.ArgumentParser(description="Update cross-sections of datasets.") + parser.add_argument( + "regex", + type=str, + help=( + "Regular expression used to filter *samples*." + "Only '*' and '?' wildcards are supported. Take note that filtering is applied to samples, and not to datasets." 
+ ), + ) + parser.add_argument( + "-f", + "--force", + type=float, + help="For the cross-section of all datasets matching the regular expression to be this value", + metavar="XSEC", + ) + parser.add_argument("-w", "--write", action="store_true", help="Write changes to the database") + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + args = parser.parse_args(args) + + from .SAMADhi import Dataset, Sample, SAMADhiDB + from .utils import replaceWildcards, maybe_dryrun + + with SAMADhiDB(credentials=args.database) as db: + samples = Sample.select().where(Sample.name % replaceWildcards(args.regex, db=db)) + if samples.count() == 0: + print("No sample found.") + else: + with maybe_dryrun( + db, + dryRun=(not args.write), + dryMessage="Currently running in dry-run mode. If you are happy with the change, pass the '-w' flag to this script to store the changes into the database.", + ): + for sample in samples: + if sample.source_dataset.datatype == "data": + continue + # Consider a cross-section of one as a non-updated value + if ( + sample.source_dataset.xsection == 1 + or sample.source_dataset.xsection is None + ): + # Try to find a similar sample in the database, with the same center of mass energy + print(f"Updating cross-section of {sample.source_dataset.process}") + if args.force: + print(f" Forcing the cross-section to {args.force}") + sample.source_dataset.xsection = args.force + else: + possible_matches = Dataset.select().where( + (Dataset.process % sample.source_dataset.process) + & (Dataset.energy == sample.source_dataset.energy) + & (Dataset.id != sample.source_dataset.id) + ) + if possible_matches.count() == 0: + print("No match for this dataset found") + elif (possible_matches.count() > 1) and not all( + p.xsec == possible_matches[0].xsec for p in possible_matches + ): + print( + " Warning: more than one possible match found for this dataset, and they do not have the same cross-section. I do not know what to do..." + ) + else: + xsec = possible_matches[0].xsec + print(f" Updating with cross-section = {xsec}") + sample.source_dataset.xsection = xsec + + +def get_file_data(f_): + from cppyy import gbl + + f = gbl.TFile.Open(f_) + if not f: + return (None, None) + + weight_sum = f.Get("event_weight_sum") + if weight_sum: + weight_sum = weight_sum.GetVal() + else: + weight_sum = None + + entries = None + tree = f.Get("t") + if tree: + entries = tree.GetEntriesFast() + + return (weight_sum, entries) + + +def add_sample(args=None): + from .utils import ( + parsePath, + userFromPath, + timeFromPath, + confirm_transaction, + prompt_dataset, + prompt_sample, + ) + + parser = argparse.ArgumentParser(description="Add a sample to the database") + parser.add_argument("--name", help="specify sample name") + parser.add_argument( + "--processed", + type=int, + dest="nevents_processed", + help="number of processed events (from the input)", + ) + parser.add_argument("--nevents", type=int, help="number of events (in the sample)") + parser.add_argument("--norm", type=float, default=1.0, help="additional normalization factor") + parser.add_argument( + "--weight-sum", type=float, default=1.0, help="additional normalization factor" + ) + parser.add_argument("--lumi", type=float, help="sample (effective) luminosity") + parser.add_argument( + "--code_version", + default="", + help="version of the code used to process that sample (e.g. 
git tag or commit)", + ) + parser.add_argument("--comment", default="", help="comment about the dataset") + parser.add_argument("--source_dataset", type=int, help="reference to the source dataset") + parser.add_argument("--source_sample", type=int, help="reference to the source sample, if any") + parser.add_argument( + "-a", "--author", help="author of the result. If not specified, is taken from the path." + ) + parser.add_argument("--files", help="list of files (full path, comma-separated values)") + parser.add_argument( + "-t", + "--time", + help='result timestamp. If set to "path", timestamp will be taken from the path. Otherwise, it must be formated like YYYY-MM-DD HH:MM:SS. Default is current time.', + ) + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + parser.add_argument( + "-y", + "--continue", + dest="assumeDefault", + action="store_true", + help="Assume defaults instead of prompt", + ) + parser.add_argument( + "type", choices=["PAT", "SKIM", "RDS", "NTUPLES", "HISTOS"], help="Sample type" + ) + parser.add_argument("path", help="location of the sample on disk", type=parsePath) + args = parser.parse_args(args=args) + + if args.author is None: + args.author = userFromPath(args.path) + if args.time == "path": + args.time = timeFromPath(args.path) + elif args.time is not None: + args.time = datetime.strptime(args.time, "%Y-%m-%d %H:%M:%S") + else: + args.time = datetime.now() + if args.name is None: + args.name = next(tk for tk in reversed(args.path.split("/")) if len(tk)) + + from .SAMADhi import Dataset, Sample, File, SAMADhiDB + + with SAMADhiDB(credentials=args.database) as db: + existing = Sample.get_or_none(Sample.name == args.name) + with confirm_transaction( + db, + "Insert into the database?" 
if existing is None else f"Replace existing {existing!s}?", + assumeDefault=args.assumeDefault, + ): + sample, created = Sample.get_or_create( + name=args.name, + path=args.path, + defaults={"sampletype": args.type, "nevents_processed": args.nevents_processed}, + ) + sample.sampletype = args.type + sample.nevents_processed = args.nevents_processed + sample.nevents = args.nevents + sample.normalization = args.norm + sample.event_weight_sum = args.weight_sum + sample.luminosity = args.lumi + sample.code_version = args.code_version + sample.user_comment = args.comment + sample.source_dataset = ( + Dataset.get_or_none(Dataset.id == args.source_dataset) + if args.source_dataset is not None + else None + ) + sample.source_sample = ( + Sample.get_or_none(Sample.id == args.source_sample) + if args.source_sample is not None + else None + ) + sample.author = args.author + sample.creation_time = args.time + + if sample.source_dataset is None and not args.assumeDefault: + prompt_dataset(sample) ## TODO: check existence + if sample.source_sample is None and not args.assumeDefault: + prompt_sample(sample) ## TODO: check existence + + if sample.nevents_processed is None: + if sample.source_sample is not None: + sample.nevents_processed = sample.source_sample.nevents_processed + elif sample.source_dataset is not None: + sample.nevents_processed = sample.source_dataset.nevents + else: + print("Warning: Number of processed events not given, and no way to guess it.") + + if args.files is not None: + files = list(args.files.split(",")) + else: + files = glob.glob(os.path.join(sample.path, "*.root")) + if not files: + print(f"Warning: no root files found in {sample.path!r}") + for fName in files: + weight_sum, entries = get_file_data(fName) + File.create( + lfn=fName, + pfn=fName, + event_weight_sum=weight_sum, + nevents=(entries if entries is not None else 0), + sample=sample, + ) ## FIXME extras_event_weight_sum + + if sample.luminosity is None: + sample.luminosity = sample.getLuminosity() + sample.save() + + print(sample) + + +def add_result(args=None): + from .utils import parsePath, userFromPath, timeFromPath, confirm_transaction, prompt_samples + + parser = argparse.ArgumentParser(description="Add a result to the database") + parser.add_argument("path", type=parsePath) + parser.add_argument( + "-s", + "--sample", + dest="inputSamples", + help="comma separated list of samples used as input to produce that result", + ) + parser.add_argument("-d", "--description", help="description of the result") + parser.add_argument("-e", "--elog", help="elog with more details") + parser.add_argument("-A", "--analysis", type=int, help="analysis whose result belong to") + parser.add_argument( + "-a", "--author", help="author of the result. If not specified, is taken from the path" + ) + parser.add_argument( + "-t", + "--time", + help='result timestamp. If set to "path", timestamp will be taken from the path. 
Otherwise, it must be formated like YYYY-MM-DD HH:MM:SS', + ) + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + parser.add_argument( + "-y", + "--continue", + dest="assumeDefault", + action="store_true", + help="Assume defaults instead of prompt", + ) + args = parser.parse_args(args=args) + + if args.author is None: + args.author = userFromPath(args.path) + if args.time == "path": + time = timeFromPath(args.path) + elif args.time is not None: + time = datetime.strptime(args.time, "%Y-%m-%d %H:%M:%S") + else: + time = datetime.now() + + from .SAMADhi import Sample, Result, SampleResult, SAMADhiDB + + with SAMADhiDB(credentials=args.database) as db: + with confirm_transaction(db, "Insert into the database?", assumeDefault=args.assumeDefault): + result = Result.create( + path=args.path, + description=args.description, + author=args.author, + creation_time=time, + elog=args.elog, + analysis=args.analysis, + ) + if args.inputSamples is None: + inputSampleIDs = prompt_samples() + else: + inputSampleIDs = [int(x) for x in args.inputSamples.split(",")] + for smpId in inputSampleIDs: + smp = Sample.get_or_none(Sample.id == smpId) + if not smp: + print(f"Could not find sample #{smpId:d}") + else: + SampleResult.create(sample=smp, result=result) + print(result) + + +def splitWith(sequence, predicate): + trueList, falseList = [], [] + for element in sequence: + if predicate(element): + trueList.append(element) + else: + falseList.append(element) + + +def checkAndClean(args=None): + from .utils import parsePath, redirectOut, arg_loadJSON + + parser = argparse.ArgumentParser(description="Script to check samples for deletion") + parser.add_argument( + "-p", + "--path", + default="./", + type=parsePath, + help="Path to the json files with db analysis results", + ) + parser.add_argument("-o", "--output", default="-", help="Name of the output file") + parser.add_argument( + "-M", + "--cleanupMissing", + action="store_true", + help="Clean samples with missing path from the database.", + ) + parser.add_argument( + "-U", + "--cleanupUnreachable", + action="store_true", + help="Clean samples with unreachable path from the database", + ) + parser.add_argument( + "-D", + "--cleanupDatasets", + action="store_true", + help="Clean orphan datasets from the database", + ) + parser.add_argument( + "-w", + "--whitelist", + type=arg_loadJSON, + help="JSON file with sample whitelists per analysis.", + ) + parser.add_argument( + "-d", + "--dry-run", + action="store_true", + help="Dry run: do not write to file and/or touch the database.", + ) + parser.add_argument( + "--database", + default="~/.samadhi", + help="JSON Config file with database connection settings and credentials", + ) + args = parser.parse_args(args=args) + + from .SAMADhi import SAMADhiDB + + with redirectOut(args.output if not args.dry_run else "-"): + # open the sample analysis report and classify bad samples + samples_missing = arg_loadJSON(os.path.join(args.path, "SamplesAnalysisReport.json")).get( + "MissingDirSamples", [] + ) + smp_white, smp_nonWhite = splitWith( + samples_missing, + lambda smp: any(label in smp["name"] for v in args.whitelist.values() for label in v), + ) + smp_empty, smp_investigate = splitWith(smp_white, lambda smp: smp["path"] == "") + smp_empty_delete, smp_delete = splitWith(smp_nonwhite, lambda smp: smp["path"] == "") + # now clean orphan datasets + ds_orphan = arg_loadJSON(os.path.join(args.path, "DatasetsAnalysisReport.json")).get( 
+ "Orphans", [] + ) + ## print a summary now + print( + "\n\nWhitelisted sample with missing path. Investigate:\n{}".format( + "\n".join(smp["name"] for smp in smp_empty) + ) + ) + print( + "\n\nWhitelisted sample with unreachable path. Investigate:\n{}".format( + "\n".join(smp["name"] for smp in smp_investigate) + ) + ) + print( + "\n\nSamples to be deleted because of missing path:\n{}".format( + "\n".join(smp["name"] for smp in smp_empty_delete) + ) + ) + print( + "\n\nSamples to be deleted because of unreachable path:\n{}".format( + "\n".join(smp["name"] for smp in smp_delete) + ) + ) + ## actually perform the cleanup + with SAMADhiDB(credentials=args.database) as db: + with maybe_dryrun(db, dryRun=args.dry_run): + if opts.cleanupMissing: + for smp in smp_empty_delete: + sample = Sample.get_or_none( + (Sample.id == smp["id"]) & (Sample.name == smp["name"]) + ) + if sample is None: + print( + "Could not find sample #{id} {name}".format(smp["id"], smp["name"]) + ) + else: + smp.removeFiles() + smp.delete_instance() + if opts.cleanupUnreachable: + for smp in smp_delete: + sample = Sample.get_or_none( + (Sample.id == smp["id"]) & (Sample.name == smp["name"]) + ) + if sample is None: + print( + "Could not find sample #{id} {name}".format(smp["id"], smp["name"]) + ) + else: + sample.removeFiles() + sample.delete_instance() + if args.cleanupDatasets: + for ids in ds_orphan: + dataset = Dataset.get_or_none( + (Dataset.id == ids["id"]) & (Dataset.name == ids["name"]) + ) + if dataset is None: + print( + "Could not find dataset #{id} {name}".format(ids["id"], ids["name"]) + ) + else: + dataset.delete_instance() diff --git a/python/userPrompt.py b/python/userPrompt.py deleted file mode 100644 index 32e2199..0000000 --- a/python/userPrompt.py +++ /dev/null @@ -1,107 +0,0 @@ -from cp3_llbb.SAMADhi.SAMADhi import Sample, Dataset - -def confirm(prompt=None, resp=False): - """prompts for yes or no response from the user. Returns True for yes and - False for no. 'resp' should be set to the default value assumed by the caller when - user simply types ENTER. - >>> confirm(prompt='Create Directory?', resp=True) - Create Directory? [y]|n: - True - >>> confirm(prompt='Create Directory?', resp=False) - Create Directory? [n]|y: - False - >>> confirm(prompt='Create Directory?', resp=False) - Create Directory? [n]|y: y - True - """ - if prompt is None: - prompt = 'Confirm' - if resp: - prompt = '%s [%s]|%s: ' % (prompt, 'y', 'n') - else: - prompt = '%s [%s]|%s: ' % (prompt, 'n', 'y') - while True: - ans = raw_input(prompt) - if not ans: - return resp - if ans not in ['y', 'Y', 'n', 'N']: - print 'please enter y or n.' - continue - if ans == 'y' or ans == 'Y': - return True - if ans == 'n' or ans == 'N': - return False - -def parse_samples(inputString): - """parse a comma-separated list of samples""" - return [ int(x) for x in inputString.split(',') ] - -def prompt_samples(store): - """prompts for the source sample among the existing ones""" - print "No source sample defined." - print "Please select the samples associated with this result." 
- # full list of samples - print "Sample\t\tName" - check = store.find(Sample) - all_samples = check.values(Sample.sample_id,Sample.name) - for dset in all_samples: - print "%i\t\t%s"%(dset[0], dset[1]) - # prompt - while True: - try: - return parse_samples(raw_input("Comma-separated list of sample id [None]?")) - except: - continue - -def prompt_sample(sample,store): - """prompts for the source sample among the existing ones""" - print "Please select the sample associated with this sample." - # full list of samples - print "Sample\t\tName" - check = store.find(Sample) - all_samples = check.values(Sample.sample_id,Sample.name) - for dset in all_samples: - print "%i\t\t%s"%(dset[0], dset[1]) - # prompt - while True: - try: - ans = int(raw_input("Sample id [None]?")) - except: - sample.source_sample_id = None - return - check = store.find(Sample,Sample.sample_id==ans) - if check.is_empty(): continue - else: - sample.source_sample_id = ans - return - -def prompt_dataset(sample,store): - """prompts for the source dataset among the existing ones""" - print "Please select the dataset associated with this sample." - # full list of datasets - print "Dataset\t\tName" - check = store.find(Dataset) - all_datasets = check.values(Dataset.dataset_id,Dataset.name) - for dset in all_datasets: - print "%i\t\t%s"%(dset[0], dset[1]) - # datasets whose name contain the sample name - check = store.find(Dataset,Dataset.name.contains_string(sample.name)) - if not check.is_empty(): - print "Suggestions:" - print "Dataset\t\tName" - suggested_datasets = check.values(Dataset.dataset_id,Dataset.name) - for dset in suggested_datasets: - print "%i\t\t%s"%(dset[0], dset[1]) - # prompt - while True: - try: - ans = int(raw_input("Dataset id [None]?")) - except: - sample.source_dataset_id = None - return - check = store.find(Dataset,Dataset.dataset_id==ans) - if check.is_empty(): continue - else: - sample.source_dataset_id = ans - return - diff --git a/python/utils.py b/python/utils.py new file mode 100644 index 0000000..9cc6223 --- /dev/null +++ b/python/utils.py @@ -0,0 +1,202 @@ +from contextlib import contextmanager + + +def parsePath(pth): + """Expand (user and vars), and check that a path is a valid file or directory""" + import os.path + import argparse + + pth = os.path.abspath(os.path.expandvars(os.path.expanduser(pth))) + if not os.path.exists(pth) or not (os.path.isdir(pth) or os.path.isfile(pth)): + raise argparse.ArgumentError(f"{pth} is not an existing file or directory") + return pth + + +def userFromPath(pth): + """Get the username of the path owner""" + import os + from pwd import getpwuid + + return getpwuid(os.stat(pth).st_uid).pw_name + + +def timeFromPath(pth): + import os.path + from datetime import datetime + + return datetime.fromtimestamp(os.path.getctime(pth)) + + +def checkWriteable(pth): + """Expand path, and check that it is writeable and does not exist yet""" + import os, os.path + + pth = os.path.abspath(os.path.expandvars(os.path.expanduser(pth))) + if not os.access(pth, os.W_OK): + raise argparse.ArgumentError(f"Cannot write to {pth}") + if os.path.isfile(pth): + raise argparse.ArgumentError(f"File already exists: {pth}") + return pth + + +@contextmanager +def redirectOut(outArg): + """Redirect sys.stdout to file (if the argument is a writeable file that does not exist yet), + no-op if the argument is '-'""" + if outArg == "-": + yield + else: + outPth = checkWriteable(outArg) + import sys + + with open(outPth, "W") as outF: + bk_stdout = sys.stdout + sys.stdout = outF + yield + sys.stdout 
= bk_stdout + + +def arg_loadJSON(pth): + """Try to parse the JSON file (type for argparse argumet)""" + if pth: + import json + + with open(parsePath(pth)) as jsF: + return json.load(jsF) + else: + return dict() + + +def replaceWildcards(arg, db=None): + if db: + from peewee import SqliteDatabase + + if isinstance(db, SqliteDatabase): + return arg ## sqlite uses the usual * etc. + return arg.replace("*", "%").replace("?", "_") + + +def confirm(prompt=None, resp=False, assumeDefault=False): + """prompts for yes or no response from the user. Returns True for yes and + False for no. 'resp' should be set to the default value assumed by the caller when + user simply types ENTER. + >>> confirm(prompt='Create Directory?', resp=True) + Create Directory? [y]|n: + True + >>> confirm(prompt='Create Directory?', resp=False) + Create Directory? [n]|y: + False + >>> confirm(prompt='Create Directory?', resp=False) + Create Directory? [n]|y: y + True + """ + if prompt is None: + prompt = "Confirm" + if resp: + prompt = "{} [{}]|{}: ".format(prompt, "y", "n") + else: + prompt = "{} [{}]|{}: ".format(prompt, "n", "y") + if assumeDefault: + print("".join((prompt, ("y" if resp else "n")))) + return resp + while True: + ans = input(prompt) + if not ans: + return resp + if ans not in ["y", "Y", "n", "N"]: + print("please enter y or n.") + continue + if ans == "y" or ans == "Y": + return True + if ans == "n" or ans == "N": + return False + + +def prompt_samples(): + """prompts for the source sample among the existing ones""" + from .SAMADhi import Sample + + print("No source sample defined.") + print("Please select the samples associated with this result.") + # full list of samples + print("Sample\t\tName") + for smp in Sample.select(): + print("%i\t\t%s" % (smp.id, smp.name)) + # prompt + while True: + try: + return [int(x) for x in input("Comma-separated list of sample id [None]?").split(",")] + except: + continue + + +def prompt_sample(sample): + """prompts for the source sample among the existing ones""" + from .SAMADhi import Sample + + print("Please select the sample associated with this sample.") + # full list of samples + print("Sample\t\tName") + for smp in Sample.select(): + print("%i\t\t%s" % (smp.id, smp.name)) + # prompt + while True: + try: + ans = int(input("Sample id [None]?")) + except: + sample.source_sample = None + return + smp_db = Sample.get_or_none(Sample.id == ans) + if smp_db is not None: + sample.source_sample = smp_db + else: + continue + + +def prompt_dataset(sample): + """prompts for the source dataset among the existing ones""" + from .SAMADhi import Dataset + + print("Please select the dataset associated with this sample.") + # full list of datasets + print("Dataset\t\tName") + for ds in Dataset.select(): + print("%i\t\t%s" % (ds.id, ds.name)) + # datasets whose name contain the sample name + suggestions = Dataset.select().where(Dataset.name.contains(sample.name)) + if suggestions.count() > 0: + print("Suggestions:") + print("Dataset\t\tName") + suggested_datasets = check.values(Dataset.id, Dataset.name) + for ds in suggested_datasets: + print("%i\t\t%s" % (ds.id, ds.name)) + # prompt + while True: + try: + ans = int(input("Dataset id [None]?")) + except: + sample.source_dataset = None + return + dset_db = Dataset.get_or_none(Dataset.id == ans) + if dset_db is not None: + sample.source_dataset = smp_db + else: + continue + + +@contextmanager +def confirm_transaction(db, prompt, assumeDefault=False): + with db.atomic() as txn: + yield + answer = confirm(prompt=prompt, resp=True, 
assumeDefault=assumeDefault) + if not answer: + txn.rollback() + + +@contextmanager +def maybe_dryrun(db, dryMessage=None, dryRun=False): + with db.atomic() as txn: + yield + if dryRun: + print(dryMessage) + txn.rollback() diff --git a/scripts/SAMADhi_dbAnalysis.py b/scripts/SAMADhi_dbAnalysis.py deleted file mode 100755 index 3dcd189..0000000 --- a/scripts/SAMADhi_dbAnalysis.py +++ /dev/null @@ -1,745 +0,0 @@ -#!/usr/bin/env python - -# Script to do basic checks to the database and output statistics on usage and issues - -import os,errno,json -import re -import ROOT -ROOT.gROOT.SetBatch() -from optparse import OptionParser, OptionGroup -from datetime import date -from collections import defaultdict -from cp3_llbb.SAMADhi.SAMADhi import Analysis, Dataset, Sample, Result, DbStore -from cp3_llbb.SAMADhi.SAMADhi import File as SFile -from storm.info import get_cls_info -from datetime import datetime -from collections import defaultdict -from cp3_llbb.SAMADhi.das_import import query_das - -class MyOptionParser: - """ - Client option parser - """ - def __init__(self): - usage = "Usage: %prog [options]\n" - self.parser = OptionParser(usage=usage) - self.parser.add_option("-p","--path", action="store", type="string", - dest="path", default=datetime.now().strftime("%y%m%d-%H:%M:%S"), - help="Destination path") - self.parser.add_option("-b","--basedir", action="store", type="string", - dest="basedir", default="", - help="Directory where the website will be installed") - self.parser.add_option("-f","--full", action="store_true", - dest="DAScrosscheck", default=False, - help="Full check: compares each Dataset entry to DAS and check for consistency (slow!)") - self.parser.add_option("-d","--dry", action="store_true", - dest="dryRun", default=False, - help="Dry run: do no write to disk") - - def get_opt(self): - """ - Returns parse list of options - """ - opts, args = self.parser.parse_args() - if opts.path is not None: - opts.path = os.path.abspath(os.path.expandvars(os.path.expanduser(opts.path))) - if not opts.dryRun and os.path.exists(opts.path): - raise OSError(errno.EEXIST,"Existing directory",opts.path); - return opts - -def main(): - """Main function""" - # get the options - optmgr = MyOptionParser() - opts = optmgr.get_opt() - # connect to the MySQL database using default credentials - dbstore = DbStore() - # prepare the output directory - if not os.path.exists(opts.path) and not opts.dryRun: - os.makedirs(opts.path) - # run each of the checks and collect data - - # collect general statistics - outputDict = collectGeneralStats(dbstore,opts) - if not opts.dryRun: - with open(opts.path+'/stats.json', 'w') as outfile: - json.dump(outputDict, outfile, default=encode_storm_object) - force_symlink(opts.path+'/stats.json',opts.basedir+'/data/stats.json') - - # check datasets - outputDict = {} - outputDict["DatabaseInconsistencies"] = checkDatasets(dbstore,opts) if opts.DAScrosscheck else copyInconsistencies(opts.basedir) - dbstore = DbStore() # reconnect, since the checkDatasets may take very long... 
- outputDict["Orphans"] = findOrphanDatasets(dbstore,opts) - outputDict["IncompleteData"] = checkDatasetsIntegrity(dbstore,opts) - outputDict["DatasetsStatistics"] = analyzeDatasetsStatistics(dbstore,opts) - if not opts.dryRun: - with open(opts.path+'/DatasetsAnalysisReport.json', 'w') as outfile: - json.dump(outputDict, outfile, default=encode_storm_object) - force_symlink(opts.path+'/DatasetsAnalysisReport.json',opts.basedir+'/data/DatasetsAnalysisReport.json') - - # check samples - outputDict = {} - outputDict["MissingDirSamples"] = checkSamplePath(dbstore,opts) - outputDict["DatabaseInconsistencies"] = checkSampleConsistency(dbstore,opts) - outputDict["SampleStatistics"] = analyzeSampleStatistics(dbstore,opts) - if not opts.dryRun: - with open(opts.path+'/SamplesAnalysisReport.json', 'w') as outfile: - json.dump(outputDict, outfile, default=encode_storm_object) - force_symlink(opts.path+'/SamplesAnalysisReport.json',opts.basedir+'/data/SamplesAnalysisReport.json') - - # now, check results - outputDict = {} - outputDict["MissingDirSamples"] = checkResultPath(dbstore,opts) - outputDict["DatabaseInconsistencies"] = checkResultConsistency(dbstore,opts) - outputDict["SelectedResults"] = selectResults(dbstore,opts) - outputDict["ResultsStatistics"] = analyzeResultsStatistics(dbstore,opts) - if not opts.dryRun: - with open(opts.path+'/ResultsAnalysisReport.json', 'w') as outfile: - json.dump(outputDict, outfile, default=encode_storm_object) - force_symlink(opts.path+'/ResultsAnalysisReport.json',opts.basedir+'/data/ResultsAnalysisReport.json') - - # finally, some stats about Analysis objects - outputDict = {} - outputDict["AnalysisStatistics"] = analyzeAnalysisStatistics(dbstore,opts) - if not opts.dryRun: - with open(opts.path+'/AnalysisAnalysisReport.json', 'w') as outfile: - json.dump(outputDict, outfile, default=encode_storm_object) - force_symlink(opts.path+'/AnalysisAnalysisReport.json',opts.basedir+'/data/AnalysisAnalysisReport.json') - -def collectGeneralStats(dbstore,opts): - # get number of datasets, samples, results, analyses - result = {} - results = dbstore.find(Result) - samples = dbstore.find(Sample) - datasets = dbstore.find(Dataset) - analyses = dbstore.find(Analysis) - result["nDatasets"] = datasets.count() - result["nSamples"] = samples.count() - result["nResults"] = results.count() - result["nAnalyses"] = analyses.count() - print "\nGeneral statistics:" - print '======================' - print datasets.count(), " datasets" - print samples.count(), " samples" - print results.count(), " results" - return result - -def checkDatasets(dbstore,opts): - datasets = dbstore.find(Dataset) - print "\nDatasets inconsistent with DAS:" - print '==================================' - result = [] - for dataset in datasets: - # query DAS to get the same dataset, by name - metadata = {} - try: - metadata = query_das(dataset.name) - except: - result.append([dataset,"Inconsistent with DAS"]) - print "%s (imported on %s) -- Error getting dataset in DAS"%(str(dataset.name),str(dataset.creation_time)) - continue - - # perform some checks: - try: - # release name either matches or is unknown in DAS - test1 = str(metadata[u'release'])=="unknown" or dataset.cmssw_release == str(metadata[u'release']) - # datatype matches - test2 = dataset.datatype == metadata[u'datatype'] - # nevents matches - test3 = dataset.nevents == metadata[u'nevents'] - # size matches - test4 = dataset.dsize == metadata[u'file_size'] - except: - result.append([dataset,"Inconsistent with DAS"]) - print "%s (imported on 
%s)"%(str(dataset.name),str(dataset.creation_time)) - else: - if not(test1 and test2 and test3 and test4): - result.append([dataset,"Inconsistent with DAS"]) - print "%s (imported on %s)"%(str(dataset.name),str(dataset.creation_time)) - return result - -def findOrphanDatasets(dbstore,opts): - datasets = dbstore.find(Dataset) - print "\nOrphan Datasets:" - print '===================' - result = [] - for dataset in datasets: - if dataset.samples.count()==0: - result.append(dataset) - print "%s (imported on %s)"%(str(dataset.name),str(dataset.creation_time)) - if len(result)==0: - print "None" - return result - -def checkDatasetsIntegrity(dbstore,opts): - datasets = dbstore.find(Dataset) - print "\nDatasets integrity issues:" - print '===========================' - result = [] - for dataset in datasets: - if dataset.cmssw_release is None: - result.append([dataset,"missing CMSSW release"]) - print "%s (imported on %s): missing CMSSW release"%(str(dataset.name),str(dataset.creation_time)) - elif dataset.energy is None: - result.append([dataset,"missing Energy"]) - print "%s (imported on %s): missing Energy"%(str(dataset.name),str(dataset.creation_time)) - elif dataset.globaltag is None: - result.append([dataset,"missing Globaltag"]) - print "%s (imported on %s): missing Globaltag"%(str(dataset.name),str(dataset.creation_time)) - if len(result)==0: - print "None" - return result - - -def analyzeDatasetsStatistics(dbstore,opts): - # ROOT output - if not opts.dryRun: - rootfile = ROOT.TFile(opts.path+"/analysisReport.root","update") - stats = {} - # Releases used - output = dbstore.execute("select dataset.cmssw_release,COUNT(dataset.dataset_id) as numOfDataset FROM dataset GROUP BY cmssw_release") - stats["cmssw_release"] = output.get_all() - if None in stats["cmssw_release"]: - stats["cmssw_release"]["Unknown"] = stats["cmssw_release"][None] + stats["cmssw_release"].get("Unknown",0) - del stats["cmssw_release"][None] - releasePie = ROOT.TPie("datasetReleasePie","Datasets release",len(stats["cmssw_release"])) - for index,entry in enumerate(stats["cmssw_release"]): - releasePie.SetEntryVal(index,entry[1]) - releasePie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - releasePie.SetTextAngle(0); - releasePie.SetRadius(0.3); - releasePie.SetTextColor(1); - releasePie.SetTextFont(62); - releasePie.SetTextSize(0.03); - canvas = ROOT.TCanvas("datasetRelease","",2) - releasePie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # GlobalTag used - output = dbstore.execute("select dataset.globaltag,COUNT(dataset.dataset_id) as numOfDataset FROM dataset GROUP BY globaltag") - stats["globaltag"] = output.get_all() - if None in stats["globaltag"]: - stats["globaltag"]["Unknown"] = stats["globaltag"][None] + stats["globaltag"].get("Unknown",0) - del stats["globaltag"][None] - globaltagPie = ROOT.TPie("datasetGTPie","Datasets globaltag",len(stats["globaltag"])) - for index,entry in enumerate(stats["globaltag"]): - globaltagPie.SetEntryVal(index,entry[1]) - globaltagPie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - globaltagPie.SetTextAngle(0); - globaltagPie.SetRadius(0.3); - globaltagPie.SetTextColor(1); - globaltagPie.SetTextFont(62); - globaltagPie.SetTextSize(0.03); - canvas = ROOT.TCanvas("datasetGT","",2) - globaltagPie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # Datatype - output = dbstore.execute("select dataset.datatype,COUNT(dataset.dataset_id) as numOfDataset FROM dataset GROUP BY datatype") - stats["datatype"] = output.get_all() - if None in 
stats["datatype"]: - stats["datatype"]["Unknown"] = stats["datatype"][None] + stats["datatype"].get("Unknown",0) - del stats["datatype"][None] - datatypePie = ROOT.TPie("datasetTypePie","Datasets datatype",len(stats["datatype"])) - for index,entry in enumerate(stats["datatype"]): - datatypePie.SetEntryVal(index,entry[1]) - datatypePie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - datatypePie.SetTextAngle(0); - datatypePie.SetRadius(0.3); - datatypePie.SetTextColor(1); - datatypePie.SetTextFont(62); - datatypePie.SetTextSize(0.03); - canvas = ROOT.TCanvas("datasetType","",2) - datatypePie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # Energy - output = dbstore.execute("select dataset.energy,COUNT(dataset.dataset_id) as numOfDataset FROM dataset GROUP BY energy") - stats["energy"] = output.get_all() - energyPie = ROOT.TPie("datasetEnergyPie","Datasets energy",len(stats["energy"])) - for index,entry in enumerate(stats["energy"]): - energyPie.SetEntryVal(index,entry[1]) - energyPie.SetEntryLabel(index,"None" if entry[0] is None else str(entry[0])) - energyPie.SetTextAngle(0); - energyPie.SetRadius(0.3); - energyPie.SetTextColor(1); - energyPie.SetTextFont(62); - energyPie.SetTextSize(0.03); - canvas = ROOT.TCanvas("datasetEnergy","",2) - energyPie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # get all datasets to loop - datasets = dbstore.find(Dataset) - datasets.order_by(Dataset.creation_time) - # time evolution of # datasets (still in db) - datasets_time = [[0,0]] - # various stats (histograms) - datasets_nsamples = ROOT.TH1I("dataseets_nsamples","datasets_nsamples",10,0,10) - datasets_nevents = ROOT.TH1I("dataseets_nevents", "datasets_nevents" ,100,0,-100) - datasets_dsize = ROOT.TH1I("dataseets_dsize", "datasets_dsize" ,100,0,-100) - # let's go... loop - for dataset in datasets: - # for Highcharts the time format is #seconds since epoch - time = int(dataset.creation_time.strftime("%s"))*1000 - datasets_time.append([time,datasets_time[-1][1]+1]) - datasets_nsamples.Fill(dataset.samples.count()) - datasets_nevents.Fill(dataset.nevents) - datasets_dsize.Fill(dataset.dsize) - # drop this: just to initialize the loop - datasets_time.pop(0) - # output - stats["datasetsTimeprof"] = datasets_time - datasetsTimeprof_graph = ROOT.TGraph(len(datasets_time)) - for i,s in enumerate(datasets_time): - datasetsTimeprof_graph.SetPoint(i,s[0]/1000,s[1]) - if not opts.dryRun: - datasetsTimeprof_graph.Write("datasetsTimeprof_graph") - data = [] - for bin in range(1,datasets_nsamples.GetNbinsX()+1): - data.append([datasets_nsamples.GetBinCenter(bin),datasets_nsamples.GetBinContent(bin)]) - stats["datasetsNsamples"] = data - data = [] - for bin in range(1,datasets_nevents.GetNbinsX()+1): - data.append([datasets_nevents.GetBinCenter(bin),datasets_nevents.GetBinContent(bin)]) - stats["datasetsNevents"] = data - data = [] - for bin in range(1,datasets_dsize.GetNbinsX()+1): - data.append([datasets_dsize.GetBinCenter(bin),datasets_dsize.GetBinContent(bin)]) - stats["datasetsDsize"] = data - # some printout - print "\nDatasets Statistics extracted." - print '=================================' - # ROOT output - if not opts.dryRun: - rootfile.Write(); - rootfile.Close(); - # JSON output - return stats - - -def checkResultPath(dbstore,opts): - # get all samples - result = dbstore.find(Result) - print "\nResults with missing path:" - print '===========================' - array = [] - for res in result: - # check that the path exists, and keep track of the sample if not the case. 
- if not os.path.exists(res.path): - print "Result #%s (created on %s by %s):"%(str(res.result_id),str(res.creation_time),str(res.author)), - print " missing path: %s" %res.path - array.append(res) - if len(array)==0: print "None" - return array - - -def checkSamplePath(dbstore,opts): - # get all samples - result = dbstore.find(Sample) - print "\nSamples with missing path:" - print '===========================' - array = [] - for sample in result: - # check that the path exists, and keep track of the sample if not the case. - vpath = getSamplePath(sample,dbstore) - for path in vpath: - if not os.path.exists(path): - print "Sample #%s (created on %s by %s):"%(str(sample.sample_id),str(sample.creation_time),str(sample.author)), - print " missing path: %s" %path - print vpath - array.append(sample) - break - if len(array)==0: print "None" - return array - -def getSamplePath(sample,dbstore): - vpath=[] - # the path should be stored in sample.path - # if it is empty, look for files in that path - if sample.path=="": - regex = r".*SFN=(.*)" - files = dbstore.find(SFile, SFile.sample_id==sample.sample_id) - for file in files: - m = re.search(regex,str(file.pfn)) - if m: vpath.append(os.path.dirname(m.group(1))) - vpath=list(set(vpath)) - return vpath - else: - return [sample.path] - -def selectResults(dbstore,opts): - # look for result records pointing to a ROOT file - # eventually further filter - results = dbstore.find(Result) - print "\nSelected results:" - print '===========================' - array = [] - for result in results: - path = result.path - if os.path.exists(path) and os.path.isdir(path): - files = [ f for f in os.listdir(path) if os.path.isfile(path+"/"+f) ] - if len(files)==1: - path = path+"/"+f - result.path = path - if os.path.exists(path) and os.path.isfile(path) and path.lower().endswith(".root"): - symlink = "%s/data/result_%s.root"%(opts.basedir,str(result.result_id)) - relpath = "../data/result_%s.root"%(str(result.result_id)) - force_symlink(path,symlink) - array.append([result,relpath]) - print "Result #%s (created on %s by %s): "%(str(result.result_id),str(result.creation_time),str(result.author)), - print symlink - - if len(array)==0: print "None" - return array - -def checkResultConsistency(dbstore,opts): - # get all samples - result = dbstore.find(Result) - print "\nResults with missing source:" - print '=============================' - array = [] - for res in result: - # check that the source sample exists in the database. - # normaly, this should be protected already at the level of sql rules - for sample in res.samples: - if sample is None: - print "Result #%s (created on %s by %s):"%(str(res.result_id),str(res.creation_time),str(res.author)), - print "inconsistent source sample" - array.append([res,"inconsistent source sample"]) - print res - break - if len(array)==0: print "None" - return array - - -def checkSampleConsistency(dbstore,opts): - # get all samples - result = dbstore.find(Sample) - print "\nSamples with missing source:" - print '=============================' - array = [] - for sample in result: - # check that either the source dataset or the source sample exists in the database. 
- # normaly, this should be protected already at the level of sql rules - sourceDataset = sample.source_dataset - sourceSample = sample.source_sample - if (sample.source_dataset_id is not None) and (sourceDataset is None): - print "Sample #%s (created on %s by %s):"%(str(sample.sample_id),str(sample.creation_time),str(sample.author)), - print "inconsistent source dataset" - array.append([sample,"inconsistent source dataset"]) - print sample - if (sample.source_sample_id is not None) and (sourceSample is None): - print "Sample #%s (created on %s by %s):"%(str(sample.sample_id),str(sample.creation_time),str(sample.author)), - print "inconsistent source sample" - array.append([sample,"inconsistent source sample"]) - if len(array)==0: print "None" - return array - -def analyzeAnalysisStatistics(dbstore,opts): - stats = {} - # ROOT output - if not opts.dryRun: - rootfile = ROOT.TFile(opts.path+"/analysisReport.root","update") - # contact - output = dbstore.execute("select analysis.contact,COUNT(analysis.analysis_id) as numOfAnalysis FROM analysis GROUP BY contact") - stats["analysisContacts"] = output.get_all() - if None in stats["analysisContacts"]: - stats["analysisContacts"]["Unknown"] = stats["analysisContacts"][None] + stats["analysisContacts"].get("Unknown",0) - del stats["analysisContacts"][None] - contactPie = ROOT.TPie("AnalysisContactPie","Analysis contacts",len(stats["analysisContacts"])) - for index,entry in enumerate(stats["analysisContacts"]): - contactPie.SetEntryVal(index,entry[1]) - contactPie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - contactPie.SetTextAngle(0); - contactPie.SetRadius(0.3); - contactPie.SetTextColor(1); - contactPie.SetTextFont(62); - contactPie.SetTextSize(0.03); - canvas = ROOT.TCanvas("analysisContact","",2) - contactPie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # analysis size in terms of results (pie) - output = dbstore.execute("select analysis.description,COUNT(result.result_id) as numOfResults FROM result INNER JOIN analysis ON result.analysis_id=analysis.analysis_id GROUP BY result.analysis_id;") - stats["analysisResults"] = output.get_all() - if None in stats["analysisResults"]: - stats["analysisResults"]["Unknown"] = stats["analysisResults"][None] + stats["analysisResults"].get("Unknown",0) - del stats["analysisResults"][None] - resultPie = ROOT.TPie("AnalysisResultsPie","Analysis results",len(stats["analysisResults"])) - for index,entry in enumerate(stats["analysisResults"]): - resultPie.SetEntryVal(index,entry[1]) - resultPie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - resultPie.SetTextAngle(0); - resultPie.SetRadius(0.3); - resultPie.SetTextColor(1); - resultPie.SetTextFont(62); - resultPie.SetTextSize(0.03); - canvas = ROOT.TCanvas("analysisResults","",2) - resultPie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # stats to collect: group distribution (from CADI line) (pie) - analyses = dbstore.find(Analysis) - regex = r".*([A-Z]{3})-\d{2}-\d{3}" - stats["physicsGroup"] = defaultdict(int) - for analysis in analyses: - m = re.search(regex,str(analysis.cadiline)) - physicsGroup = "NONE" - if m: - physicsGroup = m.group(1) - stats["physicsGroup"][physicsGroup] += 1 - stats["physicsGroup"] = dict(stats["physicsGroup"]) - if None in stats["physicsGroup"]: - stats["physicsGroup"]["Unknown"] = stats["physicsGroup"][None] + stats["physicsGroup"].get("Unknown",0) - del stats["physicsGroup"][None] - - # the end of the loop, we have all what we need to fill a pie chart. 
- physicsGroupPie = ROOT.TPie("physicsGroupPie","Physics groups",len(stats["physicsGroup"])) - for index,(group,count) in enumerate(stats["physicsGroup"].iteritems()): - physicsGroupPie.SetEntryVal(index,count) - physicsGroupPie.SetEntryLabel(index,group) - physicsGroupPie.SetTextAngle(0); - physicsGroupPie.SetRadius(0.3); - physicsGroupPie.SetTextColor(1); - physicsGroupPie.SetTextFont(62); - physicsGroupPie.SetTextSize(0.03); - canvas = ROOT.TCanvas("physicsGroup","",2) - physicsGroupPie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # some printout - print "\nAnalysis Statistics extracted." - print '================================' - # ROOT output - if not opts.dryRun: - rootfile.Write(); - rootfile.Close(); - # JSON output - stats["physicsGroup"] = [ [a,b] for (a,b) in stats["physicsGroup"].items()] - return stats - -def analyzeResultsStatistics(dbstore,opts): - stats = {} - # ROOT output - if not opts.dryRun: - rootfile = ROOT.TFile(opts.path+"/analysisReport.root","update") - #authors statistics - output = dbstore.execute("select result.author,COUNT(result.result_id) as numOfResults FROM result GROUP BY author") - stats["resultsAuthors"] = output.get_all() - if None in stats["resultsAuthors"]: - stats["resultsAuthors"]["Unknown"] = stats["resultsAuthors"][None] + stats["resultsAuthors"].get("Unknown",0) - del stats["resultsAuthors"][None] - authorPie = ROOT.TPie("resultsAuthorsPie","Results authors",len(stats["resultsAuthors"])) - for index,entry in enumerate(stats["resultsAuthors"]): - authorPie.SetEntryVal(index,entry[1]) - authorPie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - authorPie.SetTextAngle(0); - authorPie.SetRadius(0.3); - authorPie.SetTextColor(1); - authorPie.SetTextFont(62); - authorPie.SetTextSize(0.03); - canvas = ROOT.TCanvas("resultsAuthor","",2) - authorPie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - result_nsamples = ROOT.TH1I("result_nsamples","result_nsamples",20,0,20) - # get all samples to loop - results = dbstore.find(Result) - results.order_by(Result.creation_time) - # time evolution of # results (still in db) - results_time = [[0,0]] - # let's go... loop - for result in results: - # for Highcharts the time format is #seconds since epoch - time = int(result.creation_time.strftime("%s"))*1000 - results_time.append([time,results_time[-1][1]+1]) - result_nsamples.Fill(result.samples.count()) - # drop this: just to initialize the loop - results_time.pop(0) - # output - stats["resultsTimeprof"] = results_time - resultsTimeprof_graph = ROOT.TGraph(len(results_time)) - for i,s in enumerate(results_time): - resultsTimeprof_graph.SetPoint(i,s[0]/1000,s[1]) - if not opts.dryRun: - resultsTimeprof_graph.Write("resultsTimeprof_graph") - data = [] - for bin in range(1,result_nsamples.GetNbinsX()+1): - data.append([result_nsamples.GetBinCenter(bin),result_nsamples.GetBinContent(bin)]) - stats["resultNsamples"] = data - # some printout - print "\nResults Statistics extracted." 
- print '================================' - # ROOT output - if not opts.dryRun: - rootfile.Write(); - rootfile.Close(); - # JSON output - return stats - -def analyzeSampleStatistics(dbstore,opts): - stats = {} - # ROOT output - if not opts.dryRun: - rootfile = ROOT.TFile(opts.path+"/analysisReport.root","update") - #authors statistics - output = dbstore.execute("select sample.author,COUNT(sample.sample_id) as numOfSamples FROM sample GROUP BY author") - stats["sampleAuthors"] = output.get_all() - if None in stats["sampleAuthors"]: - stats["sampleAuthors"]["Unknown"] = stats["sampleAuthors"][None] + stats["sampleAuthors"].get("Unknown",0) - del stats["sampleAuthors"][None] - authorPie = ROOT.TPie("sampleAuthorsPie","Samples authors",len(stats["sampleAuthors"])) - for index,entry in enumerate(stats["sampleAuthors"]): - authorPie.SetEntryVal(index,entry[1]) - authorPie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - authorPie.SetTextAngle(0); - authorPie.SetRadius(0.3); - authorPie.SetTextColor(1); - authorPie.SetTextFont(62); - authorPie.SetTextSize(0.03); - canvas = ROOT.TCanvas("sampleAuthor","",2) - authorPie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - #sample types statistics - output = dbstore.execute("select sample.sampletype,COUNT(sample.sample_id) as numOfSamples FROM sample GROUP BY sampletype") - stats["sampleTypes"] = output.get_all() - if None in stats["sampleTypes"]: - stats["sampleTypes"]["Unknown"] = stats["sampleTypes"][None] + stats["sampleTypes"].get("Unknown",0) - del stats["sampleTypes"][None] - typePie = ROOT.TPie("sampleTypesPie","Samples types",len(stats["sampleTypes"])) - for index,entry in enumerate(stats["sampleTypes"]): - typePie.SetEntryVal(index,entry[1]) - typePie.SetEntryLabel(index,"None" if entry[0] is None else entry[0]) - typePie.SetTextAngle(0); - typePie.SetRadius(0.3); - typePie.SetTextColor(1); - typePie.SetTextFont(62); - typePie.SetTextSize(0.03); - canvas = ROOT.TCanvas("sampleType","",2) - typePie.Draw("r") - if not opts.dryRun: - ROOT.gPad.Write() - # get all samples to loop - result = dbstore.find(Sample) - result.order_by(Sample.creation_time) - # events statistics - sample_nevents_processed = ROOT.TH1I("sample_nevents_processed","sample_nevents_processed",100,0,-100) - sample_nevents = ROOT.TH1I("sample_nevents","sample_nevents",100,0,-100) - # time evolution of statistics & # samples (still in db) - sample_nevents_processed_time = [[0,0]] - sample_nevents_time = [[0,0]] - samples_time = [[0,0]] - # let's go... 
loop - for sample in result: - # for Highcharts the time format is #seconds since epoch - time = int(sample.creation_time.strftime("%s"))*1000 - ne = 0 if sample.nevents is None else sample.nevents - np = 0 if sample.nevents_processed is None else sample.nevents_processed - sample_nevents_processed.Fill(np) - sample_nevents.Fill(ne) - sample_nevents_processed_time.append([time,sample_nevents_processed_time[-1][1]+np]) - sample_nevents_time.append([time,sample_nevents_time[-1][1]+ne]) - samples_time.append([time,samples_time[-1][1]+1]) - # drop this: just to initialize the loop - sample_nevents_processed_time.pop(0) - sample_nevents_time.pop(0) - samples_time.pop(0) - # output - stats["sampleNeventsTimeprof"] = sample_nevents_time - stats["sampleNeventsProcessedTimeprof"] = sample_nevents_processed_time - stats["samplesTimeprof"] = samples_time - sampleNeventsTimeprof_graph = ROOT.TGraph(len(sample_nevents_time)) - sampleNeventsProcessedTimeprof_graph = ROOT.TGraph(len(sample_nevents_processed_time)) - samplesTimeprof_graph = ROOT.TGraph(len(samples_time)) - for i,s in enumerate(sample_nevents_time): - sampleNeventsTimeprof_graph.SetPoint(i,s[0]/1000,s[1]) - for i,s in enumerate(sample_nevents_processed_time): - sampleNeventsProcessedTimeprof_graph.SetPoint(i,s[0]/1000,s[1]) - for i,s in enumerate(samples_time): - samplesTimeprof_graph.SetPoint(i,s[0]/1000,s[1]) - if not opts.dryRun: - sampleNeventsTimeprof_graph.Write("sampleNeventsTimeprof_graph") - sampleNeventsProcessedTimeprof_graph.Write("sampleNeventsProcessedTimeprof_graph") - samplesTimeprof_graph.Write("samplesTimeprof_graph") - # unfortunately, TBufferJSON is not available in CMSSW (no libRHttp) -> no easy way to export to JSON - # the JSON format for highcharts data is [ [x1,y1], [x2,y2], ... ] - data = [] - for bin in range(1,sample_nevents.GetNbinsX()+1): - data.append([sample_nevents.GetBinCenter(bin),sample_nevents.GetBinContent(bin)]) - stats["sampleNevents"] = data - data = [] - for bin in range(1,sample_nevents_processed.GetNbinsX()+1): - data.append([sample_nevents_processed.GetBinCenter(bin),sample_nevents_processed.GetBinContent(bin)]) - stats["sampleNeventsProcessed"] = data - # some printout - print "\nSamples Statistics extracted." - print '================================' - # ROOT output - if not opts.dryRun: - rootfile.Write(); - rootfile.Close(); - # JSON output - return stats - -# function to serialize the storm objects, -# from Jamu Kakar and Mario Zito at https://lists.ubuntu.com/archives/storm/2010-May/001286.html -def encode_storm_object(object): - ''' Serializes to JSON a Storm object - - Use: - from storm.info import get_cls_info - import json - ... - storm_object = get_storm_object() - print json.dumps(storm_object, default=encode_storm_object) - - Warnings: - Serializes objects containing Int, Date and Unicode data types - other datatypes are not tested. 
MUST be improved - ''' - if not hasattr(object, "__storm_table__"): - raise TypeError(repr(object) + " is not JSON serializable") - result = {} - cls_info = get_cls_info(object.__class__) - for name in cls_info.attributes.iterkeys(): - value= getattr(object, name) - if (isinstance(value, date)): - value= str(value) - result[name] = value - return result - -def force_symlink(file1, file2): - try: - os.symlink(file1, file2) - except OSError, e: - if e.errno == errno.EEXIST: - os.remove(file2) - os.symlink(file1, file2) - -def copyInconsistencies(basedir): - # try to read inconsistencies from previous job - # the file must be there and must contain the relevant data - try: - with open(basedir+'/data/DatasetsAnalysisReport.json') as jfile: - content = json.load(jfile) - return content["DatabaseInconsistencies"] - except IOError: - # no file. Return an empty string. - # This will happen if basedir is not (properly) set or if it is new. - print("No previous dataset analysis report found in path. The Database inconsistencies will be empty.") - return [] - except KeyError: - # no proper key. Return an empty string. - # This should not happen, so print a warning. - print("No DatabaseInconsistencies key in the previous json file ?!") - return [] - -# -# main -# -if __name__ == '__main__': - main() - diff --git a/scripts/add_result.py b/scripts/add_result.py deleted file mode 100755 index 9e87297..0000000 --- a/scripts/add_result.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python - -# Script to add a sample to the database - -import os -from pwd import getpwuid -from datetime import datetime -from optparse import OptionParser -from cp3_llbb.SAMADhi.SAMADhi import Analysis, Sample, Result, DbStore -from cp3_llbb.SAMADhi.userPrompt import confirm, prompt_samples, parse_samples - -class MyOptionParser: - """ - Client option parser - """ - def __init__(self): - usage = "Usage: %prog path [options]\n" - self.parser = OptionParser(usage=usage) - self.parser.add_option("-s", "--sample", action="store", type="string", - default=None, dest="inputSamples", - help="comma separated list of samples used as input to produce that result") - self.parser.add_option("-d", "--description", action="store", type="string", - default=None, dest="desc", - help="description of the result") - self.parser.add_option("-e", "--elog", action="store", type="string", - default=None, dest="elog", - help="elog with more details") - self.parser.add_option("-A", "--analysis", action="store", type="int", - default=None, dest="ana", - help="analysis whose result belong to") - self.parser.add_option("-a", "--author", action="store", type="string", - default=None, dest="author", - help="author of the result. If not specified, is taken from the path.") - self.parser.add_option("-t", "--time", action="store", type="string", - default=None, dest="time", - help="result timestamp. If set to \"path\", timestamp will be taken from the path. 
Otherwise, it must be formated like YYYY-MM-DD HH:MM:SS") - - def get_opt(self): - """ - Returns parse list of options - """ - opts, args = self.parser.parse_args() - # check that the path exists - if len(args) < 1: - self.parser.error("path is mandatory") - opts.path = os.path.abspath(os.path.expandvars(os.path.expanduser(args[0]))) - if not os.path.exists(opts.path) or not ( os.path.isdir(opts.path) or os.path.isfile(opts.path)) : - self.parser.error("%s is not an existing file or directory"%opts.path) - # set author - if opts.author is None: - opts.author = getpwuid(os.stat(opts.path).st_uid).pw_name - # set timestamp - if not opts.time is None: - if opts.time=="path": - opts.datetime = datetime.fromtimestamp(os.path.getctime(opts.path)) - else: - opts.datetime = datetime.strptime(opts.time,'%Y-%m-%d %H:%M:%S') - else: - opts.datetime = datetime.now() - return opts - -def main(): - """Main function""" - # get the options - optmgr = MyOptionParser() - opts = optmgr.get_opt() - # build the result from user input - result = Result(unicode(opts.path)) - result.description = unicode(opts.desc) - result.author = unicode(opts.author) - result.creation_time = opts.datetime - result.elog = unicode(opts.elog) - result.analysis_id = opts.ana - # connect to the MySQL database using default credentials - dbstore = DbStore() - # unless the source is set, prompt the user and present a list to make a choice - if opts.inputSamples is None: - inputSamples = prompt_samples(dbstore) - else: - inputSamples = parse_samples(opts.inputSamples) - # create and store the relations - samples = dbstore.find(Sample,Sample.sample_id.is_in(inputSamples)) - if samples.is_empty(): - dbstore.add(result) - else: - for sample in samples: - sample.results.add(result) - # flush (populates the analysis if needed) - dbstore.flush() - # print the resulting object and ask for confirmation - print result - if confirm(prompt="Insert into the database?", resp=True): - dbstore.commit() - -# -# main -# -if __name__ == '__main__': - main() diff --git a/scripts/add_sample.py b/scripts/add_sample.py deleted file mode 100755 index 4970793..0000000 --- a/scripts/add_sample.py +++ /dev/null @@ -1,199 +0,0 @@ -#!/usr/bin/env python - -# Script to add a sample to the database - -import os -import glob -from pwd import getpwuid -from optparse import OptionParser -from datetime import datetime -from cp3_llbb.SAMADhi.SAMADhi import Dataset, Sample, File, DbStore -from cp3_llbb.SAMADhi.userPrompt import confirm, prompt_dataset, prompt_sample - -def get_file_data_(f_): - import ROOT - - f = ROOT.TFile.Open(f_) - if not f: - return (None, None) - - weight_sum = f.Get("event_weight_sum") - if weight_sum: - weight_sum = weight_sum.GetVal() - else: - weight_sum = None - - entries = None - tree = f.Get("t") - if tree: - entries = tree.GetEntriesFast() - - return (weight_sum, entries) - - -class MyOptionParser: - """ - Client option parser - """ - def __init__(self): - usage = "Usage: %prog type path [options]\n" - usage += "where type is one of PAT, SKIM, RDS, NTUPLES, HISTOS, ...\n" - usage += " and path is the location of the sample on disk" - self.parser = OptionParser(usage=usage) - self.parser.add_option("--name", action="store", type="string", - default=None, dest="name", - help="specify sample name") - self.parser.add_option("--processed", action="store", type="int", - default=None, dest="nevents_processed", - help="number of processed events (from the input)") - self.parser.add_option("--nevents", action="store", type="int", - default=None, 
dest="nevents", - help="number of events (in the sample)") - self.parser.add_option("--norm", action="store", type="float", - default=1.0, dest="normalization", - help="additional normalization factor") - self.parser.add_option("--weight-sum", action="store", type="float", - default=1.0, dest="weight_sum", - help="additional normalization factor") - self.parser.add_option("--lumi", action="store", type="float", - default=None, dest="luminosity", - help="sample (effective) luminosity") - self.parser.add_option("--code_version", action="store", type="string", - default="", dest="code_version", - help="version of the code used to process that sample (e.g. git tag or commit)") - self.parser.add_option("--comment", action="store", type="string", - default="", dest="user_comment", - help="comment about the dataset") - self.parser.add_option("--source_dataset", action="store", type="int", - default=None, dest="source_dataset_id", - help="reference to the source dataset") - self.parser.add_option("--source_sample", action="store", type="int", - default=None, dest="source_sample_id", - help="reference to the source sample, if any") - self.parser.add_option("-a", "--author", action="store", type="string", - default=None, dest="author", - help="author of the result. If not specified, is taken from the path.") - self.parser.add_option("--files", action="store", type="string", - default="", dest="files", - help="list of files (full path, comma-separated values)") - self.parser.add_option("-t", "--time", action="store", type="string", - default=None, dest="time", - help="result timestamp. If set to \"path\", timestamp will be taken from the path. Otherwise, it must be formated like YYYY-MM-DD HH:MM:SS. Default is current time.") - - def get_opt(self): - """ - Returns parse list of options - """ - opts, args = self.parser.parse_args() - # mandatory arguments - if len(args) < 2: - self.parser.error("type and path are mandatory") - opts.sampletype = args[0] - opts.path = os.path.abspath(os.path.expandvars(os.path.expanduser(args[1]))) - # check path - if not os.path.exists(opts.path) or not ( os.path.isdir(opts.path) or os.path.isfile(opts.path)) : - self.parser.error("%s is not an existing directory"%opts.path) - # set author - if opts.author is None: - opts.author = getpwuid(os.stat(opts.path).st_uid).pw_name - # set timestamp - if not opts.time is None: - if opts.time=="path": - opts.datetime = datetime.fromtimestamp(os.path.getctime(opts.path)) - else: - opts.datetime = datetime.strptime(opts.time,'%Y-%m-%d %H:%M:%S') - else: - opts.datetime = datetime.now() - # set name - if opts.name is None: - if opts.path[-1]=='/': - opts.name = opts.path.split('/')[-2] - else: - opts.name = opts.path.split('/')[-1] - return opts - -def main(): - """Main function""" - # get the options - optmgr = MyOptionParser() - opts = optmgr.get_opt() - # build the sample from user input - sample = Sample(unicode(opts.name), unicode(opts.path), unicode(opts.sampletype), opts.nevents_processed) - sample.nevents = opts.nevents - sample.normalization = opts.normalization - sample.event_weight_sum = opts.weight_sum - sample.luminosity = opts.luminosity - sample.code_version = unicode(opts.code_version) - sample.user_comment = unicode(opts.user_comment) - sample.source_dataset_id = opts.source_dataset_id - sample.source_sample_id = opts.source_sample_id - sample.author = unicode(opts.author) - sample.creation_time = opts.datetime - # connect to the MySQL database using default credentials - dbstore = DbStore() - # unless the source 
is set, prompt the user and present a list to make a choice - if sample.source_dataset_id is None: - prompt_dataset(sample,dbstore) - if sample.source_sample_id is None: - prompt_sample(sample,dbstore) - # check that source sample and dataset exist - if sample.source_dataset_id is not None: - checkExisting = dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id) - if checkExisting.is_empty(): - raise IndexError("No dataset with such index: %d"%sample.source_dataset_id) - if sample.source_sample_id is not None: - checkExisting = dbstore.find(Sample,Sample.sample_id==sample.source_sample_id) - if checkExisting.is_empty(): - raise IndexError("No sample with such index: %d"%sample.source_sample_id) - # if opts.nevents is not set, take #events from source sample (if set) or from source dataset (if set) in that order - if sample.nevents_processed is None and sample.source_sample_id is not None: - sample.nevents_processed = dbstore.find(Sample,Sample.sample_id==sample.source_sample_id).one().nevents_processed - if sample.nevents_processed is None and sample.source_dataset_id is not None: - sample.nevents_processed = dbstore.find(Dataset,Dataset.dataset_id==sample.source_dataset_id).one().nevents - if sample.nevents_processed is None: - print "Warning: Number of processed events not given, and no way to guess it." - - # List input files - files = [] - if opts.files == "": - files = glob.glob(os.path.join(sample.path, '*.root')) - else: - files = unicode(opts.files).split(",") - if len(files) == 0: - print "Warning: no root files found in %r" % sample.path - - # Try to guess the number of events stored into the file, as well as the weight sum - for f in files: - (weight_sum, entries) = get_file_data_(f) - sample.files.add(File(f, f, weight_sum, entries)) - - # check that there is no existing entry - checkExisting = dbstore.find(Sample,Sample.name==sample.name) - if checkExisting.is_empty(): - print sample - if confirm(prompt="Insert into the database?", resp=True): - dbstore.add(sample) - # compute the luminosity, if possible - if sample.luminosity is None: - dbstore.flush() - sample.luminosity = sample.getLuminosity() - else: - existing = checkExisting.one() - prompt = "Replace existing " - prompt += str(existing) - prompt += "\nby new " - prompt += str(sample) - prompt += "\n?" 
- if confirm(prompt, resp=False): - existing.replaceBy(sample) - if existing.luminosity is None: - dbstore.flush() - existing.luminosity = existing.getLuminosity() - # commit - dbstore.commit() - -# -# main -# -if __name__ == '__main__': - main() diff --git a/scripts/checkAndClean.py b/scripts/checkAndClean.py deleted file mode 100755 index d9907a6..0000000 --- a/scripts/checkAndClean.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python -import json -import os -import sys -from cp3_llbb.SAMADhi.SAMADhi import Analysis, Dataset, Sample, Result, DbStore -from cp3_llbb.SAMADhi.SAMADhi import File as SFile -from optparse import OptionParser, OptionGroup - -class MyOptionParser: - """ - Client option parser - """ - def __init__(self): - usage = "Usage: %prog [options]\n" - self.parser = OptionParser(usage=usage) - self.parser.add_option("-p","--path", action="store", type="string", - dest="path", default="./", - help="Path to the json files with db analysis results.") - self.parser.add_option("-o","--output", action="store", type="string", - dest="output", default="-", - help="Name of the output file.") - self.parser.add_option("-M","--cleanupMissing", action="store_true", - dest="cleanupMissing", default=False, - help="Clean samples with missing path from the database.") - self.parser.add_option("-U","--cleanupUnreachable", action="store_true", - dest="cleanupUnreachable", default=False, - help="Clean samples with unreachable path from the database.") - self.parser.add_option("-D","--cleanupDatasets", action="store_true", - dest="cleanupDatasets", default=False, - help="Clean orphan datasets from the database.") - self.parser.add_option("-w","--whitelist", action="store", type="string", - dest="whitelist", default=None, - help="JSON file with sample whitelists per analysis.") - self.parser.add_option("-d","--dry-run", action="store_true", - dest="dryrun", default=False, - help="Dry run: do not write to file and/or touch the database.") - - def get_opt(self): - """ - Returns parse list of options - """ - opts, args = self.parser.parse_args() - if opts.path is not None: - opts.path = os.path.abspath(os.path.expandvars(os.path.expanduser(opts.path))) - if opts.output == "-": - opts.output = sys.__stdout__ - else: - filepath = os.path.dirname(os.path.realpath(os.path.expanduser(opts.output))) - if not os.access(filepath,os.W_OK): - self.parser.error("Cannot write to %s"%filepath) - if os.path.isfile(opts.output): - self.parser.error("File already exists: %s"%opts.output) - if not opts.dryrun: - try: - opts.output = open(opts.output,"w") - except: - self.parser.error("Cannot write to %s"%opts.output) - else: - opts.output = sys.__stdout__ - try: - opts.whitelist = open(opts.whitelist) - except: - self.parser.error("Cannot open whitelist.") - return opts - -class StoreCleaner(): - """ - handle to the db store, with basic facilities to cleanup entries - """ - - def __init__(self): - self.dbstore = DbStore() - - def deleteSample(self,sample_id): - store = self.dbstore - # first remove the files associated with the sample - files = store.find(SFile,SFile.sample_id==sample_id) - for sampleFile in files: - store.remove(sampleFile) - # then remove the sample - sample = store.find(Sample,Sample.sample_id==sample_id).one() - print("deleting sample %d"%sample_id) - store.remove(sample) - - def deleteDataset(self,dataset_id): - store = self.dbstore - # simply delete the dataset - dataset = store.find(Dataset,Dataset.dataset_id==dataset_id).one() - print("deleting dataset %d"%dataset_id) - 
store.remove(dataset) - - def commit(self): - self.dbstore.commit() - - def rollback(self): - self.dbstore.rollback() - - -# Script to check samples for deletion - -def main(): - """Main function""" - # get the options - optmgr = MyOptionParser() - opts = optmgr.get_opt() - - # set stdout - sys.stdout = opts.output - - # whitelist with samples that we should not touch ever - if opts.whitelist is not None: - whitelist = json.load(opts.whitelist) - else: - whitelist = {} - - # utility class to clean the db - myCleaner = StoreCleaner() - - # open the sample analysis report and classify bad samples - samplesAnalysisReport = os.path.join(opts.path, "SamplesAnalysisReport.json") - with open(samplesAnalysisReport) as jfile: - data = json.load(jfile) - samples = data["MissingDirSamples"] - investigate = [] - delete = [] - empty = [] - empty_delete = [] - for sample in samples: - whitelisted = False - for v in whitelist.values(): - for label in v: - if label in sample["name"]: - whitelisted = True - if whitelisted: - if sample["path"]=="": - empty.append(sample) - else: - investigate.append(sample) - else: - if sample["path"]=="": - empty_delete.append(sample) - else: - delete.append(sample) - print("\n\nWhitelisted sample with missing path. Investigate:") - for sample in empty: - print(sample["name"]) - print("\n\nWhitelisted sample with unreachable path. Investigate:") - for sample in investigate: - print(sample["name"]) - print("\n\nSamples to be deleted because of missing path:") - for sample in empty_delete: - print(sample["name"]) - if opts.cleanupMissing : myCleaner.deleteSample(sample["sample_id"]) - print("\n\nSamples to be deleted because of unreachable path:") - for sample in delete: - print(sample["name"]) - if opts.cleanupUnreachable : myCleaner.deleteSample(sample["sample_id"]) - - # now clean orphan datasets - datasetsAnalysisReport = os.path.join(opts.path, "DatasetsAnalysisReport.json") - with open(datasetsAnalysisReport) as jfile: - data = json.load(jfile) - datasets = data["Orphans"] - for dataset in datasets: - if opts.cleanupDatasets : myCleaner.deleteDataset(dataset["dataset_id"]) - - # and commit - if not opts.dryrun: - myCleaner.commit() - else: - myCleaner.rollback() - -# -# main -# -if __name__ == '__main__': - main() - diff --git a/scripts/compute_sample_luminosity.py b/scripts/compute_sample_luminosity.py deleted file mode 100755 index 88f4907..0000000 --- a/scripts/compute_sample_luminosity.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -""" Simple script to compute the luminosity of a set of samples """ - -import subprocess -import argparse -from cp3_llbb.SAMADhi.SAMADhi import Sample, DbStore - -def get_options(): - parser = argparse.ArgumentParser(description='Compute luminosity of a set of samples.') - - parser.add_argument('-i', '--id', type=int, nargs='+', dest='ids', help='IDs of the samples', metavar='ID') - parser.add_argument('--name', type=str, nargs='+', dest='names', help='Names of the samples', metavar='NAME') - - parser.add_argument('--local', dest='local', action='store_true', help='Run brilcalc locally instead of on lxplus') - - parser.add_argument('--bootstrap', dest='bootstrap', action='store_true', help='Install brilcalc. 
Needs to be done only once') - - parser.add_argument('--update', dest='update', action='store_true', help='Update brilcalc') - - parser.add_argument('-n', '--username', dest='username', help='Remote lxplus username (local username by default)') - - parser.add_argument('-t', '--normtag', dest='normtag', help='Normtag on /afs') - - options = parser.parse_args() - - if not options.bootstrap and not options.update and options.ids is None and options.names is None: - parser.error('You must specify at least one sample id or sample name.') - - if not options.bootstrap and not options.update and not options.normtag: - parser.error('You must specify a normtag file') - - if options.ids is None: - options.ids = [] - - if options.names is None: - options.names = [] - - if options.username is None: - import pwd, os - options.username = pwd.getpwuid(os.getuid()).pw_name - - return options - -def get_sample(id, name): - - dbstore = DbStore() - - if id is not None: - result = dbstore.find(Sample, Sample.sample_id == id) - elif name is not None: - result = dbstore.find(Sample, Sample.name.like(unicode(name.replace('*', '%').replace('?', '_')))) - - return result.one() - -def parse_luminosity_csv(result): - """ Parse the CSV file produced by brilcalc, and return the total recorded luminosity in /pb """ - import csv - import StringIO - - f = StringIO.StringIO(result) - - lumi = 0 - reader = csv.reader(f, delimiter=',') - for row in reader: - if not row: - continue - - if row[0][0] == '#': - continue - lumi += float(row[-1]) - - return lumi / 1000. / 1000. - -def compute_luminosity(sample, options): - print("Computing luminosity for %r") % str(sample.name) - - lumi = 0 - if not options.local: - print("Running brilcalc on lxplus... You'll probably need to enter your lxplus password in a moment") - print('') - - cmds = ['brilcalc', 'lumi', '--normtag', options.normtag, '--output-style', 'csv', '-i', '"%s"' % str(sample.processed_lumi.replace('"', ''))] - cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH"; ' + ' '.join(cmds) - ssh_cmds = ['ssh', '%s@lxplus.cern.ch' % options.username, cmd] - brilcalc_result = subprocess.check_output(ssh_cmds) - - lumi = parse_luminosity_csv(brilcalc_result) - else: - print("Running brilcalc locally...") - # FIXME one day - print("Error: running brilcalc locally is not supported for the moment.") - return 0 - - print("Sample luminosity: %.3f /pb" % lumi) - print('') - - store = DbStore() - # Update luminosity in the database - store.find(Sample, Sample.sample_id == sample.sample_id).set(luminosity = lumi) - - store.commit() - - return lumi - -def install_brilcalc(options): - - if options.local: - print("Local installation of brilcalc is not supported.") - return - - print("Installing brilcalc on lxplus... You'll probably need to enter your lxplus password in a moment") - - cmds = ['pip', 'install', '--install-option="--prefix=$HOME/.local"', '--upgrade', 'brilws'] - cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH"; %s' % (" ".join(cmds)) - ssh_cmds = ['ssh', '%s@lxplus.cern.ch' % options.username, cmd] - subprocess.call(ssh_cmds) - -def update_brilcalc(options): - - if options.local: - print("Local installation of brilcalc is not supported.") - return - - print("Updating brilcalc on lxplus... 
You'll probably need to enter your lxplus password in a moment") - - cmds = ['pip', 'install', '--install-option="--prefix=$HOME/.local"', '--upgrade', '--force-reinstall', 'brilws'] - cmd = 'export PATH="$HOME/.local/bin:/afs/cern.ch/cms/lumi/brilconda-1.1.7/bin:$PATH"; %s' % (" ".join(cmds)) - ssh_cmds = ['ssh', '%s@lxplus.cern.ch' % options.username, cmd] - subprocess.call(ssh_cmds) - -def main(): - - options = get_options() - - if options.bootstrap: - install_brilcalc(options) - return - - if options.update: - update_brilcalc(options) - return - - for id_ in options.ids: - sample = get_sample(id_, None) - compute_luminosity(sample, options) - - for name in options.names: - sample = get_sample(None, name) - compute_luminosity(sample, options) - - -# -# main -# -if __name__ == '__main__': - main() diff --git a/scripts/das_import.py b/scripts/das_import.py deleted file mode 100755 index 9ed4a9d..0000000 --- a/scripts/das_import.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -import argparse - -from cp3_llbb.SAMADhi.das_import import import_cms_dataset - -def get_options(): - parser = argparse.ArgumentParser(description='Import CMS datasets into SAMADhi') - - parser.add_argument("-p", "--process", action="store", type=str, dest="process", help="Process name.") - - parser.add_argument("--xsection", action="store", type=float, default=1.0, dest="xsection", help="Cross-section in pb.") - - parser.add_argument("--energy", action="store", type=float, dest="energy", help="CoM energy, in TeV.") - - parser.add_argument("--comment", action="store", type=str, default="", dest="comment", help="User defined comment") - - parser.add_argument("dataset", action="store", type=str, nargs=1, help="CMS dataset") - - args = parser.parse_args() - - return args - -if __name__ == '__main__': - options = get_options() - import_cms_dataset(options.dataset[0], options.process, options.energy, options.xsection, options.comment, True) diff --git a/scripts/search_SAMADhi.py b/scripts/search_SAMADhi.py deleted file mode 100755 index b7189b9..0000000 --- a/scripts/search_SAMADhi.py +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/bin/env python - -# Script to add a sample to the database - -import os -from optparse import OptionParser -from cp3_llbb.SAMADhi.SAMADhi import Dataset, Sample, Result, DbStore, Analysis - -class MyOptionParser: - """ - Client option parser - """ - def __init__(self): - usage = "Usage: %prog type [options]\n" - usage += "Where type is one of dataset, sample, result, analysis" - self.parser = OptionParser(usage=usage) - self.parser.add_option("-l","--long", action="store_true", - dest="longOutput", default=False, - help="detailed output") - self.parser.add_option("-n","--name", action="store", type="string", - dest="name", default=None, - help="filter on name") - self.parser.add_option("-p","--path", action="store", type="string", - dest="path", default=None, - help="filter on path") - self.parser.add_option("-i","--id", action="store", type="int", - dest="objid", default=None, - help="filter on id") - - def get_opt(self): - """ - Returns parse list of options - """ - opts, args = self.parser.parse_args() - if len(args) == 0: - self.parser.error("must specify the type of item to search for") - if args[0] not in ["dataset","sample","result","analysis"]: - self.parser.error("type must be one of dataset, sample, result, analysis") - cnt = 0 - if opts.path is not None: - cnt +=1 - opts.path = os.path.abspath(os.path.expandvars(os.path.expanduser(opts.path))) - if opts.name is not None: cnt +=1 - 
if opts.objid is not None: cnt +=1 - if cnt>1: - self.parser.error("only one selection criteria may be applied") - if args[0]=="dataset" and opts.path is not None: - self.parser.error("cannot search dataset by path") - if args[0]=="result" and opts.name is not None: - self.parser.error("cannot search a result by name") - if args[0]=="analysis" and opts.path is not None: - self.parser.error("cannot search analysis by path") - opts.objtype = args[0] - return opts - -def main(): - """Main function""" - # get the options - optmgr = MyOptionParser() - opts = optmgr.get_opt() - # connect to the MySQL database using default credentials - dbstore = DbStore() - # build the query - if opts.objtype == "dataset": - objectClass = Dataset - objectId = Dataset.dataset_id - elif opts.objtype == "sample": - objectClass = Sample - objectId = Sample.sample_id - elif opts.objtype == "analysis": - objectClass = Analysis - objectId = Analysis.analysis_id - else: - objectClass = Result - objectId = Result.result_id - - if opts.objid is not None: - result = dbstore.find(objectClass, objectId==opts.objid) - elif opts.path is not None: - result = dbstore.find(objectClass, objectClass.path.like(unicode(opts.path.replace('*', '%').replace('?', '_')))) - elif opts.name is not None: - result = dbstore.find(objectClass, objectClass.name.like(unicode(opts.name.replace('*', '%').replace('?', '_')))) - else: - result = dbstore.find(objectClass) - - result = result.order_by(objectId) - # loop and print - if opts.longOutput: - for entry in result: - print entry - print "--------------------------------------------------------------------------------------" - else: - if opts.objtype != "result" and opts.objtype != "analysis": - data = result.values(objectId, objectClass.name) - else: - data = result.values(objectId, objectClass.description) - for dset in data: - print "%i\t%s"%(dset[0], dset[1]) - -# -# main -# -if __name__ == '__main__': - main() diff --git a/scripts/update_datasets_cross_section.py b/scripts/update_datasets_cross_section.py deleted file mode 100755 index 66c314c..0000000 --- a/scripts/update_datasets_cross_section.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python -""" Simple script to compute the luminosity of a set of samples """ - -import subprocess -import argparse -from cp3_llbb.SAMADhi.SAMADhi import Dataset, Sample, DbStore -from storm.locals import Desc - -def get_options(): - parser = argparse.ArgumentParser(description='Update cross-sections of datasets.') - - parser.add_argument('regex', type=str, help='Regular expression used to filter *samples*. Only \'*\' and \'?\' wildcards are supported. 
Take note that filtering is applied to samples, and not to datasets.', metavar='REGEX') - - parser.add_argument('-f', '--force', type=float, dest='force', help='For the cross-section of all datasets matching the regular expression to be this value', metavar='XSEC') - - parser.add_argument('-w', '--write', dest='write', action='store_true', help='Write changes to the database') - - options = parser.parse_args() - - return options - - -dbstore = DbStore() - -def get_samples(name): - return dbstore.find(Sample, Sample.name.like(unicode(name.replace('*', '%').replace('?', '_')))) - -def main(): - options = get_options() - samples = get_samples(options.regex) - - if samples.count() == 0: - print("No sample found.") - return - - for sample in samples: - if sample.source_dataset.datatype == "data": - continue - - # Consider a cross-section of one as a non-updated value - if sample.source_dataset.xsection == 1: - # Try to find a similar sample in the database, with the same center of mass energy - print("Updating cross-section of {}".format(sample.source_dataset.process)) - - if options.force: - print(" Forcing the cross-section to {}".format(options.force)) - if options.write: - sample.source_dataset.xsection = options.force - else: - possible_matches = dbstore.find(Dataset, Dataset.process.like(sample.source_dataset.process), - Dataset.energy == sample.source_dataset.energy, - Dataset.dataset_id != sample.source_dataset.dataset_id) - - xsec = None - if possible_matches.count() == 0: - print(" No match for this dataset found.") - else: - for p in possible_matches.order_by(Desc(Dataset.dataset_id)): - if not xsec: - xsec = p.xsection - else: - if xsec != p.xsection: - print(" Warning: more than one possible match found for this dataset, and they do not have the same cross-section. I do not know what to do...") - xsec = None - break - if xsec: - print(" Updating with cross-section = {}".format(xsec)) - if options.write: - sample.source_dataset.xsection = xsec - - - if options.write: - dbstore.commit() - else: - print("Currently running in dry-run mode. 
If you are happy with the change, pass the '-w' flag to this script to store the changes into the database.")
-        dbstore.rollback()
-#
-# main
-#
-if __name__ == '__main__':
-    main()
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..83e967f
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,65 @@
+[metadata]
+name = SAMADhi
+description = SAmple MAnagement Database
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/cp3-llbb/SAMADhi
+author = Christophe Delaere
+author_email = christophe.delaere@uclouvain.be
+license = unknown
+classifiers =
+    Development Status :: 4 - Beta
+    Intended Audience :: Developers
+    Intended Audience :: Science/Research
+    License :: Other/Proprietary License
+    Operating System :: OS Independent
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.6
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+    Programming Language :: Python :: Implementation :: CPython
+    Topic :: Software Development :: Libraries :: Python Modules
keywords = database
+
+[options]
+packages =
+    cp3_llbb.SAMADhi
+install_requires =
+    peewee
+    pymysql
+python_requires = !=2.*, >=3.6
+package_dir =
+    cp3_llbb.SAMADhi = python
+setup_requires =
+    pytest-runner
+
+[options.entry_points]
+console_scripts =
+    search_SAMADhi = cp3_llbb.SAMADhi.scripts:search
+    iSAMADhi = cp3_llbb.SAMADhi.scripts:interactive
+    update_datasets_cross_section = cp3_llbb.SAMADhi.scripts:update_datasets_cross_section
+    add_sample = cp3_llbb.SAMADhi.scripts:add_sample
+    add_result = cp3_llbb.SAMADhi.scripts:add_result
+    checkAndClean = cp3_llbb.SAMADhi.scripts:checkAndClean
+    das_import = cp3_llbb.SAMADhi.das_import:main
+    das_import_nanoAOD = cp3_llbb.SAMADhi.das_import:import_nanoAOD_sample
+    compute_sample_luminosity = cp3_llbb.SAMADhi.luminosity:compute_sample_luminosity
+    SAMADhi_dbAnalysis = cp3_llbb.SAMADhi.dbAnalysis:main
+
+[options.extras_require]
+tests =
+    pytest
+    pytest-console-scripts
+
+[bdist_wheel]
+universal = 0
+
+[aliases]
+test = pytest
+
+[flake8]
+max-line-length = 100
+select = E,F,W,B,B950
+ignore = E501,W503
diff --git a/setup_standalone.sh b/setup_standalone.sh
deleted file mode 100644
index 12cde67..0000000
--- a/setup_standalone.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-# no shebang, must be sourced
-
-# Creates a (symlinked) python install directory for SAMADhi and sets up environment variables,
-# such that the `from cp3_llbb.SAMADhi.SAMADhi ...` imports can also be used standalone on ingrid.
-# The python interpreter to use and the install path can be set through options - -## deduce source location from the script name -if [[ -z "${ZSH_NAME}" ]]; then - thisscript="$(readlink -f ${BASH_SOURCE})" -else - thisscript="$(readlink -f ${0})" -fi -samadhipath="$(dirname ${thisscript})" - -## option defaults -installpath="${samadhipath}/install" -python="$(which python)" -custom_python="" -## parse options -tmp_opts="$(getopt --longoptions=install:,python:,help --options=h -- $@)" -eval set -- "${tmp_opts}" -while true; do - case "${1}" in - --install) - installpath="${2}" - shift 2 ;; - --python) - python="${2}" - custom_python="yes" - shift 2 ;; - -h|--help) - echo "Usage: source install_standalone.sh [ --python=path_to_python_interpreter --install=./install ]" - shift - return 0 ;; - --) - shift; break ;; - esac -done - -echo "--> Install path: ${installpath}" -## prepend if necessary -function checkAndPrepend() -{ - local in_path="" - if [[ -z "${ZSH_NAME}" ]]; then - ## bash version - IFS=: local exp_path=${!1} - for apath in ${exp_path}; do - if [[ "${apath}" == "${2}" ]]; then - in_path="yes" - fi - done - else - ## zsh version - local exp_path="${(P)1}" - for apath in ${(s.:.)exp_path}; do - if [[ "${apath}" == "${2}" ]]; then - in_path="yes" - fi - done - fi - if [[ -z "${in_path}" ]]; then - export ${1}="${2}:${exp_path}" - echo "--> Added ${2} to ${1}" - fi -} -## pick up python interpreter -if [[ -n "${custom_python}" ]]; then - echo "--> Using python from ${python}" - pyinterpbindir="$(dirname ${python})" - pyinterprootdir="$(dirname ${pyinterpbindir})" - pyinterplibdir="${pyinterprootdir}/lib" - pyinterpsitedir="${pyinterplibdir}/python2.7/site-packages" - checkAndPrepend "LD_LIBRARY_PATH" "${pyinterplibdir}" - checkAndPrepend "PYTHONPATH" "${pyinterpsitedir}" -fi -pymajmin=$(${python} -c 'import sys; print(".".join(str(num) for num in sys.version_info[:2]))') -if [[ "${pymajmin}" != "2.7" ]]; then - echo "--> Only python 2.7 is supported, please pass a suitable interpreter using the --python option (found version ${pymajmin} for ${python})" - return 1 -fi -## install upgraded pip -if [[ ! -d "${installpath}" ]]; then - mkdir -p "${installpath}" - echo "--> upgrading pip from $(${python} -m pip --version)" - ${python} -m pip install --prefix="${installpath}" -I pip -fi -checkAndPrepend "LD_LIBRARY_PATH" "${installpath}/lib" -checkAndPrepend "LD_LIBRARY_PATH" "${installpath}/lib64" -pysitedir="${installpath}/lib/python${pymajmin}/site-packages" -checkAndPrepend "PYTHONPATH" "${pysitedir}" -checkAndPrepend "PYTHONPATH" "${installpath}/lib64/python${pymajmin}/site-packages" -( ${python} -c "import MySQLdb" > /dev/null 2> /dev/null ) || ${python} -m pip install --prefix="${installpath}" MySQL-python -( ${python} -c "import storm" > /dev/null 2> /dev/null ) || ${python} -m pip install --prefix="${installpath}" storm -( ${python} -c "import ROOT" > /dev/null 2> /dev/null ) || ${python} -m pip install --prefix="${installpath}" storm - -## Install SAMADhi -if [[ ! -d "${pysitedir}/cp3_llbb" ]]; then - mkdir -p "${pysitedir}/cp3_llbb/" -fi -## __init__.py for cp3_llbb -hatinitpy="${pysitedir}/cp3_llbb/__init__.py" -if [[ ! -f "${hatinitpy}" ]]; then - echo "" > "${hatinitpy}" -fi -## symlink -installpy="${pysitedir}/cp3_llbb/SAMADhi" -if [[ ! -a "${installpy}" ]]; then - ln -s "${samadhipath}/python" "${installpy}" - echo "--> Created symlink to SAMADhi" -elif [[ ! 
( -L "${installpy}" ) ]]; then - echo "--> ${installpy} exists, but is not a symlink" - return 1 -fi -## __init__.py for cp3_llbb/SAMADhi -pkginitpy="${installpy}/__init__.py" -if [[ ! -f "${pkginitpy}" ]]; then - echo "" > "${pkginitpy}" -fi diff --git a/tests/data/params.json b/tests/data/params.json new file mode 100644 index 0000000..e03810a --- /dev/null +++ b/tests/data/params.json @@ -0,0 +1,4 @@ +{ + "test" : 1, + "database" : "test.db" +} diff --git a/tests/data/test.db b/tests/data/test.db new file mode 100644 index 0000000..9e8571f Binary files /dev/null and b/tests/data/test.db differ diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..7ba0269 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,205 @@ +import distutils.spawn +import logging +import os +import os.path +import stat +import subprocess + +import pytest + +from pytest_console_scripts import script_runner + +logger = logging.getLogger(__name__) + + +testDBCred = os.path.join(os.path.dirname(__file__), "data", "params.json") +if stat.S_IMODE(os.stat(testDBCred).st_mode) != stat.S_IRUSR: + os.chmod(testDBCred, stat.S_IRUSR) ## set 400 +testDBArg = f"--database={testDBCred}" + +_hasROOT = False +try: + import cppyy + + _hasROOT = True +except ImportError: + pass +needROOT = pytest.mark.skipif(not _hasROOT, reason="Needs ROOT") +needGridProxy = pytest.mark.skipif( + (distutils.spawn.find_executable("voms-proxy-info") is None) + or (subprocess.call(["voms-proxy-info", "--exists", "--valid", "0:5"]) != 0), + reason="Needs a valid grid proxy", +) + + +@pytest.fixture +def tmptestdbcopy(tmpdir): + import shutil + + shutil.copy2( + os.path.join(os.path.dirname(__file__), "data", "params.json"), + str(tmpdir.join("params.json")), + ) + shutil.copy2( + os.path.join(os.path.dirname(__file__), "data", "test.db"), str(tmpdir.join("test.db")) + ) + yield "--database={}".format(str(tmpdir.join("params.json"))) + + +def checkSuccessOutLines(ret, nOut=None, nErr=None): + logger.info(ret.stdout) + if not ret.success: + logger.info(ret.stderr) + assert ret.success + if nOut is not None: + assert (nOut == 0 and len(ret.stdout.strip()) == 0) or len( + ret.stdout.strip().split("\n") + ) == nOut + if nErr is not None: + assert (nErr == 0 and len(ret.stderr.strip()) == 0) or len( + ret.stderr.strip().split("\n") + ) == nErr + + +def test_search_sample(script_runner): + checkSuccessOutLines( + script_runner.run("search_SAMADhi", "sample", "--name=test*", testDBArg), nOut=8, nErr=0 + ) + + +def test_add_sample(script_runner, tmptestdbcopy): + checkSuccessOutLines( + script_runner.run( + "add_sample", + "--continue", + tmptestdbcopy, + "NTUPLES", + "--name=test_cli_addSample_1", + "--processed=-1", + "--nevents=10", + "--norm=2.", + "--weight-sum=12.", + "--lumi=0.3", + "--code_version=0.1.0", + "--comment='testing add_sample'", + "--source_dataset=7", + "--source_sample=8", + "--author=pytest", + "/tmp", + ) + ) + checkSuccessOutLines( + script_runner.run( + "search_SAMADhi", tmptestdbcopy, "--long", "sample", "--name=test_cli_addSample_1" + ) + ) + checkSuccessOutLines( + script_runner.run("search_SAMADhi", tmptestdbcopy, "sample", "--name=test_cli_addSample_1"), + nOut=1, + ) + + +def test_add_sample_noconfirm(script_runner, tmptestdbcopy): + import io + + checkSuccessOutLines( + script_runner.run( + "add_sample", + tmptestdbcopy, + "NTUPLES", + "--name=test_cli_addSample_2", + "--processed=-1", + "--nevents=10", + "--norm=2.", + "--weight-sum=12.", + "--lumi=0.3", + "--code_version=0.1.0", + 
"--comment='testing add_sample'", + "--author=pytest", + "/tmp", + stdin=io.StringIO("\n\nn\n"), + ) + ) ## no source sample or dataset, no insert + checkSuccessOutLines( + script_runner.run("search_SAMADhi", tmptestdbcopy, "sample", "--name=test_cli_addSample_2"), + nOut=0, + ) + + +@needROOT +def test_add_sample_files(script_runner, tmptestdbcopy): + checkSuccessOutLines( + script_runner.run( + "add_sample", + "--continue", + tmptestdbcopy, + "NTUPLES", + "--name=test_cli_addSample_3", + "--processed=-1", + "--nevents=10", + "--norm=2.", + "--weight-sum=12.", + "--lumi=0.3", + "--code_version=0.1.0", + "--comment='testing add_sample'", + "--author=pytest", + "--files=/foo/bar/test_cli_addSample/1.root,/foo/bar/test_cli_addSample/2.root", + "/tmp", + ) + ) + checkSuccessOutLines( + script_runner.run("search_SAMADhi", tmptestdbcopy, "sample", "--name=test_cli_addSample_3"), + nOut=1, + ) + + +def test_add_result(script_runner, tmptestdbcopy): + checkSuccessOutLines( + script_runner.run( + "add_result", + "--continue", + tmptestdbcopy, + "--analysis=1", + "--sample=4,5,6,7,8", + "--description='testing add_result'", + "--author=pytest", + "--elog=TODO", + "/tmp", + stdin=b"n", + ) + ) + checkSuccessOutLines( + script_runner.run("search_SAMADhi", tmptestdbcopy, "result", "--path=/tmp"), nOut=1 + ) + + +@needGridProxy +def test_import_dataset(script_runner, tmptestdbcopy): + dasName = "/DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXFX-pythia8/RunIIAutumn18NanoAODv4-Nano14Dec2018_102X_upgrade2018_realistic_v16-v1/NANOAODSIM" + checkSuccessOutLines( + script_runner.run( + "das_import", + "--continue", + tmptestdbcopy, + "--energy=13", + "--xsection=6225.42", + "--process=DYJetsToLL_M-50_TuneCP5_13TeV-amcatnloFXF-pythia8", + dasName, + ) + ) + checkSuccessOutLines( + script_runner.run("search_SAMADhi", tmptestdbcopy, "dataset", f"--name={dasName}"), nOut=1 + ) + + +@needGridProxy +def test_import_dataset_update_xsec(script_runner, tmptestdbcopy): + checkSuccessOutLines( + script_runner.run( + "update_datasets_cross_section", + tmptestdbcopy, + "--write", + "--force=1", + "test_modelRels_mc1", + ) + ) diff --git a/tests/test_cli_existing.py b/tests/test_cli_existing.py new file mode 100644 index 0000000..c3d0859 --- /dev/null +++ b/tests/test_cli_existing.py @@ -0,0 +1,42 @@ +import logging +import os +import os.path + +import pytest + +from pytest_console_scripts import script_runner + +logger = logging.getLogger(__name__) + +needCredentials = pytest.mark.skipif( + not os.path.isfile( + os.path.expandvars(os.path.expanduser(os.getenv("SAMADHI_CREDENTIALS", "~/.samadhi"))) + ), + reason="Needs valid SAMADhi credentials", +) +dbArg = ( + "--database={}".format(os.getenv("SAMADHI_CREDENTIALS")) + if os.getenv("SAMADHI_CREDENTIALS") is not None + else None +) + + +def checkSuccessOutLines(ret, nOut=None, nErr=None): + logger.info(ret.stdout) + assert ret.success + if nOut is not None: + assert (nOut == 0 and len(ret.stdout.strip()) == 0) or len( + ret.stdout.strip().split("\n") + ) == nOut + if nErr is not None: + assert (nErr == 0 and len(ret.stderr.strip()) == 0) or len( + ret.stderr.strip().split("\n") + ) == nErr + + +@needCredentials +def test_search_sample(script_runner): + args = ["search_SAMADhi", "dataset", "--name=/DoubleMuon/Run2016*-03Feb2017-v*/MINIAOD"] + if dbArg: + args.append(dbArg) + checkSuccessOutLines(script_runner.run(*args), nOut=5, nErr=0) diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..5f0f83d --- /dev/null +++ b/tests/test_models.py @@ 
-0,0 +1,176 @@ +import logging + +import pytest + +logger = logging.getLogger(__name__) + + +@pytest.fixture(scope="module") +def sqlitetestdb(): + from cp3_llbb.SAMADhi.SAMADhi import _models as MODELS + from peewee import SqliteDatabase + + test_db = SqliteDatabase(":memory:") + test_db.bind(MODELS, bind_refs=False, bind_backrefs=False) + test_db.connect() + test_db.create_tables(MODELS) + yield + test_db.drop_tables(MODELS) + test_db.close() + + +def test_createAnalysis(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Analysis + + ana = Analysis.create( + cadiline="NP-20-001", contact="me ", description="Evidence for new physics" + ) + logger.info(str(ana)) + + +def test_createDataset_minimal(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Dataset + + dset = Dataset.create(name="test_createDataset_minimal dataset", datatype="mc") + logger.info(str(dset)) + + +def test_createDataset_fullNoRel(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Dataset + from datetime import datetime, timedelta + + dset = Dataset.create( + name="/NewPhysics/test_createDataset_fullNoRel/NANOAODSIM", + datatype="mc", + cmssw_release="CMSSW_10_6_0", + dsize=1024, + energy=14.0, + globaltag="mc_run2_106X_v0", + nevents=1000, + process="New Physics", + xsection=0.001, + user_comment="Your favourite sample", + creation_time=datetime.now() - timedelta(days=7), + ) + logger.info(str(dset)) + + +def test_createSample_minimal(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Sample + + smp = Sample.create( + name="test_createSample_minimal sample", + path="/test/sample/minimal", + sampletype="NTUPLES", + nevents_processed=1000, + ) + logger.info(str(smp)) + + +def test_createSample_fullNoRel(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Sample + + smp = Sample.create( + name="test_createSample_fullNoRel sample", + path="/test/sample/fullNoRel", + sampletype="NTUPLES", + nevents_processed=1000, + author="me ", + code_version="Framework_x.y.z_MyAnalysis_u.v.w", + event_weight_sum=1000.0, + extras_event_weight_sum="variations go here - not available", + luminosity=2.0, + nevents=215, + processed_lumi="almost all", + user_comment="hello world", + ) + logger.info(str(smp)) + + +def test_createFile(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Sample, File + + smp = Sample.create( + name="test_createFile_minimal sample", + path="/test/sample/minimal_for_fie", + sampletype="NTUPLES", + nevents_processed=1000, + ) + from cp3_llbb.SAMADhi.SAMADhi import File + + f = File.create( + lfn="/foo/bar/test_createFile_minimal.root", + pfn="/my/storage/foo/bar/test_createFile_minimal.root", + nevents=1, + event_weight_sum=1.0, + sample=smp, + ) + logger.info(str(f)) + + +def test_createResult_minimal(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Result + + result = Result.create(path="/my/home/test_minimal_result.pdf") + logger.info(str(result)) + + +def test_createResult_fullNoRel(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Result + + result = Result.create( + path="/my/home/test_fullNoRel_result.pdf", + author="me ", + description="An interesting result", + elog="TODO", + ) + logger.info(str(result)) + + +def test_modelRels(sqlitetestdb): + from cp3_llbb.SAMADhi.SAMADhi import Analysis, Dataset, Sample, File, Result + + ana = Analysis.create( + cadiline="NP-20-002", contact="me ", description="Measurement of XY->ZUVW" + ) + datasets = [ + Dataset.create(name=f"test_modelRels_data{i:d}", datatype="data") for i in range(3) + ] + [Dataset.create(name=f"test_modelRels_mc{i:d}", 
datatype="mc") for i in range(2)] + samples = [ + Sample.create( + name="test_modelRels_{}".format(ds.name.split("_")[-1]), + path=f"/test/sample/{ds.name}", + sampletype="NTUPLES", + nevents_processed=1000, + source_dataset=ds, + ) + for ds in datasets + ] + for smp in samples: + for i in range(4): + File.create( + sample=smp, + lfn=f"{smp.name}/{i:d}.root", + pfn=f"/store/me{smp.name}/{i:d}.root", + nevents=250, + event_weight_sum=250, + ) + res1 = Result.create( + analysis=ana, + author="me ", + description="Preliminary result", + elog="TODO", + path="/home/my/ana", + ) + res2 = Result.create( + analysis=ana, + author="me ", + description="Final result", + elog="TODO", + path="/home/my/paper", + ) + logger.info(str(ana)) + logger.info(str(datasets[-1])) + logger.info(str(samples[-1])) + logger.info(str(res1)) + logger.info(str(res2)) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..6107e5f --- /dev/null +++ b/tox.ini @@ -0,0 +1,8 @@ +[tox] +envlist = py36,py37,py38,py39 +isolated_build = True +[testenv] +deps = + pytest + pytest-console-scripts +commands = pytest