diff --git a/Makefile b/Makefile index e9062ad..ccb35d5 100644 --- a/Makefile +++ b/Makefile @@ -1,20 +1,23 @@ -INSTALL_PATH = /usr/local +INSTALL_PATH=/usr/local +PYTHON=python3.5 +# Command targets, not files: a leftover build/ directory must not make `make build` a no-op. +.PHONY: default build install user env test basic_test advanced_test clean default: - python setup.py install --prefix $(INSTALL_PATH) + $(PYTHON) setup.py install --prefix $(INSTALL_PATH) build: - python setup.py build + $(PYTHON) setup.py build install: - python setup.py install --prefix $(INSTALL_PATH) + $(PYTHON) setup.py install --prefix $(INSTALL_PATH) user: - python setup.py install --prefix $(HOME)/.local + $(PYTHON) setup.py install --prefix $(HOME)/.local env: - (pip install -r requirements.txt; python setup.py install --prefix ./venv) + ($(PYTHON) -m pip install -r requirements.txt && $(PYTHON) setup.py install --prefix ./venv) test: - (cd tests; python test_basic.py) + (cd tests && $(PYTHON) test_basic.py) basic_test: - (cd tests; python test_basic.py) + (cd tests && $(PYTHON) test_basic.py) advanced_test: - (cd tests; python test_advanced.py > out; diff out OUT_1.0) + (cd tests && $(PYTHON) test_advanced.py > out && diff out OUT_1.0) clean: rm -r build diff --git a/README.md b/README.md index f8c448a..60fcb46 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # ndd **ndd** is a Python/Fortran module for estimation of entropy and entropy-related quantities from discrete data. -**ndd** implements the Nemenmann-Shafee-Bialek (NSB) algorithm as default entropy estimator. +The main goal of **ndd** is to provide a simple and minimal Python interface to the Nemenman-Shafee-Bialek (NSB) algorithm, +a state of the art Bayesian algorithm for entropy estimation. In **ndd**, this interface is the ndd.entropy() function, +that takes as input an array of counts over a set of possible outcomes, and returns an entropy estimate: +entropy_estimate = ndd.entropy(counts). # Obtaining the source All **ndd** source code is hosted on Github. -You can download the latest version of the code using [this link](https://github.com/simomarsili/ndd/archive/v0.1.6.zip). 
+You can download the latest version of the code using [this link](https://github.com/simomarsili/ndd/archive/v0.1.7.zip). # Prerequisites @@ -51,28 +54,16 @@ From the root directory of the project, type: The ndd.entropy function takes as input a histogram (a list or a numpy array of integers representing counts) and returns a entropy estimate (in nats): - >>> import ndd; estimated_entropy = ndd.entropy(h) - - Compared to the standard, "plugin" estimator, the NSB estimator performs well in the undersampled regime (i.e. k >> n where k is the number of possible outcomes and n the number of samples). - - % python - >>> import ndd # import the ndd module - >>> import numpy as np; import scipy.stats - >>> np.random.seed(0) - >>> def f(k,n,a): p = np.random.dirichlet([a]*k); h = np.random.multinomial(n,p); return p,h - ... - >>> k = int(1.e6); n = 1000 # undersampled regime: k = 1000 * n - >>> a=1.0; p,h = f(k,n,a) # the parameter a controls the granularity of the distribution - >>> scipy.stats.entropy(p) # the true value for the entropy of p - 13.393000456964428 - >>> ndd.entropy(h) # the NSB estimate from h - 13.120151656261665 - >>> a=0.001; p,h = f(k,n,a) # same calculation for a "more granular" distribution - >>> scipy.stats.entropy(p) # the true entropy of the distribution p - 7.4972455126461348 - >>> ndd.entropy(h) # call the ndd.entropy function - 7.524121340953834 - + >>> counts + [ 7 3 5 8 9 1 3 3 1 0 2 5 2 11 4 23 5 0 8 0] + >>> import ndd + >>> estimated_entropy = ndd.entropy(counts) + >>> estimated_entropy + 2.623634344902917 + + Compared to the standard, "plugin" estimator, the NSB estimator performs well in the undersampled regime + (i.e. k >> n where k is the number of possible outcomes and n the number of samples). + Check the [tutorial](https://github.com/simomarsili/ndd/blob/master/notebooks/ndd_tutorial.ipynb) for more info. 
# Contributing diff --git a/VERSION b/VERSION index 88c772a..b0286e5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -VERSION="v0.1.6" \ No newline at end of file +VERSION="v0.1.7" \ No newline at end of file diff --git a/ndd.py b/ndd.py index 826fcfa..4eb9821 100644 --- a/ndd.py +++ b/ndd.py @@ -20,14 +20,15 @@ Advances in neural information processing systems, 1:399--406 (2002). """ -from __future__ import division, print_function, absolute_import + +from __future__ import absolute_import,division,print_function,unicode_literals +from builtins import * __copyright__ = "Copyright (C) 2016 Simone Marsili" __license__ = "BSD 3 clause" -__version__ = "v0.1.6" +__version__ = "v0.1.7" __author__ = "Simone Marsili (simomarsili@gmail.com)" __all__ = ['entropy','histogram'] - import numpy as np import warnings import sys diff --git a/notebooks/ndd_tutorial.ipynb b/notebooks/ndd_tutorial.ipynb index 4487275..8fc7d32 100644 --- a/notebooks/ndd_tutorial.ipynb +++ b/notebooks/ndd_tutorial.ipynb @@ -8,9 +8,9 @@ "\n", "Welcome to the ndd (eNtropy from Discrete Data) module Tutorial\n", "\n", - "## What is this about\n", + "## What is **ndd** about\n", "\n", - "The main goal of **ndd** is to provide a simple and minimal Python interface to the Nemenman-Shafee-Bialek (NSB) algorithm, a state of the art Bayesian algorithm for entropy estimation. In **ndd**, this interface is the ndd.entropy() function, that returns an estimate from an array of counts over a set of outcomes: entropy_estimate = ndd.entropy(counts). In turn, ndd.entropy() relies on an efficient and numerically robust Fortran implementation of the NSB algorithm. " + "The main goal of **ndd** is to provide a simple and minimal Python interface to the Nemenman-Shafee-Bialek (NSB) algorithm, a state of the art Bayesian algorithm for entropy estimation. In **ndd**, this interface is the ndd.entropy() function, that returns an estimate from an array of counts over a set of outcomes: entropy_estimate = ndd.entropy(counts). 
ndd.entropy() relies on an efficient and numerically robust Fortran implementation of the NSB algorithm. " ] }, { diff --git a/requirements.txt b/requirements.txt index d0f005b..a16237e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ +future>=0.15.2 numpy>=1.11.1 diff --git a/setup.py b/setup.py index ffd0667..b6dc6df 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ -from __future__ import division, absolute_import, print_function +from __future__ import absolute_import,division,print_function +from builtins import * from numpy.distutils.core import Extension nddf = Extension(name = 'nddf', diff --git a/tests/test_advanced.py b/tests/test_advanced.py index c0aae8c..c403e6c 100644 --- a/tests/test_advanced.py +++ b/tests/test_advanced.py @@ -1,4 +1,5 @@ -from __future__ import print_function +from __future__ import absolute_import,division,print_function,unicode_literals +from builtins import * import numpy as np import sys import ndd diff --git a/tests/test_basic.py b/tests/test_basic.py index 7447c04..0438871 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -from __future__ import print_function +from __future__ import absolute_import,division,print_function,unicode_literals +from builtins import * import unittest import ndd import numpy as np @@ -41,6 +42,12 @@ def test_001_100_100(self): result = np.float64(0.45816599887523507) self.assertEqual(ndd.entropy(data), result) + def test_histogram(self): + np.random.seed(0) + data = np.random.randint(1,11,1000) + result = [99, 96, 97, 122, 99, 105, 94, 97, 95, 96] + self.assertEqual(ndd.histogram(data), result) + if __name__ == '__main__': unittest.main()