Commit

Merge branch 'release-v0.1.7'
simomarsili committed Sep 11, 2016
2 parents 844923b + 0e33791 commit dbb5e77
Showing 9 changed files with 45 additions and 42 deletions.
19 changes: 10 additions & 9 deletions Makefile
@@ -1,20 +1,21 @@
-INSTALL_PATH = /usr/local
+INSTALL_PATH=/usr/local
+PYTHON=python3.5
 
 default:
-	python setup.py install --prefix $(INSTALL_PATH)
+	$(PYTHON) setup.py install --prefix $(INSTALL_PATH)
 build:
-	python setup.py build
+	$(PYTHON) setup.py build
 install:
-	python setup.py install --prefix $(INSTALL_PATH)
+	$(PYTHON) setup.py install --prefix $(INSTALL_PATH)
 user:
-	python setup.py install --prefix $(HOME)/.local
+	$(PYTHON) setup.py install --prefix $(HOME)/.local
 env:
-	(pip install -r requirements.txt; python setup.py install --prefix ./venv)
+	(pip install -r requirements.txt; $(PYTHON) setup.py install --prefix ./venv)
 test:
-	(cd tests; python test_basic.py)
+	(cd tests; $(PYTHON) test_basic.py)
 basic_test:
-	(cd tests; python test_basic.py)
+	(cd tests; $(PYTHON) test_basic.py)
 advanced_test:
-	(cd tests; python test_advanced.py > out; diff out OUT_1.0)
+	(cd tests; $(PYTHON) test_advanced.py > out; diff out OUT_1.0)
 clean:
 	rm -r build
39 changes: 15 additions & 24 deletions README.md
@@ -1,12 +1,15 @@
 # ndd
 
 **ndd** is a Python/Fortran module for estimation of entropy and entropy-related quantities from discrete data.
-**ndd** implements the Nemenmann-Shafee-Bialek (NSB) algorithm as default entropy estimator.
+The main goal of **ndd** is to provide a simple and minimal Python interface to the Nemenman-Shafee-Bialek (NSB) algorithm,
+a state-of-the-art Bayesian algorithm for entropy estimation. In **ndd**, this interface is the ndd.entropy() function,
+which takes as input an array of counts over a set of possible outcomes and returns an entropy estimate:
+entropy_estimate = ndd.entropy(counts).
 
 # Obtaining the source
 
 All **ndd** source code is hosted on Github.
-You can download the latest version of the code using [this link](https://github.com/simomarsili/ndd/archive/v0.1.6.zip).
+You can download the latest version of the code using [this link](https://github.com/simomarsili/ndd/archive/v0.1.7.zip).

# Prerequisites

@@ -51,28 +54,16 @@ From the root directory of the project, type:
 
 The ndd.entropy function takes as input a histogram (a list or a numpy array of integers representing counts) and returns an entropy estimate (in nats):
 
->>> import ndd; estimated_entropy = ndd.entropy(h)
-
-Compared to the standard, "plugin" estimator, the NSB estimator performs well in the undersampled regime (i.e. k >> n where k is the number of possible outcomes and n the number of samples).
-
-% python
->>> import ndd                # import the ndd module
->>> import numpy as np; import scipy.stats
->>> np.random.seed(0)
->>> def f(k,n,a): p = np.random.dirichlet([a]*k); h = np.random.multinomial(n,p); return p,h
-...
->>> k = int(1.e6); n = 1000   # undersampled regime: k = 1000 * n
->>> a=1.0; p,h = f(k,n,a)     # the parameter a controls the granularity of the distribution
->>> scipy.stats.entropy(p)    # the true value for the entropy of p
-13.393000456964428
->>> ndd.entropy(h)            # the NSB estimate from h
-13.120151656261665
->>> a=0.001; p,h = f(k,n,a)   # same calculation for a "more granular" distribution
->>> scipy.stats.entropy(p)    # the true entropy of the distribution p
-7.4972455126461348
->>> ndd.entropy(h)            # call the ndd.entropy function
-7.524121340953834
+>>> counts
+[ 7  3  5  8  9  1  3  3  1  0  2  5  2 11  4 23  5  0  8  0]
+>>> import ndd
+>>> estimated_entropy = ndd.entropy(counts)
+>>> estimated_entropy
+2.623634344902917
 
+Compared to the standard, "plugin" estimator, the NSB estimator performs well in the undersampled regime
+(i.e. k >> n where k is the number of possible outcomes and n the number of samples).
+Check the [tutorial](https://github.com/simomarsili/ndd/blob/master/notebooks/ndd_tutorial.ipynb) for more info.
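The undersampled regime mentioned in the README diff can be sketched with numpy and scipy alone. This illustrative snippet (not part of ndd) draws a random distribution, samples counts from it, and compares the true entropy with the naive plugin estimate; the corresponding NSB estimate would come from ndd.entropy(h), omitted here so the sketch runs without ndd installed:

```python
import numpy as np
from scipy.stats import entropy

np.random.seed(0)
k, n = 10**5, 1000  # k >> n: the undersampled regime

# draw a random distribution p over k outcomes, then n samples from it
p = np.random.dirichlet([1.0] * k)
h = np.random.multinomial(n, p)

# "plugin" estimate: the entropy of the empirical frequencies h / n
plugin_estimate = entropy(h / n)
true_entropy = entropy(p)

# with n samples the plugin estimate can never exceed log(n),
# so for k >> n it must underestimate the true entropy
print(true_entropy, plugin_estimate)
```

The plugin estimate is capped at log(n) ≈ 6.9 nats here, while the true entropy of a Dirichlet(1) sample over 10^5 outcomes is far larger; this gap is exactly what the NSB estimator is designed to correct.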

# Contributing

2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-VERSION="v0.1.6"
+VERSION="v0.1.7"
7 changes: 4 additions & 3 deletions ndd.py
@@ -20,14 +20,15 @@
 Advances in neural information processing systems, 1:399--406 (2002).
 """
-from __future__ import division, print_function, absolute_import
+from __future__ import absolute_import,division,print_function,unicode_literals
+from builtins import *
 
 __copyright__ = "Copyright (C) 2016 Simone Marsili"
 __license__ = "BSD 3 clause"
-__version__ = "v0.1.6"
+__version__ = "v0.1.7"
 __author__ = "Simone Marsili ([email protected])"
 __all__ = ['entropy','histogram']
 
 import numpy as np
 import warnings
 import sys
4 changes: 2 additions & 2 deletions notebooks/ndd_tutorial.ipynb
@@ -8,9 +8,9 @@
     "\n",
     "Welcome to the ndd (eNtropy from Discrete Data) module Tutorial\n",
     "\n",
-    "## What is this about\n",
+    "## What is **ndd** about\n",
     "\n",
-    "The main goal of **ndd** is to provide a simple and minimal Python interface to the Nemenman-Shafee-Bialek (NSB) algorithm, a state of the art Bayesian algorithm for entropy estimation. In **ndd**, this interface is the ndd.entropy() function, that returns an estimate from an array of counts over a set of outcomes: entropy_estimate = ndd.entropy(counts). In turn, ndd.entropy() relies on an efficient and numerically robust Fortran implementation of the NSB algorithm. "
+    "The main goal of **ndd** is to provide a simple and minimal Python interface to the Nemenman-Shafee-Bialek (NSB) algorithm, a state of the art Bayesian algorithm for entropy estimation. In **ndd**, this interface is the ndd.entropy() function, that returns an estimate from an array of counts over a set of outcomes: entropy_estimate = ndd.entropy(counts). ndd.entropy() relies on an efficient and numerically robust Fortran implementation of the NSB algorithm. "
    ]
   },
  {
1 change: 1 addition & 0 deletions requirements.txt
@@ -1 +1,2 @@
+future>=0.15.2
 numpy>=1.11.1
3 changes: 2 additions & 1 deletion setup.py
@@ -1,4 +1,5 @@
-from __future__ import division, absolute_import, print_function
+from __future__ import absolute_import,division,print_function
+from builtins import *
 from numpy.distutils.core import Extension
 
 nddf = Extension(name = 'nddf',
3 changes: 2 additions & 1 deletion tests/test_advanced.py
@@ -1,4 +1,5 @@
-from __future__ import print_function
+from __future__ import absolute_import,division,print_function,unicode_literals
+from builtins import *
 import numpy as np
 import sys
 import ndd
9 changes: 8 additions & 1 deletion tests/test_basic.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
-from __future__ import print_function
+from __future__ import absolute_import,division,print_function,unicode_literals
+from builtins import *
 import unittest
 import ndd
 import numpy as np
@@ -41,6 +42,12 @@ def test_001_100_100(self):
         result = np.float64(0.45816599887523507)
         self.assertEqual(ndd.entropy(data), result)
 
+    def test_histogram(self):
+        np.random.seed(0)
+        data = np.random.randint(1,11,1000)
+        result = [99, 96, 97, 122, 99, 105, 94, 97, 95, 96]
+        self.assertEqual(ndd.histogram(data), result)
+
 if __name__ == '__main__':
     unittest.main()
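Assuming ndd.histogram returns the per-outcome counts of its input (which the new test_histogram case suggests), the same data can be counted with numpy alone; np.bincount here is an illustrative stand-in for ndd.histogram, not part of ndd:

```python
import numpy as np

np.random.seed(0)
data = np.random.randint(1, 11, 1000)  # 1000 draws from {1, ..., 10}

# per-outcome counts for outcomes 1..10 (drop the slot for 0)
counts = np.bincount(data, minlength=11)[1:]
print(list(counts))
```

If ndd.histogram agrees with this counting, the printed list should match the result hard-coded in test_histogram, whose entries sum to the 1000 samples drawn.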
