Commit 3f4f916

Update README and simplify code examples
1 parent 38a97e8 commit 3f4f916

5 files changed: +108 −77 lines changed

README.md (+17 −17)

````diff
@@ -1,23 +1,21 @@
-# scipy-maxentropy
-
-================================================
-Maximum entropy models (:mod:`scipy_maxentropy`)
-================================================
+# scipy-maxentropy: maximum entropy models
 
 This is the former `scipy.maxentropy` package that was available in SciPy up to
 version 0.10.1. It was then removed in SciPy 0.11. It is now available as a
 separate package on PyPI for backward compatibility.
 
-For new projects, consider the `maxentropy` package instead, which offers a more
-modern scikit-learn compatible API.
+For new projects, consider the
+[maxentropy](https://github.com/PythonCharmers/maxentropy) package instead,
+which offers a more modern scikit-learn compatible API.
 
 ## Purpose
 
 This package fits "exponential family" models, including models of maximum
 entropy and minimum KL divergence to other models, subject to linear constraints
 on the expectations of arbitrary feature statistics. Applications include
 language models for natural language processing and understanding, machine
-translation, etc. Another application is environmental species modelling.
+translation, etc., environmental species modelling, image reconstruction, and
+others.
 
 ## Quickstart
 
@@ -69,17 +67,19 @@ print("Fitted distribution is:")
 p = model.probdist()
 for j in range(len(model.samplespace)):
     x = model.samplespace[j]
-    print("\tx = %-15s" %(x + ":",) + " p(x) = "+str(p[j]))
+    print(f"    x = {x + ':':15s} p(x) = {p[j]:.3f}")
 
 # Now show how well the constraints are satisfied:
 print()
 print("Desired constraints:")
-print("\tp['dans'] + p['en'] = 0.3")
-print("\tp['dans'] + p['à'] = 0.5")
+print("    sum(p(x)) = 1.0")
+print("    p['dans'] + p['en'] = 0.3")
+print("    p['dans'] + p['à'] = 0.5")
 print()
 print("Actual expectations under the fitted model:")
-print(f"\tp['dans'] + p['en'] = {p[0] + p[1]}")
-print(f"\tp['dans'] + p['à'] = {p[0] + p[2]}")
+print(f"    sum(p(x)) = {p.sum():.3f}")
+print(f"    p['dans'] + p['en'] = {p[0] + p[1]:.3f}")
+print(f"    p['dans'] + p['à'] = {p[0] + p[2]:.3f}")
 ```
 
 ## Models available
@@ -101,11 +101,11 @@ $$
 $$
 
 with a real parameter vector $\theta$ of the same length $n$ as the feature
-statistics $f(x) = [f_1(x), ..., f_n(x)]$.
+statistics $f(x) = \left(f_1(x), ..., f_n(x)\right)$.
 
-This is the "closest" model (in the sense of Kullback's discrimination
-information or relative entropy) to the prior model $p_0$ subject to the
-following additional constraints on the expectations of the features:
+This is the "closest" model (in the sense of minimizing KL divergence or
+"relative entropy") to the prior model $p_0$ subject to the following additional
+constraints on the expectations of the features:
 
 ```
 E f_1(X) = b_1
````
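As context for the README changes above: each constraint has the form E f_i(X) = b_i, i.e. the expectation of feature f_i under the fitted distribution must equal its target b_i, which is exactly what the updated Quickstart output checks. The sketch below (not part of this commit) shows what that check computes; the sample space, features, and targets mirror the README example, but the probability values are placeholders rather than fitted results.

```python
# Sketch only: check E f_i(X) = b_i for a discrete distribution p over a
# finite sample space. The probabilities below are illustrative placeholders,
# not the values produced by the fitted model.
import numpy as np

samplespace = ["dans", "en", "à", "au cours de", "pendant"]
f = [
    lambda x: x in samplespace,      # f0: normalization constraint
    lambda x: x in ("dans", "en"),   # f1
    lambda x: x in ("dans", "à"),    # f2
]
b = [1.0, 0.3, 0.5]                  # target expectations

p = np.array([0.2, 0.1, 0.3, 0.2, 0.2])  # placeholder probabilities
for fi, bi in zip(f, b):
    expectation = sum(p[j] * fi(x) for j, x in enumerate(samplespace))
    print(f"E f = {expectation:.3f}   target = {bi}")
```

With the actually fitted distribution, each printed expectation matches its target to within the optimizer's tolerance.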

examples/bergerexample.py (+23 −17)

````diff
@@ -16,48 +16,54 @@
 This code finds the probability distribution with maximal entropy
 subject to these constraints.
 """
-import scipy_maxentropy as maxentropy
 
-a_grave = u'\u00e0'
+from scipy_maxentropy import Model  # previously scipy.maxentropy
+
+samplespace = ["dans", "en", "à", "au cours de", "pendant"]
 
-samplespace = ['dans', 'en', a_grave, 'au cours de', 'pendant']
 
 def f0(x):
     return x in samplespace
 
+
 def f1(x):
-    return x=='dans' or x=='en'
+    return x == "dans" or x == "en"
+
 
 def f2(x):
-    return x=='dans' or x==a_grave
+    return x == "dans" or x == "à"
+
 
 f = [f0, f1, f2]
 
-model = maxentropy.model(f, samplespace)
+model = Model(f, samplespace)
 
 # Now set the desired feature expectations
-K = [1.0, 0.3, 0.5]
+b = [1.0, 0.3, 0.5]
 
-model.verbose = True
+model.verbose = False  # set to True to show optimization progress
 
 # Fit the model
-model.fit(K)
+model.fit(b)
 
 # Output the distribution
-print("\nFitted model parameters are:\n" + str(model.params))
-print("\nFitted distribution is:")
+print()
+print("Fitted model parameters are:\n" + str(model.params))
+print()
+print("Fitted distribution is:")
 p = model.probdist()
 for j in range(len(model.samplespace)):
     x = model.samplespace[j]
-    print("\tx = %-15s" %(x + ":",) + " p(x) = "+str(p[j]))
-
+    print(f"    x = {x + ':':15s} p(x) = {p[j]:.3f}")
 
 # Now show how well the constraints are satisfied:
 print()
 print("Desired constraints:")
-print("\tp['dans'] + p['en'] = 0.3")
-print("\tp['dans'] + p['" + a_grave + "'] = 0.5")
+print("    sum(p(x)) = 1.0")
+print("    p['dans'] + p['en'] = 0.3")
+print("    p['dans'] + p['à'] = 0.5")
 print()
 print("Actual expectations under the fitted model:")
-print("\tp['dans'] + p['en'] =", p[0] + p[1])
-print("\tp['dans'] + p['" + a_grave + "'] = " + str(p[0]+p[2]))
+print(f"    sum(p(x)) = {p.sum():.3f}")
+print(f"    p['dans'] + p['en'] = {p[0] + p[1]:.3f}")
+print(f"    p['dans'] + p['à'] = {p[0] + p[2]:.3f}")
````

examples/bergerexamplesimulated.py (+40 −27)

````diff
@@ -22,7 +22,7 @@
 large discrete sample space.
 """
 
-__author__ = 'Ed Schofield'
+__author__ = "Ed Schofield"
 
 
 import sys
@@ -33,29 +33,33 @@
 try:
     algorithm = sys.argv[1]
 except IndexError:
-    algorithm = 'CG'
+    algorithm = "CG"
 else:
-    assert algorithm in ['CG', 'BFGS', 'LBFGSB', 'Powell', 'Nelder-Mead']
+    assert algorithm in ["CG", "BFGS", "LBFGSB", "Powell", "Nelder-Mead"]
 
-a_grave = u'\u00e0'
+a_grave = "\u00e0"
+
+samplespace = ["dans", "en", a_grave, "au cours de", "pendant"]
 
-samplespace = ['dans', 'en', a_grave, 'au cours de', 'pendant']
 
 def f0(x):
     return x in samplespace
 
+
 def f1(x):
-    return x == 'dans' or x == 'en'
+    return x == "dans" or x == "en"
+
 
 def f2(x):
-    return x == 'dans' or x == a_grave
+    return x == "dans" or x == a_grave
+
 
 f = [f0, f1, f2]
 
 model = maxentropy.BigModel()
 
 # Now set the desired feature expectations
-K = [1.0, 0.3, 0.5]
+b = [1.0, 0.3, 0.5]
 
 # Define a uniform instrumental distribution for sampling
 samplefreq = {}
@@ -67,8 +71,9 @@ def f2(x):
 
 sampler = utils.dictsampler(samplefreq, size=n)
 
+
 # Now create a generator of features of random points:
-def sampleFgen(sampler, f, sparse_format='csc_matrix'):
+def sampleFgen(sampler, f, sparse_format="csc_matrix"):
     """
     A generator function that yields features of random points.
 
@@ -96,35 +101,43 @@ def sampleFgen(sampler, f, sparse_format='csc_matrix'):
 print("Generating an initial sample ...")
 model.setsampleFgen(sampleFgen(sampler, f))
 
-model.verbose = True
+model.verbose = False
 
 # Fit the model
 model.avegtol = 1e-4
-model.fit(K, algorithm=algorithm)
+model.fit(b, algorithm=algorithm)
 
-# Output the true distribution
-print("\nFitted model parameters are:\n" + str(model.params))
-smallmodel = maxentropy.model(f, samplespace)
+# Output the distribution
+print()
+print("Fitted model parameters are:\n" + str(model.params))
+print()
+smallmodel = maxentropy.Model(f, samplespace)
 smallmodel.setparams(model.params)
-print("\nFitted distribution is:")
+print("Fitted distribution is:")
 p = smallmodel.probdist()
 for j in range(len(smallmodel.samplespace)):
     x = smallmodel.samplespace[j]
-    print(("\tx = %-15s" %(x + ":",) + " p(x) = "+str(p[j])))
-
+    print(f"    x = {x + ':':15s} p(x) = {p[j]:.3f}")
 
 # Now show how well the constraints are satisfied:
 print()
 print("Desired constraints:")
-print("\tp['dans'] + p['en'] = 0.3")
-print("\tp['dans'] + p['" + a_grave + "'] = 0.5")
+print("    sum(p(x)) = 1.0")
+print("    p['dans'] + p['en'] = 0.3")
+print("    p['dans'] + p['à'] = 0.5")
 print()
 print("Actual expectations under the fitted model:")
-print("\tp['dans'] + p['en'] =", p[0] + p[1])
-print("\tp['dans'] + p['" + a_grave + "'] = " + \
-      str(p[0]+p[2]))
-
-print("\nEstimated error in constraint satisfaction (should be close to 0):\n" \
-      + str(abs(model.expectations() - K)))
-print("\nTrue error in constraint satisfaction (should be close to 0):\n" + \
-      str(abs(smallmodel.expectations() - K)))
+print(f"    sum(p(x)) = {p.sum():.3f}")
+print(f"    p['dans'] + p['en'] = {p[0] + p[1]:.3f}")
+print(f"    p['dans'] + p['à'] = {p[0] + p[2]:.3f}")
+
+print(
+    "\nEstimated error in constraint satisfaction (should be close to 0):\n"
+    + str(abs(model.expectations() - b))
+)
+print(
+    "\nTrue error in constraint satisfaction:\n"
+    + str(abs(smallmodel.expectations() - b))
+)
+print()
+print("The true error will be closer to 0 for larger samples n.\n")
````

examples/conditionalexample1.py (+4 −3)

````diff
@@ -1,6 +1,7 @@
 """
 Example of conditional maxentropy models
 """
+
 from numpy import *
 from scipy_maxentropy import *
 
@@ -51,10 +52,10 @@
 pmf = model.pmf()
 # The elements of this are flatted like the rows of F and p_tilde. We display
 # them nicely:
-print("x \ w \t 0 \t 1", end=' ')
+print("x \ w \t 0 \t 1", end=" ")
 for x in range(4):
-    print('\n' + str(x), end='')
+    print("\n" + str(x), end="")
     for w in range(2):
-        print(' \t %.3f' % pmf[w*numlabels + x], end=' ')
+        print(" \t %.3f" % pmf[w * numlabels + x], end=" ")
     # print ' \t %.3f' % pmf[indices_context[w]][x],
 print()
````

examples/conditionalexample2.py (+24 −13)

````diff
@@ -19,24 +19,35 @@
 import scipy.sparse as sparse
 import scipy_maxentropy as maxentropy
 
-samplespace = ['dans', 'en', 'à', 'au cours de', 'pendant']
+samplespace = ["dans", "en", "à", "au cours de", "pendant"]
 # Occurrences of French words, and their 'next English word' contexts, in
 # a hypothetical parallel corpus:
-corpus = [('dans', 'a'), ('dans', 'a'), ('dans', 'a'), ('dans', 'the'), \
-          ('pendant', 'a'), ('dans', 'happy'), ('au cours de', 'healthy')]
+corpus = [
+    ("dans", "a"),
+    ("dans", "a"),
+    ("dans", "a"),
+    ("dans", "the"),
+    ("pendant", "a"),
+    ("dans", "happy"),
+    ("au cours de", "healthy"),
+]
 contexts = list(set([c for (x, c) in corpus]))
 
+
 def f0(x, c):
     return x in samplespace
 
+
 def f1(x, c):
-    if x == 'dans' and c in ['a', 'the']:
+    if x == "dans" and c in ["a", "the"]:
        return True
     else:
        return False
 
+
 def f2(x, c):
-    return (x=='dans' or x=='à') and c not in ['a', 'the']
+    return (x == "dans" or x == "à") and c not in ["a", "the"]
+
 
 f = [f0, f1, f2]
 
@@ -67,8 +78,8 @@ def f2(x, c):
 # training data.
 # (The maxentropy module infers the empirical pmf etc. from the counts N)
 
-N = sparse.lil_matrix((1, numcontexts * len(samplespace)))   # initialized to zero
-for (x, c) in corpus:
+N = sparse.lil_matrix((1, numcontexts * len(samplespace)))  # initialized to zero
+for x, c in corpus:
     N[0, context_index[c] * numsamplepoints + samplespace_index[x]] += 1
 
 # Ideally, this could be stored as a sparse matrix of size C x X, whose ith row
@@ -98,17 +109,17 @@ def f2(x, c):
 p = model.probdist()
 
 print("\npmf table p(x | c), where c is the context 'the':")
-c = contexts.index('the')
-print(p[c*numsamplepoints:(c+1)*numsamplepoints])
+c = contexts.index("the")
+print(p[c * numsamplepoints : (c + 1) * numsamplepoints])
 
 print("\nFitted distribution is:")
-print("%12s" % ("c \ x"), end=' ')
+print("%12s" % ("c \ x"), end=" ")
 for label in samplespace:
-    print("%12s" % label, end=' ')
+    print("%12s" % label, end=" ")
 
 for c, context in enumerate(contexts):
-    print("\n%12s" % context, end=' ')
+    print("\n%12s" % context, end=" ")
     for x, label in enumerate(samplespace):
-        print(("%12.3f" % p[c*numsamplepoints+x]), end=' ')
+        print(("%12.3f" % p[c * numsamplepoints + x]), end=" ")
 
 print()
````
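The conditional example above keeps all the conditional distributions p(x | c) in one flat vector, indexed as `context_index[c] * numsamplepoints + samplespace_index[x]`; the slice `p[c * numsamplepoints : (c + 1) * numsamplepoints]` then pulls out one context's block. A tiny sketch of that layout follows (illustrative only: the context ordering and the index dicts here are made up, since the script builds `contexts` from a set and its index dicts are outside this diff):

```python
# Sketch of the flattened (context, word) layout used by the conditional model
samplespace = ["dans", "en", "à", "au cours de", "pendant"]
contexts = ["a", "the", "happy", "healthy"]  # illustrative ordering

numsamplepoints = len(samplespace)
samplespace_index = {x: i for i, x in enumerate(samplespace)}
context_index = {c: i for i, c in enumerate(contexts)}

# One cell per (context, word) pair, laid out context-block by context-block
flat = ["?"] * (len(contexts) * numsamplepoints)
flat[context_index["the"] * numsamplepoints + samplespace_index["dans"]] = "p(dans|the)"

c = context_index["the"]
print(flat[c * numsamplepoints : (c + 1) * numsamplepoints])
# -> ['p(dans|the)', '?', '?', '?', '?']
```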
