cmusphinx
diff --git a/‎cython/_pocketsphinx.pxd‎
Lines changed: 2 additions & 0 deletions b/‎cython/_pocketsphinx.pxd‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎cython/_pocketsphinx.pyx‎
Lines changed: 75 additions & 0 deletions b/‎cython/_pocketsphinx.pyx‎
Lines changed: 75 additions & 0 deletions
diff --git a/‎cython/test/continuous_test.py‎
Lines changed: 10 additions & 10 deletions b/‎cython/test/continuous_test.py‎
Lines changed: 10 additions & 10 deletions
diff --git a/‎include/pocketsphinx.h‎
Lines changed: 38 additions & 3 deletions b/‎include/pocketsphinx.h‎
Lines changed: 38 additions & 3 deletions
diff --git a/‎include/sphinxbase/cmn.h‎
Lines changed: 28 additions & 4 deletions b/‎include/sphinxbase/cmn.h‎
Lines changed: 28 additions & 4 deletions
diff --git a/‎src/acmod.c‎
Lines changed: 6 additions & 54 deletions b/‎src/acmod.c‎
Lines changed: 6 additions & 54 deletions
@@ -341,6 +341,8 @@ cdef extern from "pocketsphinx.h":
     int ps_free(ps_decoder_t *ps)
     int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
     int ps_reinit_feat(ps_decoder_t *ps, cmd_ln_t *config)
+    const char *ps_get_cmn(ps_decoder_t *ps, int update)
+    int ps_set_cmn(ps_decoder_t *ps, const char *cmn)
     logmath_t *ps_get_logmath(ps_decoder_t *ps)
     int ps_start_stream(ps_decoder_t *ps)
     int ps_get_in_speech(ps_decoder_t *ps)
 
@@ -803,6 +803,27 @@ cdef class Decoder:
         if ps_reinit_feat(self.ps, cconfig) < 0:
             raise RuntimeError("Failed to reinitialize feature extraction")
 
+    def get_cmn(self, update=False):
+        """Get current cepstral mean.
+
+        Args:
+          update(boolean): Update the mean based on current utterance.
+        Returns:
+          str: Cepstral mean as a comma-separated list of numbers.
+        """
+        cdef const char *cmn = ps_get_cmn(self.ps, update)
+        return cmn.decode("utf-8")
+    
+    def set_cmn(self, cmn):
+        """Set current cepstral mean.
+
+        Args:
+          cmn(str): Cepstral mean as a comma-separated list of numbers.
+        """
+        cdef int rv = ps_set_cmn(self.ps, cmn.encode("utf-8"))
+        if rv != 0:
+            raise ValueError("Invalid CMN string")
+        
     def start_stream(self):
         """Reset noise statistics.
 
@@ -1535,6 +1556,39 @@ cdef class Vad:
 
 cdef class Endpointer:
     """Simple endpointer using voice activity detection.
+
+    Args:
+      window(float): Length in seconds of window for decision.
+      ratio(float): Fraction of window that must be speech or
+                    non-speech to make a transition.
+      mode(int): Aggressiveness of voice activity detection (0-3)
+      sample_rate(int): Sampling rate of input, default is 16000.
+                        Rates other than 8000, 16000, 32000, 48000
+                        are only approximately supported, see note
+                        in `frame_length`.  Outlandish sampling
+                        rates like 3924 and 115200 will raise a
+                        `ValueError`.
+      frame_length(float): Desired input frame length in seconds,
+                           default is 0.03.  The *actual* frame
+                           length may be different if an
+                           approximately supported sampling rate is
+                           requested.  You must *always* use the
+                           `frame_bytes` and `frame_length`
+                           attributes to determine the input size.
+
+    Attributes:
+      sample_rate(int): Sampling rate of input (default is 16000)
+      frame_bytes(int): Number of bytes in a frame accepted by `process`.
+      frame_length(float): Length of a frame (*may be different from
+                           the one requested in the constructor*!)
+      in_speech(boolean): Are we currently in a speech region?
+      speech_start(float): Start of previous speech segment.
+      speech_end(float): End of previous speech segment.
+
+    Raises:
+      ValueError: Invalid input parameter.  Also raised if the ratio
+                  makes it impossible to do endpointing (i.e. it
+                  is more than N-1 or less than 1 frame).
     """
     cdef ps_endpointer_t *_ep
     DEFAULT_WINDOW = PS_ENDPOINTER_DEFAULT_WINDOW
@@ -1597,6 +1651,19 @@ cdef class Endpointer:
         return (<const unsigned char *>&outframe[0])[:n_samples * 2]
 
     def end_stream(self, frame):
+        """Read a final frame of data and return speech if any.
+
+        Args:
+          frame(bytes): Buffer containing speech data (16-bit signed
+                        integers).  Must be of length `frame_bytes`
+                        (in bytes) *or less*.
+        Returns:
+          (bytes) Remaining speech data (could be more than one frame),
+                  or None if none detected.
+        Raises:
+          IndexError: `frame` is of invalid size.
+          ValueError: Other internal VAD error.
+        """
         cdef const unsigned char[:] cframe = frame
         cdef Py_ssize_t n_samples = len(cframe) // 2
         cdef const short *outbuf
@@ -1612,7 +1679,15 @@ cdef class Endpointer:
         return (<const unsigned char *>&outbuf[0])[:out_n_samples * 2]
 
 def set_loglevel(level):
+    """Set internal log level of PocketSphinx.
+
+    Args:
+      level(str): one of "DEBUG", "INFO", "ERROR", "FATAL".
+    Raises:
+      ValueError: Invalid log level string.
+    """
     cdef const char *prev_level
     prev_level = err_set_loglevel_str(level.encode('utf-8'))
     if prev_level == NULL:
         raise ValueError("Invalid log level %s" % level)
+
@@ -14,30 +14,30 @@ def test_continuous(self):
         config.set_string("-hmm", os.path.join(MODELDIR, "en-us/en-us"))
         config.set_string("-lm", os.path.join(MODELDIR, "en-us/en-us.lm.bin"))
         config.set_string("-dict", os.path.join(MODELDIR, "en-us/cmudict-en-us.dict"))
-        config.set_string(
-            "-cmninit",
-            "41.00,-5.29,-0.12,5.09,2.48,-4.07,-1.37,-1.78,-5.08,-2.05,-6.45,-1.42,1.17",
+        prev_cmn = (
+            "41,-5.29,-0.12,5.09,2.48,-4.07,-1.37,-1.78,-5.08,-2.05,-6.45,-1.42,1.17"
         )
+        config.set_string("-cmninit", prev_cmn)
         decoder = Decoder(config)
+        self.assertEqual(prev_cmn, decoder.get_cmn(False))
 
         with open(os.path.join(DATADIR, "goforward.raw"), "rb") as stream:
-            in_speech_bf = False
             decoder.start_utt()
             while True:
                 buf = stream.read(1024)
                 if buf:
                     decoder.process_raw(buf, False, False)
-                    if decoder.get_in_speech() != in_speech_bf:
-                        in_speech_bf = decoder.get_in_speech()
-                        if not in_speech_bf:
-                            decoder.end_utt()
-                            print('Result:', decoder.hyp().hypstr)
-                            decoder.start_utt()
+                    cmn = decoder.get_cmn(True)
+                    self.assertNotEqual(prev_cmn, cmn)
+                    prev_cmn = cmn
                 else:
                     break
             decoder.end_utt()
             print("Result:", decoder.hyp().hypstr)
             self.assertEqual("go forward ten meters", decoder.hyp().hypstr)
+            cmn = decoder.get_cmn(False)
+            self.assertNotEqual(prev_cmn, cmn)
+            prev_cmn = cmn
 
 
 if __name__ == "__main__":
 
@@ -139,12 +139,13 @@ int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config);
  *
  * This function allows you to switch the feature computation
  * parameters without otherwise affecting the decoder configuration.
- * For example, if you change the sample rate or the frame rate, the
- * cepstral mean, or the VTLN warping factor, and do not need to
- * reconfigure the rest of the decoder.
+ * For example, if you change the sample rate or the frame rate, and
+ * do not want to reconfigure the rest of the decoder.
  *
  * Note that if your code has modified any internal parameters in the
  * \ref acmod_t, these will be overriden by values from the config.
+ * Likewise if you have set a custom cepstral mean with ps_set_cmn(),
+ * it will be overridden.
  *
  * @note The decoder retains ownership of the pointer `config`, so you
  * should free it when no longer used.
@@ -158,6 +159,40 @@ int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config);
 POCKETSPHINX_EXPORT
 int ps_reinit_feat(ps_decoder_t *ps, cmd_ln_t *config);
 
+/**
+ * Get the current cepstral mean as a string.
+ *
+ * This is the string representation of the current cepstral mean,
+ * which represents the acoustic channel conditions in live
+ * recognition.  This can be used to initialize the decoder with the
+ * `-cmninit` flag.
+ *
+ * @param ps Decoder
+ * @param update Update the cepstral mean using data processed so far.
+ * @return String representation of cepstral mean, as
+ *         `-ceplen` comma-separated numbers.  This pointer is owned
+ *         by the decoder and only valid until the next call to
+ *         ps_get_cmn(), ps_set_cmn() or ps_end_utt().
+ */
+POCKETSPHINX_EXPORT
+const char *ps_get_cmn(ps_decoder_t *ps, int update);
+
+/**
+ * Set the current cepstral mean from a string.
+ *
+ * This does the same thing as setting `-cmninit` and running
+ * `ps_reinit_feat()` but is more efficient, and can also be
+ * done in the middle of an utterance if you like.
+ *
+ * @param ps Decoder
+ * @param cmn String representation of cepstral mean, as
+ *            up to `-ceplen` comma-separated numbers (any
+ *            missing values will be zero-filled).
+ * @return 0 for success or -1 for invalid input.
+ */
+POCKETSPHINX_EXPORT
+int ps_set_cmn(ps_decoder_t *ps, const char *cmn);
+
 /**
  * Returns the argument definitions used in ps_init().
  *
 
@@ -131,6 +131,8 @@ typedef struct {
     mfcc_t *sum;        /**< Accumulated cepstra for computing mean */
     int32 nframe;	/**< Number of frames */
     int32 veclen;	/**< Length of cepstral vector */
+    char *repr;         /**< String representation of current means */
+    int refcount;
 } cmn_t;
 
 SPHINXBASE_EXPORT
@@ -169,14 +171,36 @@ SPHINXBASE_EXPORT
 void cmn_live_update(cmn_t *cmn);
 
 /**
- * Set the live mean.
+ * Set live mean from a vector of length cmn->veclen
+ */
+void cmn_live_set(cmn_t *cmn, mfcc_t const * vec);
+
+/**
+ * Get the string representation of the live mean.
+ */
+#define cmn_repr(cmn) (cmn)->repr
+
+/**
+ * Update the string representation.
+ */
+const char *cmn_update_repr(cmn_t *cmn);
+
+/**
+ * Set the live mean from a string.
  */
 SPHINXBASE_EXPORT
-void cmn_live_set(cmn_t *cmn, mfcc_t const *vec);
+int cmn_set_repr(cmn_t *cmn, char const *repr);
 
-/* RAH, free previously allocated memory */
+/**
+ * Retain a CMN.
+ */
+cmn_t *cmn_retain(cmn_t *cmn);
+
+/**
+ * Release a CMN, possibly freeing it.
+ */
 SPHINXBASE_EXPORT
-void cmn_free (cmn_t *cmn);
+int cmn_free (cmn_t *cmn);
 
 #ifdef __cplusplus
 }
 
@@ -65,7 +65,6 @@
 #include "ms_mgau.h"
 
 static int32 acmod_process_mfcbuf(acmod_t *acmod);
-static const char *acmod_update_cmninit(acmod_t *acmod);
 
 static int
 acmod_init_am(acmod_t *acmod)
@@ -196,23 +195,8 @@ acmod_reinit_feat(acmod_t *acmod, fe_t *fe, feat_t *fcb)
         if (fcb->cmn_struct
             && cmd_ln_exists_r(acmod->config, "-cmninit")
             && cmd_ln_str_r(acmod->config, "-cmninit")) {
-            char *c, *cc, *vallist;
-            int32 nvals;
-
-            vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
-            c = vallist;
-            nvals = 0;
-            while (nvals < fcb->cmn_struct->veclen
-                   && (cc = strchr(c, ',')) != NULL) {
-                *cc = '\0';
-                fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
-                c = cc + 1;
-                ++nvals;
-            }
-            if (nvals < fcb->cmn_struct->veclen && *c != '\0') {
-                fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
-            }
-            ckd_free(vallist);
+            E_INFO("Setting initial CMN to %s\n", cmd_ln_str_r(acmod->config, "-cmninit"));
+            cmn_set_repr(fcb->cmn_struct, cmd_ln_str_r(acmod->config, "-cmninit"));
         }
     }
     if (acmod_feat_mismatch(acmod, fcb)) {
@@ -462,7 +446,11 @@ acmod_end_utt(acmod_t *acmod)
         /* Process whatever's left, and any leadout. */
         if (nfr)
             nfr = acmod_process_mfcbuf(acmod);
+        else /* Make sure to update CMN! */
+            feat_update_stats(acmod->fcb);
     }
+    else /* Make sure to update CMN! */
+        feat_update_stats(acmod->fcb);
     if (acmod->mfcfh) {
         int32 outlen, rv;
         outlen = (ftell(acmod->mfcfh) - 4) / 4;
@@ -484,45 +472,9 @@ acmod_end_utt(acmod_t *acmod)
         acmod->senfh = NULL;
     }
 
-    acmod_update_cmninit(acmod);
-
     return nfr;
 }
 
-static const char *
-acmod_update_cmninit(acmod_t *acmod)
-{
-    char *cmninit, *ptr;
-    cmn_t *cmn;
-    int i, len;
-    
-    if (acmod->fcb == NULL)
-        return NULL;
-    if ((cmn = acmod->fcb->cmn_struct) == NULL)
-        return NULL;
-    len = 0;
-    for (i = 0; i < cmn->veclen; ++i) {
-        int nbytes = snprintf(NULL, 0, "%g,", cmn->cmn_mean[i]);
-        if (nbytes <= 0) {
-            E_ERROR_SYSTEM("Failed to format %g for cmninit", cmn->cmn_mean[i]);
-            return NULL;
-        }
-        len += nbytes;
-    }
-    len++;
-    ptr = cmninit = ckd_malloc(len);
-    if (ptr == NULL) {
-        E_ERROR_SYSTEM("Failed to allocate %d bytes for cmninit", len);
-        return NULL;
-    }
-    for (i = 0; i < cmn->veclen; ++i)
-        ptr += snprintf(ptr, cmninit + len - ptr, "%g,", cmn->cmn_mean[i]);
-    *--ptr = '\0';
-    cmd_ln_set_str_r(acmod->config, "-cmninit", cmninit);
-    ckd_free(cmninit);
-    return cmd_ln_str_r(acmod->config, "-cmninit");
-}
-
 static int
 acmod_log_mfc(acmod_t *acmod,
               mfcc_t **cep, int n_frames)