@@ -803,6 +803,27 @@ cdef class Decoder:
803803 if ps_reinit_feat(self .ps, cconfig) < 0 :
804804 raise RuntimeError (" Failed to reinitialize feature extraction" )
805805
806+ def get_cmn (self , update = False ):
807+ """ Get current cepstral mean.
808+
809+ Args:
810+ update(boolean): Update the mean based on current utterance (default: False).
811+ Returns:
812+ str: Cepstral mean as a comma-separated list of numbers, decoded as UTF-8 from the C string returned by ps_get_cmn.
813+ """
814+ cdef const char * cmn = ps_get_cmn(self .ps, update)
815+ return cmn.decode(" utf-8" )
816+
817+ def set_cmn (self , cmn ):
818+ """ Set current cepstral mean.
819+
820+ Args:
821+ cmn(str): Cepstral mean as a comma-separated list of numbers (raises ValueError if ps_set_cmn returns nonzero).
822+ """
823+ cdef int rv = ps_set_cmn(self .ps, cmn.encode(" utf-8" ))
824+ if rv != 0 :
825+ raise ValueError (" Invalid CMN string" )
826+
806827 def start_stream (self ):
807828 """ Reset noise statistics.
808829
@@ -1535,6 +1556,39 @@ cdef class Vad:
15351556
15361557cdef class Endpointer:
15371558 """ Simple endpointer using voice activity detection.
1559+
1560+ Args:
1561+ window(float): Length in seconds of window for decision.
1562+ ratio(float): Fraction of window that must be speech or
1563+ non-speech to make a transition.
1564+ mode(int): Aggressiveness of voice activity detection (0-3)
1565+ sample_rate(int): Sampling rate of input, default is 16000.
1566+ Rates other than 8000, 16000, 32000, 48000
1567+ are only approximately supported, see note
1568+ in `frame_length`. Outlandish sampling
1569+ rates like 3924 and 115200 will raise a
1570+ `ValueError`.
1571+ frame_length(float): Desired input frame length in seconds,
1572+ default is 0.03. The *actual* frame
1573+ length may be different if an
1574+ approximately supported sampling rate is
1575+ requested. You must *always* use the
1576+ `frame_bytes` and `frame_length`
1577+ attributes to determine the input size.
1578+
1579+ Attributes:
1580+ sample_rate(int): Sampling rate of input (default is 16000)
1581+ frame_bytes(int): Number of bytes in a frame accepted by `process`.
1582+ frame_length(float): Length of a frame (*may be different from
1583+ the one requested in the constructor*!)
1584+ in_speech(boolean): Are we currently in a speech region?
1585+ speech_start(float): Start of previous speech segment.
1586+ speech_end(float): End of previous speech segment.
1587+
1588+ Raises:
1589+ ValueError: Invalid input parameter. Also raised if the ratio
1590+ makes it impossible to do endpointing (i.e. it
1591+ is more than N-1 or less than 1 frame).
15381592 """
15391593 cdef ps_endpointer_t * _ep
15401594 DEFAULT_WINDOW = PS_ENDPOINTER_DEFAULT_WINDOW
@@ -1597,6 +1651,19 @@ cdef class Endpointer:
15971651 return (< const unsigned char * > & outframe[0 ])[:n_samples * 2 ]
15981652
15991653 def end_stream (self , frame ):
1654+ """ Read a final frame of data and return speech if any.
1655+
1656+ Args:
1657+ frame(bytes): Buffer containing speech data (16-bit signed
1658+ integers). Must be of length `frame_bytes`
1659+ (in bytes) *or less*.
1660+ Returns:
1661+ (bytes) Remaining speech data (could be more than one frame),
1662+ or None if none detected.
1663+ Raises:
1664+ IndexError: `frame` is of invalid size.
1665+ ValueError: Other internal VAD error.
1666+ """
16001667 cdef const unsigned char [:] cframe = frame
16011668 cdef Py_ssize_t n_samples = len (cframe) // 2
16021669 cdef const short * outbuf
@@ -1612,7 +1679,15 @@ cdef class Endpointer:
16121679 return (< const unsigned char * > & outbuf[0 ])[:out_n_samples * 2 ]
16131680
16141681def set_loglevel (level ):
1682+ """ Set internal log level of PocketSphinx.
1683+
1684+ Args:
1685+ level(str): Log level name, one of "DEBUG", "INFO", "ERROR", "FATAL".
1686+ Raises:
1687+ ValueError: Invalid log level string (err_set_loglevel_str returned NULL).
1688+ """
16151689 cdef const char * prev_level
16161690 prev_level = err_set_loglevel_str(level.encode(' utf-8' ))
16171691 if prev_level == NULL :
16181692 raise ValueError (" Invalid log level %s " % level)
1693+
0 commit comments