Skip to content

Commit

Permalink
Merge branch 'jchodera-bugfixes-after-c-implementation'
Browse files Browse the repository at this point in the history
  • Loading branch information
franknoe committed Mar 23, 2015
2 parents 325c9c8 + a4921a9 commit f470460
Show file tree
Hide file tree
Showing 49 changed files with 102,533 additions and 1,026 deletions.
132 changes: 80 additions & 52 deletions bhmm/bhmm_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import numpy as np
import copy
import time
from scipy.misc import logsumexp
#from scipy.misc import logsumexp
import bhmm.hidden as hidden


from bhmm.msm.transition_matrix_sampling_rev import TransitionMatrixSamplerRev

Expand All @@ -28,22 +30,23 @@ class BHMM(object):
>>> from bhmm import testsystems
>>> nstates = 3
>>> model = testsystems.dalton_model(nstates)
>>> data = model.generate_synthetic_observation_trajectories(ntrajectories=10, length=10000)
>>> [observations, hidden_states] = model.generate_synthetic_observation_trajectories(ntrajectories=10, length=10000)
Initialize a new BHMM model.
>>> from bhmm import BHMM
>>> bhmm = BHMM(data, nstates)
>>> bhmm_sampler = BHMM(observations, nstates)
Sample from the posterior.
>>> models = bhmm.sample(nsamples=10)
>>> models = bhmm_sampler.sample(nsamples=10)
"""
def __init__(self, observations, nstates, initial_model=None,
reversible=True, verbose=False,
transition_matrix_sampling_steps=1000,
output_model_type='gaussian'):
output_model_type='gaussian',
dtype = np.float64, kernel = 'c'):
"""Initialize a Bayesian hidden Markov model sampler.
Parameters
Expand All @@ -64,6 +67,8 @@ def __init__(self, observations, nstates, initial_model=None,
number of transition matrix sampling steps per BHMM cycle
output_model_type : str, optional, default='gaussian'
Output model type. ['gaussian', 'discrete']
kernel: str, optional, default='python'
Implementation kernel
TODO
----
Expand All @@ -83,19 +88,31 @@ def __init__(self, observations, nstates, initial_model=None,

# Store a copy of the observations.
self.observations = copy.deepcopy(observations)
self.nobs = len(observations)
self.Ts = [len(o) for o in observations]
self.maxT = np.max(self.Ts)

# Determine number of observation trajectories we have been given.
self.ntrajectories = len(self.observations)

# initial model
if initial_model:
# Use user-specified initial model, if provided.
self.model = copy.deepcopy(initial_model)
else:
# Generate our own initial model.
self.model = self._generateInitialModel(output_model_type)

# sampling options
self.transition_matrix_sampling_steps = transition_matrix_sampling_steps

# implementation options
self.dtype = dtype
self.kernel = kernel
hidden.set_implementation(kernel)
self.model.output_model.set_implementation(kernel)

# pre-construct hidden variables
self.alpha = np.zeros((self.maxT,self.nstates), dtype=dtype, order='C')
self.pobs = np.zeros((self.maxT,self.nstates), dtype=dtype, order='C')

return

def sample(self, nsamples, nburn=0, nthin=1, save_hidden_state_trajectory=False):
Expand Down Expand Up @@ -170,12 +187,12 @@ def _updateHiddenStateTrajectories(self):
"""
self.model.hidden_state_trajectories = list()
for trajectory_index in range(self.ntrajectories):
for trajectory_index in range(self.nobs):
hidden_state_trajectory = self._sampleHiddenStateTrajectory(self.observations[trajectory_index])
self.model.hidden_state_trajectories.append(hidden_state_trajectory)
return

def _sampleHiddenStateTrajectory(self, o_t, dtype=np.int32):
def _sampleHiddenStateTrajectory(self, obs, dtype=np.int32):
"""Sample a hidden state trajectory from the conditional distribution P(s | T, E, o)
Parameters
Expand All @@ -192,61 +209,72 @@ def _sampleHiddenStateTrajectory(self, o_t, dtype=np.int32):
Examples
--------
>>> import testsystems
>>> from bhmm import testsystems
>>> [model, observations, states, bhmm] = testsystems.generate_random_bhmm()
>>> o_t = observations[0]
>>> s_t = bhmm._sampleHiddenStateTrajectory(o_t)
"""

# Determine observation trajectory length
T = o_t.shape[0]
T = obs.shape[0]

# Convenience access.
model = self.model # current HMM model
nstates = model.nstates
logPi = model.logPi
logTij = model.logTij
#logPi = np.log(model.Pi)
#logTij = np.log(model.Tij)

A = self.model.Tij
pi = self.model.Pi

# compute output probability matrix
self.model.output_model.p_obs(obs, out=self.pobs, dtype=self.dtype)
# forward variables
logprob = hidden.forward(A, self.pobs, pi, T = T, alpha_out=self.alpha, dtype=self.dtype)[0]
# sample path
S = hidden.sample_path(self.alpha, A, self.pobs, T = T, dtype=self.dtype)

return S

# TODO: remove this when new impl. successfully tested.
# model = self.model # current HMM model
# nstates = model.nstates
# logPi = model.logPi
# logTij = model.logTij
# #logPi = np.log(model.Pi)
# #logTij = np.log(model.Tij)
#
# Forward part.
# #
# # Forward part.
# #
#

log_alpha_it = np.zeros([nstates, T], np.float64)

# TODO: Vectorize in i.
for i in range(nstates):
log_alpha_it[i,0] = logPi[i] + model.log_emission_probability(i, o_t[0])

# TODO: Vectorize in j.
for t in range(1,T):
for j in range(nstates):
log_alpha_it[j,t] = logsumexp(log_alpha_it[:,t-1] + logTij[:,j]) + model.log_emission_probability(j, o_t[t])

# log_alpha_it = np.zeros([nstates, T], np.float64)
#
# Sample state trajectory in backwards part.
# for i in range(nstates):
# log_alpha_it[i,0] = logPi[i] + model.log_emission_probability(i, o_t[0])
#

s_t = np.zeros([T], dtype=dtype)

# Sample final state.
log_p_i = log_alpha_it[:,T-1]
p_i = np.exp(log_p_i - logsumexp(log_alpha_it[:,T-1]))
s_t[T-1] = np.random.choice(range(nstates), size=1, p=p_i)

# Work backwards from T-2 to 0.
for t in range(T-2, -1, -1):
# Compute P(s_t = i | s_{t+1}..s_T).
log_p_i = log_alpha_it[:,t] + logTij[:,s_t[t+1]]
p_i = np.exp(log_p_i - logsumexp(log_p_i))

# Draw from this distribution.
s_t[t] = np.random.choice(range(nstates), size=1, p=p_i)

# Return trajectory
return s_t
# for t in range(1,T):
# for j in range(nstates):
# log_alpha_it[j,t] = logsumexp(log_alpha_it[:,t-1] + logTij[:,j]) + model.log_emission_probability(j, o_t[t])
#
# #
# # Sample state trajectory in backwards part.
# #
#
# s_t = np.zeros([T], dtype=dtype)
#
# # Sample final state.
# log_p_i = log_alpha_it[:,T-1]
# p_i = np.exp(log_p_i - logsumexp(log_alpha_it[:,T-1]))
# s_t[T-1] = np.random.choice(range(nstates), size=1, p=p_i)
#
# # Work backwards from T-2 to 0.
# for t in range(T-2, -1, -1):
# # Compute P(s_t = i | s_{t+1}..s_T).
# log_p_i = log_alpha_it[:,t] + logTij[:,s_t[t+1]]
# p_i = np.exp(log_p_i - logsumexp(log_p_i))
#
# # Draw from this distribution.
# s_t[t] = np.random.choice(range(nstates), size=1, p=p_i)
#
# # Return trajectory
# return s_t

def _updateEmissionProbabilities(self):
"""Sample a new set of emission probabilites from the conditional distribution P(E | S, O)
Expand Down
52 changes: 50 additions & 2 deletions bhmm/hidden/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def backward(A, pobs, T=None, beta_out=None, dtype=np.float32):
transition matrix of the hidden states
pobs : ndarray((T,N), dtype = float)
pobs[t,i] is the observation probability for observation at time t given hidden state i
T : int, optional, default = None
trajectory length. If not given, T = pobs.shape[0] will be used.
beta_out : ndarray((T,N), dtype = float), optional, default = None
containter for the beta result variables. If None, a new container will be created.
dtype : type, optional, default = np.float32
Expand All @@ -116,7 +118,7 @@ def backward(A, pobs, T=None, beta_out=None, dtype=np.float32):
raise RuntimeError('Nonexisting implementation selected: '+str(__impl__))


def state_probabilities(alpha, beta, gamma_out=None):
def state_probabilities(alpha, beta, T=None, gamma_out=None):
""" Calculate the (T,N)-probabilty matrix for being in state i at time t.
Parameters
Expand All @@ -125,6 +127,11 @@ def state_probabilities(alpha, beta, gamma_out=None):
alpha[t,i] is the ith forward coefficient of time t.
beta : ndarray((T,N), dtype = float), optional, default = None
beta[t,i] is the ith forward coefficient of time t.
T : int, optional, default = None
trajectory length. If not given, gamma_out.shape[0] will be used. If
gamma_out is neither given, T = alpha.shape[0] will be used.
gamma_out : ndarray((T,N), dtype = float), optional, default = None
containter for the gamma result variables. If None, a new container will be created.
Returns
-------
Expand All @@ -140,11 +147,22 @@ def state_probabilities(alpha, beta, gamma_out=None):
"""
if alpha.shape[0] != beta.shape[0]:
raise ValueError('Inconsistent sizes of alpha and beta.')
# determine T to use
if T is None:
if gamma_out is None:
T = alpha.shape[0]
else:
T = gamma_out.shape[0]
# compute
if gamma_out is None:
gamma_out = alpha * beta
if T < gamma_out.shape[0]:
gamma_out = gamma_out[:T]
else:
np.multiply(alpha, beta, gamma_out)
if gamma_out.shape[0] < alpha.shape[0]:
np.multiply(alpha[:T], beta[:T], gamma_out)
else:
np.multiply(alpha, beta, gamma_out)
# normalize
np.multiply(gamma_out, 1.0/np.sum(gamma_out, axis=1)[:,None], out = gamma_out)
# done
Expand Down Expand Up @@ -272,3 +290,33 @@ def viterbi(A, pobs, pi, dtype=np.float32):
raise RuntimeError('Nonexisting implementation selected: '+str(__impl__))


def sample_path(alpha, A, pobs, T = None, dtype=np.float32):
""" Sample the hidden pathway S from the conditional distribution P ( S | Parameters, Observations )
Parameters
----------
alpha : ndarray((T,N), dtype = float), optional, default = None
alpha[t,i] is the ith forward coefficient of time t.
A : ndarray((N,N), dtype = float)
transition matrix of the hidden states
pobs : ndarray((T,N), dtype = float)
pobs[t,i] is the observation probability for observation at time t given hidden state i
T : int
number of time steps
dtype : type, optional, default = np.float32
data type of the arguments.
Returns
-------
S : numpy.array shape (T)
maximum likelihood hidden path
"""
if __impl__ == __IMPL_PYTHON__:
return ip.sample_path(alpha, A, pobs, T = T, dtype=dtype)
elif __impl__ == __IMPL_C__:
return ic.sample_path(alpha, A, pobs, T = T, dtype=dtype)
else:
raise RuntimeError('Nonexisting implementation selected: '+str(__impl__))


78 changes: 77 additions & 1 deletion bhmm/hidden/impl_c/_hidden.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "_hidden.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>

#ifndef __DIMS__
Expand Down Expand Up @@ -239,7 +240,7 @@ void _compute_viterbi(
}
maxi = argmax(h, N);
ptr[t*N + j] = maxi;
vnext[j] = pobs[t*N + j] * v[maxi];
vnext[j] = pobs[t*N + j] * v[maxi] * A[maxi*N+j];
sum += vnext[j];
}
// normalize
Expand Down Expand Up @@ -267,6 +268,81 @@ void _compute_viterbi(
free(ptr);
}

int _random_choice(const double* p, const int N)
{
double dR = (double)rand();
double dM = (double)RAND_MAX;
double r = dR / (dM + 1.0);
double s = 0.0;
int i;
for (i = 0; i < N; i++)
{
s += p[i];
if (s >= r)
{
return(i);
}
}

printf("ERROR: random select method could not select anything. Probably p is not normalized.");
return(-1);
}

void _normalize(double* v, const int N)
{
int i;
double s = 0.0;
for (i = 0; i < N; i++)
{
s += v[i];
}
for (i = 0; i < N; i++)
{
v[i] /= s;
}
}


void _sample_path(
int *path,
const double *alpha,
const double *A,
const double *pobs,
const int N, const int T)
{
// initialize variables
int i,t;
double* psel = (double*) malloc(N * sizeof(double));

// initialize random number generator
srand(time(NULL));


// Sample final state.
for (i = 0; i < N; i++)
{
psel[i] = alpha[(T-1)*N+i];
}
_normalize(psel, N);
// Draw from this distribution.
path[T-1] = _random_choice(psel, N);
//printf(" drawn: %i\n",path[T-1]);

// Work backwards from T-2 to 0.
for (t = T-2; t >= 0; t--)
{
// Compute P(s_t = i | s_{t+1}..s_T).
for (i = 0; i < N; i++)
{
psel[i] = alpha[t*N+i] * A[i,path[t+1]];
}
_normalize(psel, N);
// Draw from this distribution.
path[t] = _random_choice(psel, N);
//printf(" drawn: %i\n",path[t]);
}
}

/*
void compute_transition_probabilities(
double *xi,
Expand Down
Loading

0 comments on commit f470460

Please sign in to comment.