-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbayes_analysis.py
132 lines (116 loc) · 4.79 KB
/
bayes_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM
from itertools import groupby
from scipy.stats import ttest_ind, mannwhitneyu, ks_2samp
def analyze_hidden_markov_states(
time_series,
n_components=2,
n_iter=100,
random_state=42,
plot=True
):
"""
Analyze time series data using a Hidden Markov Model (HMM) to identify patterns,
dwell times, and state transitions. Performs statistical tests to assess the hypothesis
of systematic behavior (e.g., automation).
Parameters:
----------
time_series : pandas.Series or numpy.ndarray
The input time series data.
n_components : int, optional
The number of hidden states in the HMM. Default is 2.
n_iter : int, optional
The maximum number of iterations for the HMM. Default is 100.
random_state : int, optional
Seed for reproducibility. Default is 42.
plot : bool, optional
Whether to generate plots for HMM states and statistical analysis. Default is True.
Returns:
-------
results : dict
A dictionary containing HMM metrics, dwell time analysis, statistical test results,
and interpretative feedback.
"""
# Step 1: Fit a Hidden Markov Model
observed_data = time_series.values.reshape(-1, 1)
hmm = GaussianHMM(n_components=n_components, covariance_type="diag", n_iter=n_iter, random_state=random_state)
hmm.fit(observed_data)
log_likelihood = hmm.score(observed_data)
transition_matrix = hmm.transmat_
# Step 2: Predict states
states = hmm.predict(observed_data)
# Step 3: Analyze dwell times for each state
dwell_times = [len(list(group)) for _, group in groupby(states)]
dwell_time_by_state = {
s: [len(list(group)) for state, group in groupby(states) if state == s]
for s in range(n_components)
}
# Step 4: Generate random states for comparison
random_states = np.random.choice(range(n_components), size=len(states))
random_dwell_time_by_state = {
s: [len(list(group)) for state, group in groupby(random_states) if state == s]
for s in range(n_components)
}
# Step 5: Perform statistical tests
stat_tests = {}
for state in range(n_components):
observed_dwell = dwell_time_by_state[state]
random_dwell = random_dwell_time_by_state[state]
t_stat, t_p_value = ttest_ind(observed_dwell, random_dwell, equal_var=False)
u_stat, u_p_value = mannwhitneyu(observed_dwell, random_dwell, alternative="two-sided")
ks_stat, ks_p_value = ks_2samp(observed_dwell, random_dwell)
stat_tests[state] = {
"t-test": {"t_stat": t_stat, "p_value": t_p_value},
"Mann-Whitney U": {"u_stat": u_stat, "p_value": u_p_value},
"KS Test": {"ks_stat": ks_stat, "p_value": ks_p_value},
}
# Step 6: Analyze state proportions
state_counts = np.bincount(states, minlength=n_components)
state_proportions = state_counts / len(states)
# Step 7: Interpret results
feedback = []
for state, stats in stat_tests.items():
if stats["t-test"]["p_value"] < 0.05:
feedback.append(f"State {state}: Significant difference in dwell times (t-test, p < 0.05).")
if stats["Mann-Whitney U"]["p_value"] < 0.05:
feedback.append(f"State {state}: Significant difference in dwell times (Mann-Whitney U, p < 0.05).")
if stats["KS Test"]["p_value"] < 0.05:
feedback.append(f"State {state}: Significant difference in dwell times (KS Test, p < 0.05).")
if not feedback:
feedback.append("No significant differences detected in state dwell times. Systematic behavior not evident.")
# Step 8: Plot results
if plot:
# Plot state sequence
plt.figure(figsize=(10, 6))
plt.plot(time_series.index, states, marker="o", linestyle="-", label="HMM States")
plt.title("HMM State Sequence")
plt.xlabel("Time")
plt.ylabel("State")
plt.grid(True)
plt.legend()
plt.show()
# Plot dwell time distribution
plt.figure(figsize=(10, 6))
for state in range(n_components):
plt.hist(
dwell_time_by_state[state],
bins=10,
alpha=0.5,
label=f"State {state} Dwell Times"
)
plt.title("Dwell Time Distribution by State")
plt.xlabel("Dwell Time")
plt.ylabel("Frequency")
plt.legend()
plt.show()
# Combine results into a dictionary
results = {
"Log Likelihood": log_likelihood,
"Transition Matrix": transition_matrix,
"State Proportions": state_proportions.tolist(),
"Dwell Times": dwell_time_by_state,
"Statistical Tests": stat_tests,
"Feedback": feedback,
}
return results