Skip to content

Commit 908c45f

Browse files
committed
improve Cheating model
1 parent f9a9916 commit 908c45f

File tree

2 files changed

+169
-13
lines changed

2 files changed

+169
-13
lines changed
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# clears workspace:
2+
rm(list=ls())
3+
4+
library(rstan)
5+
6+
model <- "
7+
// Cheating Latent Mixture Model
8+
data {
9+
int<lower=1> n;
10+
int<lower=1> p;
11+
int<lower=1,upper=n> k[p];
12+
int<lower=0,upper=1> truth[p];
13+
}
14+
parameters {
15+
real<lower=0,upper=1> phi;
16+
real<lower=0,upper=1> mubon;
17+
real<lower=0> mudiff;
18+
real<lower=5,upper=50> lambdabon;
19+
real<lower=5,upper=50> lambdache;
20+
matrix<lower=0,upper=1>[2,p] theta;
21+
}
22+
transformed parameters {
23+
vector[2] lp_parts[p];
24+
vector<lower=0>[2] alpha;
25+
vector<lower=0>[2] beta;
26+
real<lower=0,upper=1> muche;
27+
28+
// Additivity on Logit Scale
29+
muche <- inv_logit(logit(mubon) + mudiff);
30+
31+
// Transformation to Group Mean and Precision
32+
alpha[1] <- mubon * lambdabon;
33+
beta[1] <- lambdabon * (1 - mubon);
34+
alpha[2] <- muche * lambdache;
35+
beta[2] <- lambdache * (1 - muche);
36+
37+
// Data are Binomial with Rate Given by
38+
// Each Person’s Group Assignment
39+
for (i in 1:p) {
40+
lp_parts[i,1] <- log1m(phi) + binomial_log(k[i], n, theta[1,i]);
41+
lp_parts[i,2] <- log(phi) + binomial_log(k[i], n, theta[2,i]);
42+
}
43+
}
44+
model {
45+
// Priors
46+
mubon ~ beta(1, 1); // can be removed
47+
mudiff ~ normal(0, 1 / sqrt(.5))T[0,]; // Constrained to be Positive
48+
// Relatively Uninformative Prior on Base Rate
49+
phi ~ beta(5, 5);
50+
51+
theta[1] ~ beta(alpha[1], beta[1]);
52+
theta[2] ~ beta(alpha[2], beta[2]);
53+
54+
for (i in 1:p)
55+
increment_log_prob(log_sum_exp(lp_parts[i]));
56+
}
57+
generated quantities {
58+
int<lower=0,upper=1> z[p];
59+
real pc;
60+
vector[p] pct;
61+
62+
for (i in 1:p) {
63+
vector[2] prob;
64+
prob <- softmax(lp_parts[i]);
65+
// Each Person Belongs to One of Two Latent Groups
66+
z[i] <- bernoulli_rng(prob[2]);
67+
// Correct Count
68+
pct[i] <- if_else(z[i] == truth[i], 1, 0);
69+
}
70+
pc <- sum(pct);
71+
}"
72+
73+
cheat.dat <- read.table("cheat.csv", header=F, sep=",")
74+
cheatt.dat <- read.table("cheatt.csv", header=F, sep="")
75+
truth <- cheatt.dat$V1 # truth = 1 if cheater
76+
k <- apply(cheat.dat, 1, sum) # total correct per participant
77+
p <- length(k) # number of people
78+
n <- 40 # total trials
79+
80+
data <- list(p=p, k=k, n=n, truth=truth) # To be passed on to Stan
81+
82+
myinits <- list(
83+
list(mudiff=.1, phi=.5, mubon=.5, lambdabon=30, lambdache=25,
84+
theta=matrix(rep(.5, 2 * p), 2, p)),
85+
list(mudiff=.15, phi=.5, mubon=.5, lambdabon=25, lambdache=30,
86+
theta=matrix(rep(.5, 2 * p), 2, p)))
87+
88+
# Parameters to be monitored:
89+
parameters <- c("theta", "z", "mubon", "lambdabon", "muche", "lambdache",
90+
"mudiff", "phi", "alpha", "beta", "pc")
91+
92+
# The following command calls Stan with specific options.
93+
# For a detailed description type "?rstan".
94+
samples <- stan(model_code=model,
95+
data=data,
96+
init=myinits, # If not specified, gives random inits
97+
pars=parameters,
98+
iter=5000,
99+
chains=2,
100+
thin=1,
101+
# warmup = 100, # Stands for burn-in; Default = iter/2
102+
# seed = 123 # Setting seed; Default is random seed
103+
)
104+
# Now the values for the monitored parameters are in the "samples" object,
105+
# ready for inspection.
106+
107+
print(samples)
108+
pc <- extract(samples)$pc / p # to get proportion correct
109+
mean(pc)
110+
111+
# plot 6.9
112+
#make the two panel plot:
113+
windows(width=8,height=6) #this command works only under Windows!
114+
layout(matrix(c(1,2),2,1))
115+
layout.show(2)
116+
par(cex.main = 1.5, mar = c(5, 6, 4, 5) + 0.1, mgp = c(3.5, 1, 0), cex.lab = 1.5,
117+
font.lab = 2, cex.axis = 1.3, bty = "n", las=1)
118+
bins <- c(-1:n)+.5
119+
bonafide <- hist(k[truth==0], breaks=bins, plot=F)$counts
120+
cheat <- hist(k[truth==1], breaks=bins, plot=F)$counts
121+
122+
counts <- rbind(bonafide, cheat)
123+
barplot(counts, main=" ", xlab=" ", col=c("grey","white"),
124+
legend.text = c("Bona Fide","Cheater"), args.legend = list(x="topleft"),
125+
beside=TRUE, axes=F)
126+
# bottom panel:
127+
par(cex.main = 1.5, mar = c(5, 6, 4, 5) + 0.1, mgp = c(3.5, 1, 0), cex.lab = 1.5,
128+
font.lab = 2, cex.axis = 1.3, bty = "n", las=1)
129+
pc.line <- array()
130+
for (i in 1:41) {
131+
pc.line[i] <- mean((k>=(i-1))==truth)
132+
}
133+
134+
dev.new() # so the plot below does not overwrite the plot above
135+
136+
plot(c(0:40), pc.line, type="l", lwd=2, xlim=c(0,40), ylim=c(0.4,1),
137+
xlab="Number of Items Recalled Correctly",
138+
ylab=" ", axes=F)
139+
axis(1, at=c(0,seq(from=5,by=5,to=40)))
140+
axis(2, at=c(.5,.75,1))
141+
par(las=0)
142+
mtext("Prop. Correct",side=2, line=2.5,cex=1.5)
143+
# Now add the distribution:
144+
pc.dens <- density(pc)
145+
polygon(c(0,pc.dens$y,0,0), c(pc.dens$x[1]-.01,pc.dens$x,pc.dens$x[1]+.01,
146+
pc.dens$x[1]-.01), col="green")
147+
148+
# plot 6.10
149+
windows()
150+
par(cex.main = 1.5, mar = c(5, 6, 4, 5) + 0.1, mgp = c(3.5, 1, 0), cex.lab = 1.5,
151+
font.lab = 2, cex.axis = 1.3, bty = "n", las=1)
152+
plot(k,summary(samples)$summary[237:354, 1],ylim=c(0,1),xlim=c(0,n),
153+
xlab= "Number of Items Recalled Correctly", ylab="Cheater Classification",
154+
lwd=2, pch=4)
155+
# in the code, z=0 is bonafide and z=1 is cheating
156+
# so z gives the prob of being assigned to cheating group
157+

Bayesian_Cognitive_Modeling/ParameterEstimation/LatentMixtures/Cheating_Stan.R

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ parameters {
1717
real<lower=0> mudiff;
1818
real<lower=5,upper=50> lambdabon;
1919
real<lower=5,upper=50> lambdache;
20-
matrix<lower=0,upper=1>[2,p] theta;
20+
vector<lower=0,upper=1>[p] theta;
2121
}
2222
transformed parameters {
2323
vector[2] lp_parts[p];
@@ -37,8 +37,8 @@ transformed parameters {
3737
// Data are Binomial with Rate Given by
3838
// Each Person’s Group Assignment
3939
for (i in 1:p) {
40-
lp_parts[i,1] <- log1m(phi) + binomial_log(k[i], n, theta[1,i]);
41-
lp_parts[i,2] <- log(phi) + binomial_log(k[i], n, theta[2,i]);
40+
lp_parts[i,1] <- log1m(phi) + beta_log(theta[i], alpha[1], beta[1]);
41+
lp_parts[i,2] <- log(phi) + beta_log(theta[i], alpha[2], beta[2]);
4242
}
4343
}
4444
model {
@@ -48,11 +48,10 @@ model {
4848
// Relatively Uninformative Prior on Base Rate
4949
phi ~ beta(5, 5);
5050
51-
theta[1] ~ beta(alpha[1], beta[1]);
52-
theta[2] ~ beta(alpha[2], beta[2]);
53-
5451
for (i in 1:p)
5552
increment_log_prob(log_sum_exp(lp_parts[i]));
53+
54+
k ~ binomial(n, theta);
5655
}
5756
generated quantities {
5857
int<lower=0,upper=1> z[p];
@@ -81,9 +80,9 @@ data <- list(p=p, k=k, n=n, truth=truth) # To be passed on to Stan
8180

8281
myinits <- list(
8382
list(mudiff=.1, phi=.5, mubon=.5, lambdabon=30, lambdache=25,
84-
theta=matrix(rep(.5, 2 * p), 2, p)),
83+
theta=rep(.5, p)),
8584
list(mudiff=.15, phi=.5, mubon=.5, lambdabon=25, lambdache=30,
86-
theta=matrix(rep(.5, 2 * p), 2, p)))
85+
theta=rep(.5, p)))
8786

8887
# Parameters to be monitored:
8988
parameters <- c("theta", "z", "mubon", "lambdabon", "muche", "lambdache",
@@ -95,10 +94,10 @@ samples <- stan(model_code=model,
9594
data=data,
9695
init=myinits, # If not specified, gives random inits
9796
pars=parameters,
98-
iter=30000,
97+
iter=1000,
9998
chains=2,
10099
thin=1,
101-
# warmup = 100, # Stands for burn-in; Default = iter/2
100+
warmup = 200, # Stands for burn-in; Default = iter/2
102101
# seed = 123 # Setting seed; Default is random seed
103102
)
104103
# Now the values for the monitored parameters are in the "samples" object,
@@ -149,9 +148,9 @@ polygon(c(0,pc.dens$y,0,0), c(pc.dens$x[1]-.01,pc.dens$x,pc.dens$x[1]+.01,
149148
windows()
150149
par(cex.main = 1.5, mar = c(5, 6, 4, 5) + 0.1, mgp = c(3.5, 1, 0), cex.lab = 1.5,
151150
font.lab = 2, cex.axis = 1.3, bty = "n", las=1)
152-
plot(k,summary(samples)$summary[237:354, 1],ylim=c(0,1),xlim=c(0,n),
153-
xlab= "Number of Items Recalled Correctly", ylab="Cheater Classification",
154-
lwd=2, pch=4)
151+
plot(k,summary(samples)$summary[paste("z[", 1:118, "]", sep=""), 1], ylim=c(0,1),
152+
xlim=c(0,n), lwd=2, pch=4, xlab= "Number of Items Recalled Correctly",
153+
ylab="Cheater Classification")
155154
# in the code, z=0 is bonafide and z=1 is cheating
156155
# so z gives the prob of being assigned to cheating group
157156

0 commit comments

Comments
 (0)