24/03/2021
There are also four optional blocks
functions
transformed data
transformed parameters
generated quantities
dat<-iris[iris$Species=="setosa",c("Petal.Length", "Sepal.Length")] mod <- lm(Petal.Length ~ Sepal.Length, data = dat) dat <- transform(dat, Fitted = fitted(mod)) ggplot(dat, aes(x=Sepal.Length, y=Petal.Length)) + geom_point(color="red") + geom_smooth(se=FALSE, method = "lm") + geom_segment(aes(x = Sepal.Length, y = Petal.Length, xend = Sepal.Length, yend = Fitted))
## `geom_smooth()` using formula 'y ~ x'
write("// Stan model for simple linear regression data { int < lower = 1 > N; // Sample size vector[N] x; // Predictor vector[N] y; // Outcome } parameters { real alpha; // Intercept real beta; // Slope (regression coefficients) real < lower = 0 > sigma; // Error SD } model { y ~ normal(alpha + x * beta , sigma); } generated quantities { } // The posterior predictive distribution", "stan_model1.stan")
stanc("stan_model1.stan")$status# Check if we wrote a file
## [1] TRUE
stan_model1 <- "stan_model1.stan" # save the file path stan_data<-list(y=dat$Petal.Length, x=dat$Sepal.Length, N=nrow(dat))#prepare the data fit <- stan(file = stan_model1, data = stan_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)#running the model
print(fit)#basic summary stat
## Inference for Stan model: stan_model1. ## 4 chains, each with iter=1000; warmup=500; thin=1; ## post-warmup draws per chain=500, total post-warmup draws=2000. ## ## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat ## alpha 0.82 0.01 0.35 0.12 0.59 0.83 1.06 1.52 582 1.00 ## beta 0.13 0.00 0.07 -0.01 0.08 0.13 0.17 0.27 582 1.00 ## sigma 0.17 0.00 0.02 0.14 0.16 0.17 0.18 0.21 634 1.01 ## lp__ 61.53 0.05 1.31 58.04 60.93 61.89 62.50 62.99 588 1.00 ## ## Samples were drawn using NUTS(diag_e) at Fri Mar 26 12:10:25 2021. ## For each parameter, n_eff is a crude measure of effective sample size, ## and Rhat is the potential scale reduction factor on split chains (at ## convergence, Rhat=1).
posterior <- rstan::extract(fit) plot(stan_data$y~ stan_data$x, pch = 20,type="n") for (i in 1:500) { abline(posterior$alpha[i], posterior$beta[i], col = "gray", lty = 1) } abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lw = 2)
write("// Stan model for simple linear regression data { int < lower = 1 > N; // Sample size vector[N] x; // Predictor vector[N] y; // Outcome } parameters { real alpha; // Intercept real beta; // Slope (regression coefficients) real < lower = 0 > sigma; // Error SD } model { alpha ~ normal(10, 0.1); beta ~ normal(1, 0.1); y ~ normal(alpha + x * beta , sigma); } generated quantities { } // The posterior predictive distribution", "stan_model2.stan") stan_model2 <-"stan_model2.stan" # save the file path
traceplot(fit)
stan_dens(fit)
write("// Stan model for simple linear regression data { ... } parameters { ... } model { y ~ normal(x * beta + alpha, sigma); } generated quantities { real y_rep[N]; for (n in 1:N) { y_rep[n] = normal_rng(x[n] * beta + alpha, sigma); } }","stan_model2_GQ.stan")
stan_model2_GQ <- "stan_model2_GQ.stan" fit3 <- stan(file = stan_model2_GQ, data = stan_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)#running the model
y_rep <- as.matrix(fit3, pars = "y_rep") ppc_dens_overlay(stan_data$y, y_rep[1:200, ])
str(sleepstudy)
## 'data.frame': 180 obs. of 3 variables: ## $ Reaction: num 250 259 251 321 357 ... ## $ Days : num 0 1 2 3 4 5 6 7 8 9 ... ## $ Subject : Factor w/ 18 levels "308","309","310",..: 1 1 1 1 1 1 1 1 1 1 ...
\[y_{ij} = \beta_0 + u_{0j} + \left( \beta_1 + u_{1j} \right) \cdot {\rm{Days}} + e_i\] \[\left[ {\begin{array}{*{20}{c}} {{u_0}}\\ {{u_1}} \end{array}} \right] \sim\cal N \left( {\left[ {\begin{array}{*{20}{c}} 0\\ 0 \end{array}} \right],\Omega = \left[ {\begin{array}{*{20}{c}} {\sigma _0^2}&{{\mathop{\rm cov}} \left( {{u_0},{u_1}} \right)}\\ {{\mathop{\rm cov}} \left( {{u_0},{u_1}} \right)}&{\sigma _1^2} \end{array}} \right]} \right)\]
Data block
data { int<lower=1> N; //number of observations real RT[N]; //reaction times int<lower=0,upper=9> Days[N]; //predictor (days of sleep deprivation) // grouping factor int<lower=1> J; //number of subjects int<lower=1,upper=J> Subject[N]; //subject id }
Parameter block
parameters { vector[2] beta; // fixed-effects parameters real<lower=0> sigma_e; // residual std vector<lower=0>[2] sigma_u; // random effects standard deviations // declare L_u to be the Choleski factor of a 2x2 correlation matrix cholesky_factor_corr[2] L_u; matrix[2,J] z_u; // random effect matrix } transformed parameters { // this transform random effects so that they have the correlation // matrix specified by the correlation matrix above matrix[2,J] u; u = diag_pre_multiply(sigma_u, L_u) * z_u; }
Model block
model { real mu; // conditional mean of the dependent variable //priors L_u ~ lkj_corr_cholesky(1.5); // LKJ prior for the correlation matrix to_vector(z_u) ~ normal(0,2); sigma_e ~ normal(0, 5); // prior for residual standard deviation beta[1] ~ normal(0.3, 0.5); // prior for fixed-effect intercept beta[2] ~ normal(0.2, 2); // prior for fixed-effect slope //likelihood for (i in 1:N){ mu = beta[1] + u[1,Subject[i]] + (beta[2] + u[2,Subject[i]])*Days[i]; RT[i] ~ normal(mu, sigma_e); } }
Generated quantities block
generated quantities { matrix[2, 2] Omega; Omega = L_u * L_u' ; // so that it return the correlation matrix }
Prepare Data
d_stan <- list(Subject = as.numeric(factor(sleepstudy$Subject, labels = 1:length(unique(sleepstudy$Subject)))), Days = sleepstudy$Days, RT = sleepstudy$Reaction/1000, N = nrow(sleepstudy), J = length(unique(sleepstudy$Subject)))
sleep_model <- stan(file = "sleep_model.stan", data = d_stan, iter = 2000, chains = 4)
## ## SAMPLING FOR MODEL 'sleep_model' NOW (CHAIN 1). ## Chain 1: ## Chain 1: Gradient evaluation took 0 seconds ## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds. ## Chain 1: Adjust your expectations accordingly! ## Chain 1: ## Chain 1: ## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup) ## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup) ## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup) ## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup) ## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup) ## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup) ## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling) ## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling) ## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling) ## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling) ## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling) ## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling) ## Chain 1: ## Chain 1: Elapsed Time: 9.186 seconds (Warm-up) ## Chain 1: 3.699 seconds (Sampling) ## Chain 1: 12.885 seconds (Total) ## Chain 1: ## ## SAMPLING FOR MODEL 'sleep_model' NOW (CHAIN 2). ## Chain 2: ## Chain 2: Gradient evaluation took 0 seconds ## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds. ## Chain 2: Adjust your expectations accordingly! ## Chain 2: ## Chain 2: ## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup) ## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup) ## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup) ## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup) ## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup) ## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup) ## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling) ## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling) ## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling) ## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling) ## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling) ## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling) ## Chain 2: ## Chain 2: Elapsed Time: 9.159 seconds (Warm-up) ## Chain 2: 3.107 seconds (Sampling) ## Chain 2: 12.266 seconds (Total) ## Chain 2: ## ## SAMPLING FOR MODEL 'sleep_model' NOW (CHAIN 3). ## Chain 3: ## Chain 3: Gradient evaluation took 0 seconds ## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds. ## Chain 3: Adjust your expectations accordingly! ## Chain 3: ## Chain 3: ## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup) ## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup) ## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup) ## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup) ## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup) ## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup) ## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling) ## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling) ## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling) ## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling) ## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling) ## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling) ## Chain 3: ## Chain 3: Elapsed Time: 9.346 seconds (Warm-up) ## Chain 3: 3.539 seconds (Sampling) ## Chain 3: 12.885 seconds (Total) ## Chain 3: ## ## SAMPLING FOR MODEL 'sleep_model' NOW (CHAIN 4). ## Chain 4: ## Chain 4: Gradient evaluation took 0 seconds ## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds. ## Chain 4: Adjust your expectations accordingly! ## Chain 4: ## Chain 4: ## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup) ## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup) ## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup) ## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup) ## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup) ## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup) ## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling) ## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling) ## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling) ## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling) ## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling) ## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling) ## Chain 4: ## Chain 4: Elapsed Time: 9.688 seconds (Warm-up) ## Chain 4: 3.528 seconds (Sampling) ## Chain 4: 13.216 seconds (Total) ## Chain 4:
print(sleep_model, pars = c("beta"), probs = c(0.025, 0.975), digits = 3)
## Inference for Stan model: sleep_model. ## 4 chains, each with iter=2000; warmup=1000; thin=1; ## post-warmup draws per chain=1000, total post-warmup draws=4000. ## ## mean se_mean sd 2.5% 97.5% n_eff Rhat ## beta[1] 0.251 0 0.008 0.236 0.266 2247 1 ## beta[2] 0.010 0 0.002 0.007 0.014 2317 1 ## ## Samples were drawn using NUTS(diag_e) at Fri Mar 26 12:12:30 2021. ## For each parameter, n_eff is a crude measure of effective sample size, ## and Rhat is the potential scale reduction factor on split chains (at ## convergence, Rhat=1).
plot(sleep_model, plotfun = "hist", pars = c("beta", "sigma_u"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(sleep_model, pars = c("Omega"), digits = 3)
## Inference for Stan model: sleep_model. ## 4 chains, each with iter=2000; warmup=1000; thin=1; ## post-warmup draws per chain=1000, total post-warmup draws=4000. ## ## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat ## Omega[1,1] 1.000 NaN 0.000 1.000 1.000 1.000 1.000 1.000 NaN NaN ## Omega[1,2] 0.077 0.009 0.292 -0.479 -0.132 0.077 0.283 0.643 1125 1.002 ## Omega[2,1] 0.077 0.009 0.292 -0.479 -0.132 0.077 0.283 0.643 1125 1.002 ## Omega[2,2] 1.000 0.000 0.000 1.000 1.000 1.000 1.000 1.000 3869 0.999 ## ## Samples were drawn using NUTS(diag_e) at Fri Mar 26 12:12:30 2021. ## For each parameter, n_eff is a crude measure of effective sample size, ## and Rhat is the potential scale reduction factor on split chains (at ## convergence, Rhat=1).
library(brms) # fit linear regression fit_lm<-brm(y~x,data=dat) #Fit mixed effect model fit_lmm<-brm(Reaction~Days+(Days|Subject), data=sleepstudy)
https://cran.r-project.org/web/packages/brms/index.html
```