Video #4 code

Categories, curves, and splines

Published

September 14, 2022

library(tidyverse)
library(broom)
library(brms)
library(tidybayes)
library(ggdist)
library(ggdag)
library(splines)
library(lubridate)
library(patchwork)

# Plot stuff
clrs <- MetBrewer::met.brewer("Lakota", 6)
theme_set(theme_bw())

# Seed stuff
BAYES_SEED <- 1234
set.seed(1234)

# Data
data(Howell1, package = "rethinking")

d <- Howell1 %>% 
  filter(age > 18) %>% 
  # Stan doesn't like working with columns with attributes, but I want to keep
  # the attributes for unscaling later, so there are two scaled height columns
  mutate(height_scaled = scale(height),
         height_z = as.numeric(height_scaled)) %>% 
  mutate(sex = factor(male),
         sex_nice = factor(male, labels = c("Female", "Male")))

height_scale <- attributes(d$height_scaled) %>% 
  set_names(janitor::make_clean_names(names(.)))

head(Howell1)
##    height   weight age male
## 1 151.765 47.82561  63    1
## 2 139.700 36.48581  63    0
## 3 136.525 31.86484  65    0
## 4 156.845 53.04191  41    1
## 5 145.415 41.27687  51    0
## 6 163.830 62.99259  35    1

ggplot(d, aes(x = height, y = weight, color = sex_nice)) +
  geom_point() +
  scale_color_manual(values = clrs[1:2]) +
  labs(x = "Height (cm)", y = "Weight (kg)", color = NULL) +
  theme(legend.position = "bottom")

Sex only

\[ \begin{aligned} W_i &\sim \mathcal{N}(\mu_i, \sigma) \\ \mu_i &= \alpha_{S[i]} \\ \\ \alpha_j &\sim \mathcal{N}(60, 10) \\ \sigma &\sim \operatorname{Uniform}(0, 10) \end{aligned} \]

brms
Stan

Create a model with no intercept; use a factor version of sex to get the indexes like he does with \(\alpha_{S[i]}\).

priors <- c(prior(normal(60, 10), class = b),
            prior(uniform(0, 10), class = sigma, lb = 0, ub = 10))

sex_weight <- brm(
  bf(weight ~ 0 + sex),
  data = d,
  family = gaussian(),
  prior = priors,
  chains = 4, cores = 4, seed = BAYES_SEED
)
## Compiling Stan program...
## Trying to compile a simple C file
## Start sampling

sex_weight
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: weight ~ 0 + sex 
##    Data: d (Number of observations: 346) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sex0    41.87      0.41    41.08    42.65 1.00     3580     3109
## sex1    48.63      0.43    47.81    49.49 1.00     4090     2920
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     5.52      0.22     5.12     5.98 1.00     4135     2820
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Posterior mean weights:

sw_post_means <- sex_weight %>% 
  gather_draws(b_sex0, b_sex1)

sw_post_means %>% 
  mean_hdci()
## # A tibble: 2 × 7
##   .variable .value .lower .upper .width .point .interval
##   <chr>      <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1 b_sex0      41.9   41.1   42.7   0.95 mean   hdci     
## 2 b_sex1      48.6   47.7   49.4   0.95 mean   hdci

sw_post_means %>% 
  ggplot(aes(x = .value, fill = .variable)) +
  stat_halfeye() +
  scale_fill_manual(values = clrs[1:2]) +
  labs(x = "Posterior mean weight (kg)\n(Coefficient for sex)", y = "Density", fill = NULL) +
  theme(legend.position = "bottom")

Posterior mean contrast in weights:

sw_post_means_wide <- sex_weight %>% 
  spread_draws(b_sex0, b_sex1) %>% 
  mutate(diff = b_sex1 - b_sex0)

sw_post_means_wide %>% 
  select(diff) %>% 
  mean_hdci()
## # A tibble: 1 × 6
##    diff .lower .upper .width .point .interval
##   <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1  6.77   5.63   7.95   0.95 mean   hdci

sw_post_means_wide %>% 
  ggplot(aes(x = diff)) +
  stat_halfeye(fill = clrs[3]) +
  labs(x = "Posterior mean weight contrast (kg)\nWomen − Men", y = "Density")

Posterior predicted weights:

sw_post_pred <- tibble(sex = c("0", "1")) %>% 
  add_predicted_draws(sex_weight, ndraws = 1000)

sw_post_pred %>% 
  mean_hdci()
## # A tibble: 2 × 8
##   sex    .row .prediction .lower .upper .width .point .interval
##   <chr> <int>       <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1 0         1        41.6   30.7   50.9   0.95 mean   hdci     
## 2 1         2        49.0   38.2   59.3   0.95 mean   hdci

sw_post_pred %>% 
  ungroup() %>% 
  mutate(sex_nice = factor(sex, labels = c("Female", "Male"))) %>% 
  ggplot(aes(x = .prediction, fill = sex_nice)) +
  stat_halfeye(alpha = 0.75) +
  scale_fill_manual(values = clrs[1:2]) +
  labs(x = "Posterior predicted weight (kg)", y = "Density", fill = NULL) +
  theme(legend.position = "bottom")

Posterior predicted contrast in weights:

sw_post_pred_diff <- tibble(sex = c("0", "1")) %>% 
  add_predicted_draws(sex_weight, ndraws = 1000) %>% 
  compare_levels(variable = .prediction, by = sex)

sw_post_pred_diff %>% 
  mean_hdci()
## # A tibble: 1 × 7
##   sex   .prediction .lower .upper .width .point .interval
##   <chr>       <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1 1 - 0        7.21  -7.29   21.9   0.95 mean   hdci

sw_post_pred_diff %>% 
  ggplot(aes(x = .prediction)) +
  stat_halfeye(aes(fill = stat(x > 0))) +
  geom_vline(xintercept = 0) +
  scale_fill_manual(values = c(colorspace::lighten(clrs[3], 0.5), clrs[3]),
                    guide = "none") +
  labs(x = "Posterior weight contrast (kg)\nWomen − Men", y = "Density")

sw_stan.stan

data {
  // Stuff from R
  int<lower=1> n;
  vector[n] weight;
  int sex[n];
}

parameters {
  // Things to estimate
  real<lower=0, upper=10> sigma;
  vector[2] a;
}

transformed parameters {
  vector[n] mu;
  mu = a[sex];
}

model {
  // Likelihood
  weight ~ normal(mu, sigma);
  
  // Priors
  sigma ~ uniform(0, 10);
  a ~ normal(60, 10);
}

generated quantities {
  real diff;
  matrix[n, 2] weight_rep;
  vector[n] diff_rep;
  
  // Calculate the contrasts / difference between group means
  diff = a[2] - a[1];
  
  // Generate a posterior predictive distribution for each sex
  // To do this we have to create a matrix, with a column per sex
  for (j in 1:2) {
    for (i in 1:n) {
      weight_rep[i, j] = normal_rng(a[j], sigma);
    }
  }
  
  // Generate a posterior predictive distribution of group contrasts
  for (i in 1:n) {
    diff_rep[i] = normal_rng(a[2], sigma) - normal_rng(a[1], sigma);
  }
}

stan_data <- list(n = nrow(d),
                  weight = d$weight,
                  sex = d$male + 1)

model_sw_stan <- rstan::sampling(
  object = sw_stan,
  data = stan_data,
  iter = 5000, warmup = 1000, seed = BAYES_SEED, chains = 4, cores = 4
)

print(model_sw_stan,
      pars = c("sigma", "a[1]", "a[2]", "diff"))
## Inference for Stan model: ce4f92ccf1e48f603c1b01a6bf1ee94d.
## 4 chains, each with iter=5000; warmup=1000; thin=1; 
## post-warmup draws per chain=4000, total post-warmup draws=16000.
## 
##        mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
## sigma  5.52       0 0.21  5.12  5.37  5.51  5.66  5.96 14336    1
## a[1]  41.86       0 0.41 41.05 41.58 41.86 42.14 42.67 14671    1
## a[2]  48.63       0 0.43 47.79 48.34 48.64 48.92 49.49 15314    1
## diff   6.77       0 0.60  5.60  6.37  6.77  7.17  7.94 14697    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Sep 15 01:54:27 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Posterior mean weights:

sw_stan_post_means <- model_sw_stan %>% 
  gather_draws(a[i])
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

sw_stan_post_means %>% 
  mean_hdci()
## # A tibble: 2 × 8
##       i .variable .value .lower .upper .width .point .interval
##   <int> <chr>      <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1     1 a           41.9   41.1   42.7   0.95 mean   hdci     
## 2     2 a           48.6   47.7   49.4   0.95 mean   hdci

sw_stan_post_means %>% 
  ungroup() %>% 
  mutate(nice_i = factor(i, labels = c("a_female", "a_male"))) %>% 
  ggplot(aes(x = .value, fill = nice_i)) +
  stat_halfeye() +
  scale_fill_manual(values = clrs[1:2]) +
  labs(x = "Posterior mean weight (kg)\n(Coefficient for sex)", y = "Density", fill = NULL) +
  theme(legend.position = "bottom")

Posterior mean contrast in weights:

sw_stan_post_diff_means <- model_sw_stan %>% 
  gather_draws(diff)

sw_stan_post_diff_means %>% 
  mean_hdci()
## # A tibble: 1 × 7
##   .variable .value .lower .upper .width .point .interval
##   <chr>      <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1 diff        6.77   5.58   7.92   0.95 mean   hdci

sw_stan_post_diff_means %>% 
  ggplot(aes(x = .value)) +
  stat_halfeye(fill = clrs[3]) +
  labs(x = "Posterior mean weight contrast (kg)\nWomen − Men", y = "Density")

Posterior predicted weights:

predicted_weights_stan <- model_sw_stan %>% 
  spread_draws(weight_rep[i, sex])

predicted_weights_stan %>% 
  group_by(sex) %>% 
  mean_hdci()
## # A tibble: 2 × 10
##     sex     i i.lower i.upper weight_rep weight_…¹ weigh…² .width .point .inte…³
##   <int> <dbl>   <int>   <int>      <dbl>     <dbl>   <dbl>  <dbl> <chr>  <chr>  
## 1     1  174.       1     329       41.9      30.9    52.7   0.95 mean   hdci   
## 2     2  174.       1     329       48.6      37.8    59.5   0.95 mean   hdci   
## # … with abbreviated variable names ¹weight_rep.lower, ²weight_rep.upper,
## #   ³.interval

predicted_weights_stan %>% 
  ungroup() %>% 
  mutate(sex_nice = factor(sex, labels = c("Female", "Male"))) %>% 
  ggplot(aes(x = weight_rep, fill = sex_nice)) +
  stat_halfeye(alpha = 0.75) +
  scale_fill_manual(values = clrs[1:2]) +
  labs(x = "Posterior predicted weight (kg)", y = "Density", fill = NULL) +
  theme(legend.position = "bottom")

Posterior predicted contrast in weights:

sw_post_pred_diff_stan <- model_sw_stan %>% 
  gather_draws(diff_rep[i])

sw_post_pred_diff_stan %>%
  group_by(.variable) %>%
  mean_hdci() %>% 
  select(-starts_with("i"))
## # A tibble: 1 × 7
##   .variable .value .value.lower .value.upper .width .point .interval
##   <chr>      <dbl>        <dbl>        <dbl>  <dbl> <chr>  <chr>    
## 1 diff_rep    6.77        -8.63         22.1   0.95 mean   hdci

sw_post_pred_diff_stan %>% 
  ggplot(aes(x = .value)) +
  stat_halfeye(aes(fill = stat(x > 0))) +
  geom_vline(xintercept = 0) +
  scale_fill_manual(values = c(colorspace::lighten(clrs[3], 0.5), clrs[3]),
                    guide = "none") +
  labs(x = "Posterior weight contrast (kg)\nWomen − Men", y = "Density")

Sex + height

\[ \begin{aligned} W_i &\sim \mathcal{N}(\mu_i, \sigma) \\ \mu_i &= \alpha_{S[i]} + \beta_{S[i]}(H_i - \bar{H}) \\ \\ \alpha_j &\sim \mathcal{N}(60, 10) \\ \beta_j &\sim \operatorname{LogNormal}(0, 1) \\ \sigma &\sim \operatorname{Uniform}(0, 10) \end{aligned} \]

brms
Stan

This is the wonkiest syntax ever, but it works! We can hack the nl capabilities of brms to create indexed parameters.

Alternatively you can avoid this nl syntax! Use bf(weight ~ 0 + sex + sex:height_z), instead. Note the : for the interaction term instead of the more standard *. If you use *, you’ll get a more standard interaction term (i.e. the change in the slope when one group is active); if you use :, you’ll get slopes for each group. It’s a little subtlety in R’s formula syntax. The * is a shortcut for complete crossing of the terms, so x * z really turns into x + z + x:z behind the scenes. The : only does the interaction of the two terms, so that x:z is just \(x \times z\).

priors <- c(prior(normal(60, 10), class = b, nlpar = a),
            prior(lognormal(0, 1), class = b, nlpar = b, lb = 0),
            prior(uniform(0, 10), class = sigma, lb = 0, ub = 10))

model_height_sex <- brm(
  bf(weight ~ 0 + a + b * height_z,
     a ~ 0 + sex,
     b ~ 0 + sex,
     nl = TRUE),
  data = d,
  family = gaussian(),
  prior = priors,
  chains = 4, cores = 4, seed = BAYES_SEED
)
## Compiling Stan program...
## Trying to compile a simple C file
## Start sampling

model_height_sex
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: weight ~ 0 + a + b * height_z 
##          a ~ 0 + sex
##          b ~ 0 + sex
##    Data: d (Number of observations: 346) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## a_sex0    45.10      0.46    44.19    45.99 1.00     3021     3038
## a_sex1    45.19      0.47    44.30    46.11 1.00     2828     2799
## b_sex0     4.90      0.50     3.88     5.89 1.00     3018     2442
## b_sex1     4.65      0.45     3.79     5.53 1.00     2964     2861
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     4.28      0.17     3.97     4.62 1.00     3600     2789
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

sex_height_weight_post_pred <- expand_grid(
  height_z = seq(min(d$height_z), max(d$height_z), length.out = 50),
  sex = 0:1
) %>% 
  add_predicted_draws(model_height_sex) %>%
  compare_levels(variable = .prediction, by = sex, comparison = list(c("0", "1"))) %>% 
  mutate(height_unscaled = (height_z * height_scale$scaled_scale) + height_scale$scaled_center)

Overall distribution of predictive posterior contrasts:

ggplot(sex_height_weight_post_pred, aes(x = .prediction)) +
  stat_halfeye(fill = clrs[3]) +
  labs(x = "Posterior mean weight contrast (kg)\nWomen − Men", y = "Density")

Distribution of predictive posterior contrasts across range of heights:

ggplot(sex_height_weight_post_pred, aes(x = height_unscaled, y = .prediction)) +
  stat_lineribbon(aes(fill_ramp = stat(.width)), .width = ppoints(50),
                  fill = clrs[3], color = colorspace::darken(clrs[3], 0.5), 
                  show.legend = FALSE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_fill_ramp_continuous(from = "transparent", range = c(1, 0)) +
  labs(x = "Height (cm)", y = "Posterior weight contrast (kg)\nWomen − Men")

Extracting Stan code from Rethinking models

The ulam() function is super helpful for translating McElreath’s quap() syntax into Stan!

m_SHW <- rethinking::quap(
  alist(
    W ~ dnorm(mu, sigma),
    mu <- a[S] + b[S] * (H - Hbar),
    a[S] ~ dnorm(60, 10),
    b[S] ~ dlnorm(0, 1),
    sigma ~ dunif(0, 10)
  ), 
  data = list(
    W = d$weight,
    H = d$height,
    Hbar = mean(d$height),
    S = d$male + 1
  )
)

cat(rethinking::ulam(m_SHW, sample = FALSE)$model)
## data{
##     vector[346] W;
##     real Hbar;
##     vector[346] H;
##     int S[346];
## }
## parameters{
##     vector[2] a;
##     vector<lower=0>[2] b;
##     real<lower=0,upper=10> sigma;
## }
## model{
##     vector[346] mu;
##     sigma ~ uniform( 0 , 10 );
##     b ~ lognormal( 0 , 1 );
##     a ~ normal( 60 , 10 );
##     for ( i in 1:346 ) {
##         mu[i] = a[S[i]] + b[S[i]] * (H[i] - Hbar);
##     }
##     W ~ normal( mu , sigma );
## }

sex_height.stan

data {
  // Stuff from R
  int<lower=1> n;
  vector[n] weight;
  vector[n] height;
  int sex[n];
}

transformed data {
  // Center and standardize height
  vector[n] height_z;
  height_z = (height - mean(height)) / sd(height);
}

parameters {
  // Things to estimate
  real<lower=0, upper=10> sigma;
  vector[2] a;
  vector<lower=0>[2] b;
}

model {
  vector[n] mu;
  
  // Model for mu with intercepts (a) and coefficients (b) for each sex
  for (i in 1:n) {
    mu[i] = a[sex[i]] + b[sex[i]] * height_z[i];
  }

  // Likelihood
  weight ~ normal(mu, sigma);
  
  // Priors
  sigma ~ uniform(0, 10);
  a ~ normal(60, 10);
  b ~ lognormal(0, 1);
}

generated quantities {
  matrix[n, 2] weight_rep;
  vector[n] diff_rep;
  
  // Generate a posterior predictive distribution for each sex
  // To do this we have to create a matrix, with a column per sex
  for (j in 1:2) {
    for (i in 1:n) {
      real mu_hat_n = a[sex[i]] + b[sex[i]] * height_z[i];
      weight_rep[i, j] = normal_rng(mu_hat_n, sigma);
    }
  }
  
  // Generate a posterior predictive distribution of group contrasts
  for (i in 1:n) {
    diff_rep[i] = weight_rep[i, 1] - weight_rep[i, 2];
  }
}

stan_data <- list(n = nrow(d),
                  weight = d$weight,
                  height = d$height,
                  sex = d$male + 1)

model_sex_height_stan <- rstan::sampling(
  object = sex_height_stan,
  data = stan_data,
  iter = 5000, warmup = 1000, seed = BAYES_SEED, chains = 4, cores = 4
)

print(model_sex_height_stan,
      pars = c("sigma", "a[1]", "a[2]", "b[1]", "b[2]"))
## Inference for Stan model: f8b7828023f48992dd5011f1f1fa456e.
## 4 chains, each with iter=5000; warmup=1000; thin=1; 
## post-warmup draws per chain=4000, total post-warmup draws=16000.
## 
##        mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
## sigma  4.28       0 0.17  3.97  4.17  4.28  4.39  4.62 15332    1
## a[1]  45.10       0 0.45 44.22 44.80 45.10 45.41 45.99 10776    1
## a[2]  45.20       0 0.46 44.29 44.89 45.20 45.51 46.09 10499    1
## b[1]   4.91       0 0.49  3.95  4.57  4.91  5.24  5.86 10718    1
## b[2]   4.65       0 0.43  3.80  4.36  4.65  4.95  5.50 10394    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Sep 15 01:55:38 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

original_hw <- tibble(height = d$height,
                      weight = d$weight) %>% 
  mutate(i = 1:n())

predicted_diffs_sex_height_stan <- model_sex_height_stan %>% 
  spread_draws(diff_rep[i]) %>% 
  left_join(original_hw, by = "i")

Overall distribution of predictive posterior contrasts:

ggplot(predicted_diffs_sex_height_stan, aes(x = diff_rep)) +
  stat_halfeye(fill = clrs[3]) +
  labs(x = "Posterior mean weight contrast (kg)\nWomen − Men", y = "Density")

Distribution of predictive posterior contrasts across range of heights:

(The y-values are way off from the video here :shrug:)

ggplot(predicted_diffs_sex_height_stan, aes(x = height, y = diff_rep)) +
  stat_lineribbon(aes(fill_ramp = stat(.width)), .width = ppoints(50),
                  fill = clrs[3], color = colorspace::darken(clrs[3], 0.5), 
                  show.legend = FALSE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_fill_ramp_continuous(from = "transparent", range = c(1, 0)) +
  labs(x = "Height (cm)", y = "Posterior weight contrast (kg)\nWomen − Men")

Full luxury Bayes!

Given this DAG:

height_sex_dag <- dagify(
  x ~ z,
  y ~ x + z,
  exposure = "x",
  outcome = "y",
  labels = c(x = "Height", y = "Weight", z = "Sex"),
  coords = list(x = c(x = 1, y = 3, z = 2),
                y = c(x = 1, y = 1, z = 2))) %>% 
  tidy_dagitty() %>% 
  node_status()

ggplot(height_sex_dag, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_dag_edges() +
  geom_dag_point(aes(color = status)) +
  geom_dag_text(aes(label = label), size = 3.5) +
  scale_color_manual(values = clrs[c(1, 4)], guide = "none") +
  theme_dag()

…what’s the causal effect of sex on weight? Or:

\[ E(\text{Weight} \mid \operatorname{do}(\text{Sex})) \]

Here’s the official model:

\[ \begin{aligned} H_i &\sim \mathcal{N}(\nu_i, \tau) \\ W_i &\sim \mathcal{N}(\mu_i, \sigma) \\ \nu_i &= h_{S[i]} \\ \mu_i &= \alpha_{S[i]} + \beta_{S[i]}(H_i - \bar{H}) \\ \\ h_j &\sim \mathcal{N}(160, 10) \\ \alpha_j &\sim \mathcal{N}(60, 10) \\ \beta_j &\sim \operatorname{LogNormal}(0, 1) \\ \sigma, \tau &\sim \operatorname{Uniform}(0, 10) \end{aligned} \]

The results should look something like this, from the slides:

brms
Stan

priors <- c(prior(normal(60, 10), resp = weight, class = b, nlpar = a),
            prior(lognormal(0, 1), resp = weight, class = b, nlpar = b, lb = 0),
            prior(uniform(0, 10), resp = weight, class = sigma, lb = 0, ub = 10),
            # prior(normal(160, 10), resp = height, class = b),
            prior(normal(0, 1), resp = heightz, class = b),
            prior(uniform(0, 10), resp = heightz, class = sigma, lb = 0, ub = 10))

model_luxury <- brm(
  bf(weight ~ 0 + a + b * height_z,
     a ~ 0 + sex,
     b ~ 0 + sex,
     nl = TRUE) + 
    bf(height_z ~ 0 + sex) + 
    set_rescor(TRUE),
  data = d,
  family = gaussian(),
  prior = priors,
  chains = 4, cores = 4, seed = BAYES_SEED
)
## Compiling Stan program...
## Trying to compile a simple C file
## Start sampling

model_luxury
##  Family: MV(gaussian, gaussian) 
##   Links: mu = identity; sigma = identity
##          mu = identity; sigma = identity 
## Formula: weight ~ 0 + a + b * height_z 
##          a ~ 0 + sex
##          b ~ 0 + sex
##          height_z ~ 0 + sex 
##    Data: d (Number of observations: 346) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## weight_a_sex0    42.58      0.64    41.52    44.07 1.01      598      285
## weight_a_sex1    47.97      0.64    46.49    49.02 1.01      552      313
## weight_b_sex0     1.09      0.77     0.17     3.02 1.01      565      255
## weight_b_sex1     0.89      0.70     0.12     2.73 1.01      491      275
## heightz_sex0     -0.66      0.05    -0.77    -0.55 1.00     2384     1681
## heightz_sex1      0.74      0.06     0.62     0.85 1.00     2806     2528
## 
## Family Specific Parameters: 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma_weight      5.13      0.29     4.49     5.67 1.01      666      291
## sigma_heightz     0.72      0.03     0.67     0.77 1.00     3262     1965
## 
## Residual Correlations: 
##                        Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## rescor(weight,heightz)     0.54      0.09     0.32     0.65 1.01      560
##                        Tail_ESS
## rescor(weight,heightz)      265
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Posterior mean contrast in weights:

luxury_post_mean_diff <- expand_grid(
  height_z = seq(min(d$height_z), max(d$height_z), length.out = 50),
  sex = 0:1
) %>% 
  add_epred_draws(model_luxury) %>%
  compare_levels(variable = .epred, by = sex, comparison = list(c("1", "0")))

luxury_post_mean_diff %>% 
  filter(.category == "weight") %>% 
  ggplot(aes(x = .epred)) +
  stat_halfeye(fill = clrs[3]) +
  labs(x = "Posterior mean weight contrast (kg)\nWomen − Men", y = "Density")

Posterior predicted contrast in weights:

luxury_post_pred_diff <- expand_grid(
  height_z = seq(min(d$height_z), max(d$height_z), length.out = 50),
  sex = 0:1
) %>% 
  add_predicted_draws(model_luxury) %>%
  compare_levels(variable = .prediction, by = sex, comparison = list(c("1", "0")))

luxury_post_pred_diff %>% 
  filter(.category == "weight") %>% 
  ggplot(aes(x = .prediction)) +
  stat_halfeye(aes(fill = stat(x > 0))) +
  geom_vline(xintercept = 0) +
  scale_fill_manual(values = c(colorspace::lighten(clrs[3], 0.5), clrs[3]),
                    guide = "none") +
  labs(x = "Posterior weight contrast (kg)\nWomen − Men", y = "Density")

Extracting Stan code from Rethinking models

m_SHW_full <- rethinking::quap(
  alist(
    # Weight
    W ~ dnorm(mu, sigma),
    mu <- a[S] + b[S] * (H - Hbar),
    a[S] ~ dnorm(60, 10),
    b[S] ~ dlnorm(0, 1),
    sigma ~ dunif(0, 10),
    
    # Height
    H ~ dnorm(nu, tau),
    nu <- h[S],
    h[S] ~ dnorm(160, 10),
    tau ~ dunif(0, 10)
  ), data = list(
    W = d$weight,
    H = d$height,
    Hbar = mean(d$height),
    S = d$male + 1
  )
)

cat(rethinking::ulam(m_SHW_full, sample = FALSE)$model)

luxury_stan.stan

data {
  // Stuff from R
  int<lower=1> n;
  vector[n] weight;
  real Hbar;
  vector[n] height;
  int sex[n];
}

parameters {
  // Things to estimate
  vector[2] a;
  vector<lower=0>[2] b;
  real<lower=0,upper=10> sigma;
  vector[2] h;
  real<lower=0,upper=10> tau;
}

model {
  vector[n] mu;
  vector[n] nu;
  
  // Height model
  tau ~ uniform(0, 10);
  h ~ normal(160, 10);
  
  for (i in 1:n) {
    nu[i] = h[sex[i]];
  }
  
  // Weight model
  height ~ normal(nu , tau);
  sigma ~ uniform(0, 10);
  b ~ lognormal(0, 1);
  a ~ normal(60, 10);
  
  for (i in 1:n) {
    mu[i] = a[sex[i]] + b[sex[i]] * (height[i] - Hbar);
  }
  
  weight ~ normal(mu, sigma);
}

generated quantities {
  matrix[n, 2] weight_rep;
  matrix[n, 2] height_rep;
  vector[n] w_do_s;
  vector[2] mu_sex;
  real mu_diff;
  
  for (i in 1:2) {
    mu_sex[i] = a[sex[i]] + b[sex[i]] * (h[sex[i]] - Hbar);
  }
  
  mu_diff = mu_sex[1] - mu_sex[2];
  
  // Generate a posterior predictive distribution for each sex
  // To do this we have to create a matrix, with a column per sex
  for (j in 1:2) {
    for (i in 1:n) {
      height_rep[i, j] = normal_rng(h[sex[j]], tau);
      weight_rep[i, j] = normal_rng(a[sex[j]] + b[sex[j]] * (height_rep[i, j] - Hbar), sigma);
    }
  }
  
  // Generate a posterior predictive distribution of group contrasts
  for (i in 1:n) {
    w_do_s[i] = weight_rep[i, 1] - weight_rep[i, 2];
  }
}

stan_data <- list(n = nrow(d),
                  weight = d$weight,
                  height = d$height,
                  Hbar = mean(d$height),
                  sex = d$male + 1)

model_luxury_stan <- rstan::sampling(
  object = luxury_stan,
  data = stan_data,
  iter = 5000, warmup = 1000, seed = BAYES_SEED, chains = 4, cores = 4
)

print(model_luxury_stan,
      pars = c("a[1]", "a[2]", "b[1]", "b[2]", "sigma", "h[1]", "h[2]", "tau", 
               "mu_sex[1]", "mu_sex[2]", "mu_diff"))
## Inference for Stan model: 2ac72225665508fb4100c7f800818c3a.
## 4 chains, each with iter=5000; warmup=1000; thin=1; 
## post-warmup draws per chain=4000, total post-warmup draws=16000.
## 
##             mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
## a[1]       45.18       0 0.46  44.28  44.87  45.17  45.48  46.08 12392    1
## a[2]       45.14       0 0.46  44.25  44.83  45.14  45.45  46.03 12553    1
## b[1]        0.65       0 0.06   0.52   0.60   0.65   0.69   0.77 12075    1
## b[2]        0.61       0 0.06   0.50   0.57   0.61   0.65   0.72 13048    1
## sigma       4.28       0 0.16   3.97   4.17   4.28   4.39   4.62 21039    1
## h[1]      149.50       0 0.41 148.70 149.21 149.50 149.78 150.30 22463    1
## h[2]      160.37       0 0.44 159.52 160.08 160.37 160.67 161.22 22320    1
## tau         5.57       0 0.21   5.18   5.43   5.56   5.71   6.01 19301    1
## mu_sex[1]  48.63       0 0.42  47.79  48.35  48.63  48.92  49.46 21139    1
## mu_sex[2]  41.85       0 0.42  41.04  41.57  41.85  42.13  42.67 21824    1
## mu_diff     6.78       0 0.59   5.62   6.37   6.78   7.18   7.94 20965    1
## 
## Samples were drawn using NUTS(diag_e) at Thu Sep 15 01:56:43 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

Posterior mean contrast in weights:

luxury_post_mean_diff_stan <- model_luxury_stan %>% 
  spread_draws(mu_diff)

luxury_post_mean_diff_stan %>% 
  mean_hdci()
## # A tibble: 1 × 6
##   mu_diff .lower .upper .width .point .interval
##     <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
## 1    6.78   5.64   7.95   0.95 mean   hdci

ggplot(luxury_post_mean_diff_stan, aes(x = mu_diff)) +
  stat_halfeye(fill = clrs[3]) +
  labs(x = "Posterior mean weight contrast (kg)\nWomen − Men", y = "Density")

luxury_post_pred_diff_stan <- model_luxury_stan %>% 
  spread_draws(w_do_s[i])

luxury_post_pred_diff_stan %>% 
  mean_hdci()
## # A tibble: 346 × 7
##        i w_do_s .lower .upper .width .point .interval
##    <int>  <dbl>  <dbl>  <dbl>  <dbl> <chr>  <chr>    
##  1     1   6.79  -8.96   21.5   0.95 mean   hdci     
##  2     2   6.86  -8.69   22.2   0.95 mean   hdci     
##  3     3   6.79  -8.46   22.6   0.95 mean   hdci     
##  4     4   6.86  -9.38   21.3   0.95 mean   hdci     
##  5     5   6.71  -8.26   22.7   0.95 mean   hdci     
##  6     6   6.85  -8.15   22.8   0.95 mean   hdci     
##  7     7   6.78  -8.74   22.4   0.95 mean   hdci     
##  8     8   6.85  -8.36   22.6   0.95 mean   hdci     
##  9     9   6.82  -8.79   21.9   0.95 mean   hdci     
## 10    10   6.80  -8.53   21.8   0.95 mean   hdci     
## # … with 336 more rows

ggplot(luxury_post_pred_diff_stan, aes(x = w_do_s)) +
  stat_halfeye(aes(fill = stat(x > 0))) +
  geom_vline(xintercept = 0) +
  scale_fill_manual(values = c(colorspace::lighten(clrs[3], 0.5), clrs[3]),
                    guide = "none") +
  labs(x = "Posterior weight contrast (kg)\nWomen − Men", y = "Density")

Curvy linear models

The full data isn’t linear, but linear models can be fit to curvy data, but in geocentric, purposely wrong ways

ggplot(Howell1, aes(x = height, y = weight)) +
  geom_point()

Polynomials

We can use a squared term like:

\[ \mu_i = \alpha + \beta_1 H_i + \beta_2 H_i^2 \]

And that fits okay, but it does weird things on the edges of the data, like weight increasing when height gets really small

ggplot(Howell1, aes(x = height, y = weight)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), fullrange = TRUE) +
  xlim(c(40, 200))

We can throw in more terms too:

\[ \mu_i = \alpha + \beta_1 H_i + \beta_2 H_i^2 + \beta_3 H_i^3 + \beta_4 H_i^4 \]

And the line fits better, but it does really weird things on the edges, like dropping precipitously after the max height:

ggplot(Howell1, aes(x = height, y = weight)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4), fullrange = TRUE) +
  xlim(c(40, 200))

Weirdly, logging works really well because of biological reasons (that he’ll explain in chapter 19)

ggplot(Howell1, aes(x = height, y = weight)) +
  geom_point() +
  geom_smooth(method = "glm", formula = y ~ x,
              method.args = list(family = gaussian(link = "log"))) +
  scale_x_log10()

Splines

data(cherry_blossoms, package = "rethinking")

cherry_blossoms <- cherry_blossoms %>% 
  filter(complete.cases(doy)) %>% 
  mutate(idx = 1:n()) %>% 
  arrange(year)

num_knots <- 3
knot_list <- seq(from = min(cherry_blossoms$year), 
                 to = max(cherry_blossoms$year), 
                 length.out = num_knots)

cherry_splines <- cherry_blossoms %>% 
  nest(data = everything()) %>% 
  mutate(basis_matrix = purrr::map(data, ~{
    t(bs(.$year, knots = knot_list, degree = 2, intercept = FALSE))
  })) %>% 
  mutate(weights = purrr::map(basis_matrix, ~rep(c(1,-1), length = nrow(.)))) 

cherry_splines_mu <- cherry_splines %>% 
  mutate(mu = purrr::map2(weights, basis_matrix, ~as.vector(.x %*% .y))) %>% 
  unnest(c(data, mu))

cherry_basis <- cherry_splines %>% 
  unnest(weights) %>% 
  mutate(row = 1:n()) %>% 
  filter(row != 5) %>% 
  mutate(basis = purrr::pmap(list(basis_matrix, weights, row), ~{
    ..1[..3,] * ..2
  })) %>% 
  mutate(row = glue::glue("Basis {row} (w = {weights})")) %>% 
  unnest(c(data, basis)) 
  
ggplot(data = cherry_splines_mu, aes(x = year)) +
  geom_line(aes(y = mu), size = 2) +
  geom_line(data = cherry_basis, aes(y = basis, color = row), size = 1) +
  labs(y = "Weighted basis function", color = NULL) +
  theme(axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.line.x = element_blank())

Plot from book cover, for fun:

model_doy <- brm(
  bf(doy ~ 1 + s(year, bs = "bs", k = 30)),
  family = gaussian(),
  data = cherry_blossoms,
  prior = c(prior(normal(100, 10), class = Intercept),
                prior(normal(0, 10), class = b),
                prior(student_t(3, 0, 5.9), class = sds),
                prior(exponential(1), class = sigma)),
  chains = 4, cores = 4, seed = BAYES_SEED
)

plot_doy <- cherry_blossoms %>% 
  add_epred_draws(model_doy) %>% 
  summarize(mean_hdci(.epred, .width = 0.89)) %>% 
  ungroup() %>% 
  mutate(day_of_year = as.Date(doy, origin = "2021-12-31"))

panel_bottom <- plot_doy %>% 
  ggplot(aes(x = year)) +
  geom_point(aes(y = doy), pch = 8, size = 3.5, color = "#FF4136", alpha = 0.5) +
  geom_ribbon(aes(ymin = ymin, ymax = ymax), fill = "#111111", alpha = 0.5) +
  scale_x_continuous(breaks = c(900, 1400, 2000),
                     labels = function(x) paste(x, "CE")) +
  scale_y_continuous(labels = function(y) format(as.Date(y, origin = "2021-12-31"), "%B %e"),
                     breaks = yday(ymd(c("2022-04-01", "2022-05-01")))) +
  labs(x = NULL, y = "Day of first blossom")
panel_bottom

The cover uses imputed data for the missing values. I’m assuming that’ll get covered later in the book, but in the meantime, here’s a partial version of the top panel of the front cover plot:

panel_top <- cherry_blossoms %>% 
  drop_na(temp) %>% 
  ggplot(aes(x = year, y = temp)) +
  geom_point(color = "#001f3f", size = 3, alpha = 0.3) +
  scale_x_continuous(breaks = c(900, 1400, 2000),
                     labels = function(x) paste(x, "CE")) +
  scale_y_continuous(labels = function(y) paste0(y, "°C"),
                     breaks = c(5, 8)) +
  labs(x = NULL, y = "March temperature")
panel_top

All together:

panel_top / 
  (panel_bottom + 
     theme(axis.text.x = element_blank(),
           axis.ticks.x = element_blank()))