---
title: "Validating smoothbp against brms and mcp"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Validating smoothbp against brms and mcp}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  eval     = NOT_CRAN,
  fig.width  = 7,
  fig.height = 4,
  dpi        = 96
)
```

## Why this vignette exists

`smoothbp` is a bespoke Metropolis-within-Gibbs sampler for Bayesian
hierarchical piecewise regression with **multiple** logistic-smoothed
change-points. The recovery tests are a self-consistency check — simulator
and likelihood share the same code path. To trust the package one also
wants to see that it agrees with an independent implementation. `brms`
(via Stan) is the natural reference for fitting the same generative model
via NUTS on a custom non-linear formula. `mcp` (via JAGS) is the closest
specialist competitor and provides a complementary perspective: it fits
**hard** (sharp) change-points rather than the logistic-smoothed
transition that `smoothbp` uses.

Four scenarios are compared below:

1. **Intercept-only** — all parameters are scalar (the baseline case).
   Compared against **brms**.
2. **Covariate on omega** — `omega ~ 1 + treatment`. Compared against **brms**.
3. **Hard vs smooth change-point** — single change-point, fixed effects
   only. Compared against **mcp**.
4. **Multi-breakpoint** — two true change-points; smoothbp vs brms,
   showing overlapping posterior distributions.

### One important practical note before we start

When the change-point parameter `omega` drifts outside the range of the
time variable, the smoothing term collapses and `delta1` becomes effectively
unidentified. NUTS chains that initialise in that region can get stuck
there for the entire run, producing an artificially multimodal posterior
that has nothing to do with the data. The clean fix is to bound `omega`
above at `max(tau)` in both samplers, and to initialise brms chains near
the prior mean (which is what smoothbp does internally). Both fixes
applied below.

```{r setup, message = FALSE, warning = FALSE, eval = TRUE}
library(smoothbp)
library(posterior)
library(ggplot2)
library(dplyr)
library(tidyr)

have_brms <- requireNamespace("brms", quietly = TRUE) && NOT_CRAN
if (!have_brms && NOT_CRAN) {
  message("brms not installed -- install.packages('brms') to render the comparison.")
}

have_mcp <- requireNamespace("mcp",   quietly = TRUE) &&
            requireNamespace("rjags", quietly = TRUE) && NOT_CRAN
if (!have_mcp && NOT_CRAN) {
  message("mcp or rjags not installed -- install JAGS then install.packages('mcp') ",
          "to render the mcp comparison.")
}
```

---

# Scenario 1: Intercept-only model

## Simulate one dataset

```{r simulate}
set.seed(31)
dat <- simulate_smoothbp(
  n_subj = 25, n_obs = 10,
  b0 = 5.0, b1 = -0.4, b2 = 1.4,
  omega = 3.2, rho = 4.0,
  sigma = 0.4, sigma_u = 0.7,
  seed = 31L
)
true_params(dat)
```

## Fit with smoothbp

```{r fit-smoothbp}
t0 <- Sys.time()
fit_sbp <- smoothbp(
  formula = y ~ tau,
  b0      = ~ 1 + (1 | subject),
  b1      = ~ 1,
  deltas  = list(~ 1),
  omega   = list(~ 1),
  rho     = list(~ 1),
  data    = dat,
  priors  = smoothbp_priors(
    b0      = prior_normal(0, 10),
    b1      = prior_normal(0, 2),
    omega   = list(prior_normal(3, 2, lb = 0, ub = max(dat$tau))),
    rho     = list(prior_normal(3, 2, lb = 0))
  ),
  chains  = 4L, iter = 2000L, warmup = 1000L,
  seed    = 31L, .verbose = FALSE
)
time_sbp <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
```

## Fit with brms (Stan / NUTS)

```{r fit-brms, eval = have_brms, message = FALSE, warning = FALSE, results = "hide"}
suppressPackageStartupMessages(library(brms))

bf_smoothed <- brms::bf(
  y ~ b0 + b1 * (tau - omega) +
        b2 * (tau - omega) / (1 + exp(-(tau - omega) * rho)),
  b0 ~ 1 + (1 | subject),
  b1 ~ 1, b2 ~ 1, omega ~ 1, rho ~ 1,
  nl = TRUE
)

# brms::prior() captures arguments by non-standard evaluation, so
# `ub = max(dat$tau)` would be embedded literally in the generated Stan
# code. Pre-compute the value and pass it via prior_string() which uses
# standard evaluation and accepts numeric bounds.
ub_omega <- max(dat$tau)

priors_brms <- c(
  brms::prior(normal(0, 10), nlpar = "b0"),
  brms::prior(normal(0, 2),  nlpar = "b1"),
  brms::prior(normal(0, 2),  nlpar = "b2"),
  brms::prior_string("normal(3, 2)", nlpar = "omega",
                     lb = 0, ub = ub_omega),
  brms::prior(normal(3, 2),  nlpar = "rho",   lb = 0)
)

# Initialise every chain near the prior mean.  Without this, Stan's default
# uniform-on-(-2, 2) initialisation lands some chains in the unidentifiable
# omega > max(tau) trap and they never escape.
init_fun <- function() list(
  b_b0    = array(rnorm(1, 5, 1)),
  b_b1    = array(rnorm(1, 0, 0.3)),
  b_b2    = array(rnorm(1, 0, 0.3)),
  b_omega = array(rnorm(1, 3, 0.3)),
  b_rho   = array(rnorm(1, 3, 0.5))
)

t0 <- Sys.time()
fit_brms <- brms::brm(
  bf_smoothed,
  data    = dat,
  prior   = priors_brms,
  chains  = 4, iter = 2000, warmup = 1000,
  seed    = 31, refresh = 0,
  init    = init_fun,
  control = list(adapt_delta = 0.95)
)
time_brms <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
```

## Did brms converge?

```{r brms-convergence, eval = have_brms}
sbp_names <- c(
  "b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)",
  "omega1_(Intercept)", "rho1_(Intercept)",
  "sigma", "sigma_u"
)
brms_names <- c(
  "b_b0_Intercept", "b_b1_Intercept", "b_b2_Intercept",
  "b_omega_Intercept", "b_rho_Intercept",
  "sigma", "sd_subject__b0_Intercept"
)

posterior::summarise_draws(
  posterior::as_draws_df(fit_brms), rhat, ess_bulk
) %>%
  dplyr::filter(variable %in% brms_names) %>%
  dplyr::transmute(parameter = sbp_names[match(variable, brms_names)],
                   rhat, ess_bulk) %>%
  knitr::kable(digits = 3,
               caption = "brms convergence on population-level parameters.")
```

If any `rhat` exceeds 1.05, brms has not mixed and the comparison below is
contaminated; do not interpret disagreements as smoothbp errors. With the
bounded `omega` prior and the seeded `init_fun`, all four chains should land
in the same basin.

## Compare posterior summaries

```{r compare-summaries, eval = have_brms}
sbp_draws  <- posterior::as_draws_df(fit_sbp$draws)[, sbp_names]
brms_draws <- as.data.frame(posterior::as_draws_df(fit_brms))[, brms_names]
colnames(brms_draws) <- sbp_names

draws_long <- dplyr::bind_rows(
  tidyr::pivot_longer(sbp_draws,  cols = dplyr::everything(),
                      names_to = "parameter", values_to = "value") %>%
    dplyr::mutate(package = "smoothbp"),
  tidyr::pivot_longer(brms_draws, cols = dplyr::everything(),
                      names_to = "parameter", values_to = "value") %>%
    dplyr::mutate(package = "brms")
)

draws_long %>%
  dplyr::group_by(parameter, package) %>%
  dplyr::summarise(
    mean = mean(value),
    sd   = sd(value),
    q025 = quantile(value, 0.025),
    q975 = quantile(value, 0.975),
    .groups = "drop"
  ) %>%
  tidyr::pivot_wider(
    names_from  = package,
    values_from = c(mean, sd, q025, q975),
    names_glue  = "{package}_{.value}"
  ) %>%
  dplyr::mutate(
    delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) /
                       (abs(brms_mean) + 1e-8)
  ) %>%
  knitr::kable(digits = 3,
               caption = "Posterior summaries for both packages.")
```

## Overlaid marginal posteriors

```{r overlay, eval = have_brms, fig.height = 5}
ggplot(draws_long, aes(x = value, fill = package, colour = package)) +
  geom_density(alpha = 0.35) +
  facet_wrap(~ parameter, scales = "free", ncol = 3) +
  scale_fill_manual(values  = c(smoothbp = "#1f77b4", brms = "#d62728")) +
  scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
  labs(
    title    = "Marginal posteriors: smoothbp vs brms",
    subtitle = sprintf(
      "Same simulated data, same priors. smoothbp: %.1fs   brms: %.1fs",
      time_sbp, time_brms
    ),
    x = NULL, y = "density"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")
```

## Effective sample size per second

```{r ess, eval = have_brms}
ess_sbp  <- posterior::summarise_draws(fit_sbp$draws, ess_bulk) %>%
  dplyr::filter(variable %in% sbp_names) %>%
  dplyr::transmute(parameter = variable,
                   smoothbp_ess_per_sec = ess_bulk / time_sbp)

ess_brms <- posterior::summarise_draws(
  posterior::as_draws_df(fit_brms), ess_bulk
) %>%
  dplyr::filter(variable %in% brms_names) %>%
  dplyr::mutate(parameter = sbp_names[match(variable, brms_names)]) %>%
  dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms)

dplyr::full_join(ess_sbp, ess_brms, by = "parameter") %>%
  knitr::kable(digits = 1,
               caption = "Bulk ESS / second by package and parameter.")
```

---

# Scenario 2: Covariate on omega

This scenario exercises the **HMC-within-Gibbs** sampler path that is
activated when a non-linear parameter (`omega` or `rho`) has two or more
coefficients. Here `omega ~ 1 + treatment` gives a group-specific
change-point location.

## Simulate data with treatment effect on omega

```{r simulate-cov}
set.seed(8147)

n_subj   <- 30
n_obs    <- 10
tau_range <- c(0, 6)

b0_true      <- 5.0
b1_true      <- -0.4
delta_true   <- 1.4
omega_int    <- 3.2
omega_trt    <- -0.8
rho_true     <- 4.0
sigma_true   <- 0.4
sigma_u_true <- 0.7

treatment <- rep(c(0, 1), each = n_subj / 2)
u_j <- rnorm(n_subj, 0, sigma_u_true)
.sigmoid <- function(x) 1 / (1 + exp(-x))

rows <- vector("list", n_subj)
for (j in seq_len(n_subj)) {
  tau_j   <- seq(tau_range[1], tau_range[2], length.out = n_obs)
  omega_j <- omega_int + omega_trt * treatment[j]
  d_j     <- tau_j - omega_j
  s_j     <- .sigmoid(d_j * rho_true)
  mu_j    <- (b0_true + u_j[j]) + b1_true * d_j + delta_true * d_j * s_j
  rows[[j]] <- data.frame(
    subject   = factor(j),
    tau       = tau_j,
    treatment = treatment[j],
    y         = mu_j + rnorm(n_obs, 0, sigma_true)
  )
}
dat_cov <- do.call(rbind, rows)

cat(sprintf("True omega: intercept = %.1f, treatment effect = %.1f\n",
            omega_int, omega_trt))
cat(sprintf("  Control  (treatment = 0): omega = %.1f\n", omega_int))
cat(sprintf("  Treated  (treatment = 1): omega = %.1f\n", omega_int + omega_trt))
```

## Fit with smoothbp

```{r fit-smoothbp-cov}
t0 <- Sys.time()
fit_sbp_cov <- smoothbp(
  formula = y ~ tau,
  b0      = ~ 1 + (1 | subject),
  b1      = ~ 1,
  deltas  = list(~ 1),
  omega   = list(~ 1 + treatment),
  rho     = list(~ 1),
  data    = dat_cov,
  priors  = smoothbp_priors(
    b0    = prior_normal(0, 10),
    b1    = prior_normal(0, 2),
    omega = list(list(
      "(Intercept)" = prior_normal(3, 2, lb = 0, ub = max(dat_cov$tau)),
      "treatment"   = prior_normal(0, 2)
    )),
    rho   = list(prior_normal(3, 2, lb = 0))
  ),
  chains  = 4L, iter = 2000L, warmup = 1000L,
  seed    = 8147L, .verbose = FALSE
)
time_sbp_cov <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
```

## Fit with brms

```{r fit-brms-cov, eval = have_brms, message = FALSE, warning = FALSE, results = "hide"}
bf_cov <- brms::bf(
  y ~ b0 + b1 * (tau - omega) +
        b2 * (tau - omega) / (1 + exp(-(tau - omega) * rho)),
  b0    ~ 1 + (1 | subject),
  b1    ~ 1,
  b2    ~ 1,
  omega ~ 1 + treatment,
  rho   ~ 1,
  nl = TRUE
)

priors_brms_cov <- c(
  brms::prior(normal(0, 10), nlpar = "b0"),
  brms::prior(normal(0, 2),  nlpar = "b1"),
  brms::prior(normal(0, 2),  nlpar = "b2"),
  brms::prior_string("normal(3, 2)", nlpar = "omega", coef = "Intercept"),
  brms::prior(normal(0, 2),  nlpar = "omega", coef = "treatment"),
  brms::prior(normal(3, 2),  nlpar = "rho", lb = 0)
)

init_fun_cov <- function() list(
  b_b0    = array(rnorm(1, 5, 1)),
  b_b1    = array(rnorm(1, 0, 0.3)),
  b_b2    = array(rnorm(1, 0, 0.3)),
  b_omega = array(c(rnorm(1, 3, 0.3), rnorm(1, -0.5, 0.3))),
  b_rho   = array(rnorm(1, 3, 0.5))
)

t0 <- Sys.time()
fit_brms_cov <- brms::brm(
  bf_cov,
  data    = dat_cov,
  prior   = priors_brms_cov,
  chains  = 4, iter = 2000, warmup = 1000,
  seed    = 8147, refresh = 0,
  init    = init_fun_cov,
  control = list(adapt_delta = 0.95)
)
time_brms_cov <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
```

## Compare posteriors

```{r compare-cov, eval = have_brms}
sbp_names_cov <- c(
  "b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)",
  "omega1_(Intercept)", "omega1_treatment",
  "rho1_(Intercept)", "sigma", "sigma_u"
)
brms_names_cov <- c(
  "b_b0_Intercept", "b_b1_Intercept", "b_b2_Intercept",
  "b_omega_Intercept", "b_omega_treatment",
  "b_rho_Intercept", "sigma", "sd_subject__b0_Intercept"
)

sbp_draws_cov  <- posterior::as_draws_df(fit_sbp_cov$draws)[, sbp_names_cov]
brms_draws_cov <- as.data.frame(
  posterior::as_draws_df(fit_brms_cov)
)[, brms_names_cov]
colnames(brms_draws_cov) <- sbp_names_cov

draws_long_cov <- dplyr::bind_rows(
  tidyr::pivot_longer(sbp_draws_cov, cols = dplyr::everything(),
                      names_to = "parameter", values_to = "value") %>%
    dplyr::mutate(package = "smoothbp"),
  tidyr::pivot_longer(brms_draws_cov, cols = dplyr::everything(),
                      names_to = "parameter", values_to = "value") %>%
    dplyr::mutate(package = "brms")
)

draws_long_cov %>%
  dplyr::group_by(parameter, package) %>%
  dplyr::summarise(
    mean = mean(value),
    sd   = sd(value),
    q025 = quantile(value, 0.025),
    q975 = quantile(value, 0.975),
    .groups = "drop"
  ) %>%
  tidyr::pivot_wider(
    names_from  = package,
    values_from = c(mean, sd, q025, q975),
    names_glue  = "{package}_{.value}"
  ) %>%
  dplyr::mutate(
    delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) /
                       (abs(brms_mean) + 1e-8)
  ) %>%
  knitr::kable(digits = 3,
               caption = "Posterior summaries: omega ~ 1 + treatment.")
```

## Overlaid marginal posteriors (covariate model)

```{r overlay-cov, eval = have_brms, fig.height = 5}
true_vals_cov <- data.frame(
  parameter = c("b0_(Intercept)", "b1_(Intercept)", "delta1_(Intercept)",
                 "omega1_(Intercept)", "omega1_treatment",
                 "rho1_(Intercept)", "sigma", "sigma_u"),
  true_value = c(b0_true, b1_true, delta_true,
                  omega_int, omega_trt, rho_true, sigma_true, sigma_u_true)
)

ggplot(draws_long_cov, aes(x = value, fill = package, colour = package)) +
  geom_density(alpha = 0.35) +
  geom_vline(data = true_vals_cov, aes(xintercept = true_value),
             linetype = "dashed", linewidth = 0.5) +
  facet_wrap(~ parameter, scales = "free", ncol = 3) +
  scale_fill_manual(values  = c(smoothbp = "#1f77b4", brms = "#d62728")) +
  scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
  labs(
    title    = "Marginal posteriors: smoothbp vs brms (omega ~ 1 + treatment)",
    subtitle = sprintf(
      "smoothbp: %.1fs   brms: %.1fs.  Dashed lines = true values.",
      time_sbp_cov, time_brms_cov
    ),
    x = NULL, y = "density"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")
```

## Effective sample size per second (covariate model)

```{r ess-cov, eval = have_brms}
ess_sbp_cov  <- posterior::summarise_draws(fit_sbp_cov$draws, ess_bulk) %>%
  dplyr::filter(variable %in% sbp_names_cov) %>%
  dplyr::transmute(parameter = variable,
                   smoothbp_ess_per_sec = ess_bulk / time_sbp_cov)

ess_brms_cov <- posterior::summarise_draws(
  posterior::as_draws_df(fit_brms_cov), ess_bulk
) %>%
  dplyr::filter(variable %in% brms_names_cov) %>%
  dplyr::mutate(parameter = sbp_names_cov[match(variable, brms_names_cov)]) %>%
  dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms_cov)

dplyr::full_join(ess_sbp_cov, ess_brms_cov, by = "parameter") %>%
  knitr::kable(digits = 1,
               caption = "Bulk ESS / second by package and parameter (covariate model).")
```

---

# Scenario 3: Hard vs smooth change-point — comparison with mcp

`mcp` (Lindeløv, 2020) is the closest R package to `smoothbp` in terms
of use case: Bayesian piecewise regression with flexible segment
specification and a formula interface. The fundamental modelling
difference is that `mcp` uses a **hard** (step-function) change-point
— the trajectory makes an instantaneous kink at `cp_1` — while
`smoothbp` uses a **logistic sigmoid** to smooth the transition, with
the sharpness parameter `rho` controlling how abrupt it is. As
`rho -> infinity` the smooth transition converges to the hard kink, so
on data generated with large `rho` the two approaches should give
consistent estimates of the change-point location and slopes.

A second structural difference: Both packages support complex hierarchical designs including **random intercepts** and **varying change-points**. For this comparison, we use a fixed-effects dataset to focus on the core change-point estimation.

## Parameterisation note

`mcp`'s two-segment joined-slope model has parameters:

| mcp parameter | smoothbp equivalent |
|---------------|---------------------|
| `cp_1`        | `omega_(Intercept)` (change-point location) |
| `time_1`      | `b1_(Intercept)` (pre-change slope) |
| `time_2`      | `b1_(Intercept) + delta1_(Intercept)` (post-change slope) |
| `int_1`       | `b0_(Intercept) - b1 * omega` (level at tau = 0, not at change-point) |
| `sigma_1`     | `sigma` |

The intercept-level parameters are not directly comparable because
`smoothbp` anchors `b0` at the change-point while `mcp` anchors
`int_1` at `tau = 0`. The slopes and change-point location are
directly comparable.

## Simulate fixed-effects data

```{r mcp-simulate}
set.seed(5501)

dat_mcp <- simulate_smoothbp(
  n_subj    = 1,
  n_obs     = 150,
  b0        = 5.0,
  b1        = -0.4,
  b2        =  1.4,
  omega     =  3.2,
  rho       =  5.0,   # moderately sharp — should agree well with mcp
  sigma     =  0.4,
  sigma_u   =  0.0,   # no random effects
  tau_range = c(0, 6),
  seed      = 5501L
)

# mcp expects a plain data.frame without the factor column
dat_mcp_plain <- data.frame(tau = dat_mcp$tau, y = dat_mcp$y)

cat(sprintf("n = %d  |  true omega = 3.2  |  true b1 = -0.4  |  true delta1 = 1.4\n",
            nrow(dat_mcp_plain)))
```

## Fit with smoothbp (fixed effects, rho free)

```{r mcp-smoothbp}
t0 <- Sys.time()
fit_sbp_mcp <- smoothbp(
  formula = y ~ tau,
  b0      = ~ 1,
  b1      = ~ 1,
  deltas  = list(~ 1),
  omega   = list(~ 1),
  rho     = list(fixed(100)), # Use fixed() to specify a sharp, hard kink
  data    = dat_mcp,
  priors  = smoothbp_priors(
    b0    = prior_normal(0, 10),
    b1    = prior_normal(0, 2),
    omega = list(prior_normal(3, 2, lb = 0, ub = max(dat_mcp$tau)))
  ),
  chains = 4L, iter = 2000L, warmup = 1000L,
  seed = 5501L, .verbose = FALSE
)
time_sbp_mcp <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
cat(sprintf("smoothbp: %.1f s\n", time_sbp_mcp))
```

## Fit with mcp (hard change-point)

```{r mcp-fit, eval = have_mcp, message = FALSE, warning = FALSE, results = "hide"}
# Two-segment joined-slope model: the natural mcp equivalent
model_mcp <- list(
  y ~ 1 + tau,   # segment 1: intercept + pre-change slope
  ~ 0 + tau      # segment 2: joined at cp_1, post-change slope
)

# Restrict cp_1 to the observed time range, matching the smoothbp bound
prior_mcp <- list(cp_1 = "dunif(0, 6)")
set.seed(5501)
t0 <- Sys.time()
fit_mcp <- mcp::mcp(
  model_mcp,
  data  = dat_mcp_plain,
  prior = prior_mcp,
  par_x = "tau",
  chains = 4L, iter = 2000L, adapt = 1000L,
)
time_mcp <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
cat(sprintf("mcp: %.1f s\n", time_mcp))
```

## Compare posteriors

The critical comparison is on `omega` / `cp_1` (change-point location)
and the two slopes. The post-change slope from mcp (`time_2`) is
equivalent to `b1 + delta1` in smoothbp.

```{r mcp-compare, eval = have_mcp}
sbp_sum_mcp <- posterior::summarise_draws(fit_sbp_mcp$draws) |>
  dplyr::filter(variable %in% c("b1_(Intercept)", "delta1_(Intercept)", "omega1_(Intercept)",
                               "rho1_(Intercept)", "sigma"))

mcp_sum <- summary(fit_mcp) |>
  dplyr::filter(name %in% c("cp_1", "int_1", "tau_1", "tau_2", "sigma_1"))

# Compute post-change slope from smoothbp draws for direct comparison
dm_sbp_mcp <- posterior::as_draws_matrix(fit_sbp_mcp$draws)
post_slope <- as.numeric(dm_sbp_mcp[, "b1_(Intercept)"] +
                           dm_sbp_mcp[, "delta1_(Intercept)"])

comparison_mcp <- data.frame(
  parameter          = c("omega / cp_1",
                          "b1 / time_1 (pre-change slope)",
                          "b1+delta1 / time_2 (post-change slope)",
                          "sigma"),
  truth              = c(3.2, -0.4, -0.4 + 1.4, 0.4),
  sbp_mean           = c(
    sbp_sum_mcp$mean[sbp_sum_mcp$variable == "omega1_(Intercept)"],
    sbp_sum_mcp$mean[sbp_sum_mcp$variable == "b1_(Intercept)"],
    mean(post_slope),
    sbp_sum_mcp$mean[sbp_sum_mcp$variable == "sigma"]
  ),
  sbp_lo             = c(
    sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "omega1_(Intercept)"],
    sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "b1_(Intercept)"],
    quantile(post_slope, 0.025),
    sbp_sum_mcp$q2.5[sbp_sum_mcp$variable == "sigma"]
  ),
  sbp_hi             = c(
    sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "omega1_(Intercept)"],
    sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "b1_(Intercept)"],
    quantile(post_slope, 0.975),
    sbp_sum_mcp$q97.5[sbp_sum_mcp$variable == "sigma"]
  ),
  mcp_mean           = mcp_sum$mean[match(c("cp_1","tau_1","tau_2","sigma_1"),
                                          mcp_sum$name)],
  mcp_lo             = mcp_sum$lower[match(c("cp_1","tau_1","tau_2","sigma_1"),
                                           mcp_sum$name)],
  mcp_hi             = mcp_sum$upper[match(c("cp_1","tau_1","tau_2","sigma_1"),
                                           mcp_sum$name)]
)

knitr::kable(comparison_mcp, digits = 3,
  caption = "Scenario 3: smoothbp vs mcp on comparable parameters.
             smoothbp post-change slope is b1 + delta1.")
```

## Fitted trajectories: hard vs smooth

The plot below overlays the posterior mean fitted trajectories from both
models. With `rho = 5` the smoothbp transition is fairly sharp and the
two trajectories are visually similar near the change-point, but the
logistic rounding is visible in the smoothbp curve.

```{r mcp-trajectories, eval = have_mcp, fig.height = 4.5}
tau_grid <- seq(0, 6, length.out = 200)

# smoothbp posterior mean trajectory
draws_sbp_mcp  <- posterior::as_draws_matrix(fit_sbp_mcp$draws)
b0_m   <- mean(draws_sbp_mcp[, "b0_(Intercept)"])
b1_m   <- mean(draws_sbp_mcp[, "b1_(Intercept)"])
delta1_m <- mean(draws_sbp_mcp[, "delta1_(Intercept)"])
om_m   <- mean(draws_sbp_mcp[, "omega1_(Intercept)"])
rho_m  <- mean(draws_sbp_mcp[, "rho1_(Intercept)"])

sigmoid  <- function(x) 1 / (1 + exp(-x))
d_grid   <- tau_grid - om_m
mu_sbp   <- b0_m + b1_m * d_grid + delta1_m * d_grid * sigmoid(d_grid * rho_m)

# mcp posterior mean trajectory
cp_m    <- mcp_sum$mean[mcp_sum$name == "cp_1"]
int_m   <- mcp_sum$mean[mcp_sum$name == "int_1"]
t1_m    <- mcp_sum$mean[mcp_sum$name == "tau_1"]
t2_m    <- mcp_sum$mean[mcp_sum$name == "tau_2"]
mu_mcp  <- ifelse(tau_grid < cp_m,
                  int_m + t1_m * tau_grid,
                  int_m + t1_m * cp_m + t2_m * (tau_grid - cp_m))

traj_df <- data.frame(
  tau    = rep(tau_grid, 2),
  mu     = c(mu_sbp, mu_mcp),
  model  = rep(c("smoothbp (logistic)", "mcp (hard kink)"), each = 200)
)

ggplot() +
  geom_point(aes(tau, y), data = dat_mcp_plain, alpha = 0.25, size = 0.8) +
  geom_line(aes(tau, mu, colour = model, linetype = model),
            data = traj_df, linewidth = 0.9) +
  geom_vline(xintercept = 3.2, linetype = "dotted", colour = "grey40") +
  scale_colour_manual(values = c("smoothbp (logistic)" = "#2166ac",
                                 "mcp (hard kink)"     = "#d6604d")) +
  labs(title    = "Posterior mean fitted trajectories",
       subtitle = "Dotted line = true change-point (omega = 3.2)",
       x = "tau", y = "y", colour = NULL, linetype = NULL) +
  theme_bw() +
  theme(legend.position = "bottom")
```

## Timing and ESS comparison

```{r mcp-ess, eval = have_mcp}
# smoothbp ESS on omega
sbp_ess_mcp <- posterior::summarise_draws(
  fit_sbp_mcp$draws[,, c("omega1_(Intercept)", "b1_(Intercept)",
                          "delta1_(Intercept)", "sigma")],
  ess_bulk = posterior::ess_bulk
) |>
  dplyr::mutate(ess_per_sec = ess_bulk / time_sbp_mcp,
                package = "smoothbp")

# mcp ESS — extract draws from mcp object
# mcp stores posterior draws as a coda::mcmc.list in $mcmc_post
mcp_draws <- posterior::as_draws_array(fit_mcp$mcmc_post)
mcp_ess_draws <- posterior::summarise_draws(
  mcp_draws[, , c("cp_1", "tau_1", "sigma_1")],
  ess_bulk = posterior::ess_bulk
)
mcp_ess_raw <- setNames(mcp_ess_draws$ess_bulk, mcp_ess_draws$variable)

data.frame(
  parameter   = c("omega / cp_1", "b1 / time_1", "sigma"),
  sbp_ess_s   = round(sbp_ess_mcp$ess_per_sec[
    match(c("omega1_(Intercept)", "b1_(Intercept)", "sigma"),
          sbp_ess_mcp$variable)], 1),
  mcp_ess_s   = round(c(mcp_ess_raw["cp_1"],
                         mcp_ess_raw["tau_1"],
                         mcp_ess_raw["sigma_1"]) / time_mcp, 1)
) |>
  knitr::kable(caption = "ESS/second: smoothbp vs mcp.
                          Higher is better.")
```

## What this comparison shows

- **Model equivalence at high rho**: when `rho` is large (sharp
  transitions), the smoothbp and mcp posteriors for the change-point
  location and slopes are broadly consistent, validating that both
  packages locate the structural break correctly.

- **Modelling the sharpness**: smoothbp estimates `rho` as a free
  parameter, quantifying transition sharpness as part of the posterior.
  mcp's hard-kink model implicitly assumes `rho = infinity`; this can
  lead to a slightly overconfident change-point posterior when
  transitions are genuinely gradual.

- **Random effects**: Both `mcp` and `smoothbp` support hierarchical models with random intercepts and varying change-points. `smoothbp`'s conjugate Gibbs updates for random intercepts often lead to more efficient sampling of the population mean and group-level deviations compared to general-purpose MCMC.

- **Speed**: smoothbp's compiled Rust back-end is substantially faster
  per iteration than mcp's JAGS back-end, and does not require JAGS to
  be installed separately.

- **Flexibility**: Both packages support **multiple change-points**. `mcp` offers a wider range of likelihood families (non-Gaussian), variance/autoregressive terms, and non-linear segments. `smoothbp` offers unique strengths including **spike-and-slab variable selection** for structural break discovery and the ability to include **covariates on all five structural parameters** (b0, b1, delta, omega, rho) in a single model.

---

## What to look for

If both samplers have converged, the marginal posteriors overlay almost
exactly, the means agree to within Monte Carlo error (`delta_mean_pct`
should be a fraction of a per cent for well-mixed parameters), and the
credible intervals are within sampling variability of each other.

`smoothbp` jointly samples the intercept `b0` and random effects `u` in a
single conjugate Gibbs block.  This avoids the slow-mixing ridge that
arises when `b0` and `u` are updated in separate blocks (since the
likelihood depends on `b0 + u_j`).  As a result, `b0` and `sigma_u` mix
well, with ESS comparable to or exceeding brms on these parameters.

For the non-linear parameters (`omega`, `rho`), NUTS typically achieves
higher ESS per iteration, but smoothbp's cheaper per-iteration cost keeps
ESS per second competitive.

If you remove the `ub = max(dat$tau)` bound or the `init_fun`, you will
likely see the brms posterior on `omega` and `b2` go bimodal: one cluster
near the truth, and a second cluster in the unidentifiable region with
`omega > max(tau)` and `b2` pinned at a ridiculous value matching the
right-edge slope. That is a sampler-mixing artefact, not a real feature of
the posterior, and worth demonstrating in your own analyses before trusting
results from change-point models that allow `omega` to escape the data
range.

A note on smoothbp's robustness: smoothbp initialises every chain near the
prior mean, which keeps it out of the degenerate region by construction.
That is excellent in practice when the user has a sensible prior on
`omega`, and is the typical case. It also means that on a problem where
the posterior is genuinely multimodal, the chains may all converge to
whichever mode is closest to the prior mean and fail to explore the
others. If you suspect multimodality, fit with deliberately overdispersed
initial values by varying the prior across runs.

---

# Scenario 4: Multi-breakpoint model

This scenario validates the new multi-breakpoint architecture by simulating
data with **two true change-points** and fitting both smoothbp and brms.
If the two samplers agree — i.e., marginal posteriors overlap — we have
strong evidence that the implementation is correct.

## Simulate two-breakpoint data

```{r simulate-multi}
set.seed(9801)
n  <- 300
tau <- seq(0, 12, length.out = n)

# True model: two breakpoints
om1_true    <-  3.5;  rho1_true <- 5.0;  delta1_true <- -1.2
om2_true    <-  8.0;  rho2_true <- 4.0;  delta2_true <-  1.8
b0_true     <-  5.0;  b1_true   <-  0.3;  sigma_true  <-  0.3

d1  <- tau - om1_true;  s1 <- plogis(d1 * rho1_true)
d2  <- tau - om2_true;  s2 <- plogis(d2 * rho2_true)
mu  <- b0_true + b1_true * (tau - om1_true) +
       delta1_true * d1 * s1 + delta2_true * d2 * s2
y   <- mu + rnorm(n, sd = sigma_true)
dat_multi <- data.frame(y = y, tau = tau)

cat(sprintf("True: b0=%.1f b1=%.1f delta1=%.1f omega1=%.1f delta2=%.1f omega2=%.1f sigma=%.1f\n",
            b0_true, b1_true, delta1_true, om1_true, delta2_true, om2_true, sigma_true))
```

## Fit with smoothbp

```{r fit-smoothbp-multi}
t0 <- Sys.time()
fit_sbp_multi <- smoothbp(
  formula = y ~ tau,
  b0      = ~ 1,
  b1      = ~ 1,
  deltas  = list(~ 1, ~ 1),
  omega   = list(~ 1, ~ 1),
  rho     = list(~ 1, ~ 1),
  data    = dat_multi,
  priors  = smoothbp_priors(
    b0     = prior_normal(0, 10),
    b1     = prior_normal(0, 2),
    omega  = list(
      prior_normal(3.5, 2, lb = 0, ub = 6),
      prior_normal(8.0, 2, lb = 4, ub = max(dat_multi$tau))
    ),
    rho    = list(prior_normal(4, 2, lb = 0),
                  prior_normal(4, 2, lb = 0))
  ),
  chains = 4L, iter = 2000L, warmup = 1000L,
  seed = 9801L, .verbose = FALSE
)
time_sbp_multi <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
cat(sprintf("smoothbp: %.1f s\n", time_sbp_multi))
```

## Fit with brms (Stan / NUTS)

```{r fit-brms-multi, eval = have_brms, message = FALSE, warning = FALSE, results = "hide"}
suppressPackageStartupMessages(library(brms))

# Two-breakpoint non-linear formula for brms
bf_multi <- brms::bf(
  y ~ b0 + b1 * (tau - om1) +
        d1 * (tau - om1) / (1 + exp(-(tau - om1) * rho1)) +
        d2 * (tau - om2) / (1 + exp(-(tau - om2) * rho2)),
  b0 ~ 1, b1 ~ 1,
  d1 ~ 1, om1 ~ 1, rho1 ~ 1,
  d2 ~ 1, om2 ~ 1, rho2 ~ 1,
  nl = TRUE
)

priors_brms_multi <- c(
  brms::prior(normal(0, 10),  nlpar = "b0"),
  brms::prior(normal(0, 2),   nlpar = "b1"),
  brms::prior(normal(0, 2),   nlpar = "d1"),
  brms::prior_string("normal(3.5, 2)", nlpar = "om1", lb = 0, ub = 6),
  brms::prior(normal(4, 2),   nlpar = "rho1", lb = 0),
  brms::prior(normal(0, 2),   nlpar = "d2"),
  brms::prior_string("normal(8.0, 2)", nlpar = "om2", lb = 4, ub = 12),
  brms::prior(normal(4, 2),   nlpar = "rho2", lb = 0)
)

init_multi <- function() list(
  b_b0   = array(rnorm(1, 5, 0.5)),
  b_b1   = array(rnorm(1, 0.3, 0.1)),
  b_d1   = array(rnorm(1, -1, 0.3)),
  b_om1  = array(rnorm(1, 3.5, 0.3)),
  b_rho1 = array(rnorm(1, 5, 0.5)),
  b_d2   = array(rnorm(1, 1.5, 0.3)),
  b_om2  = array(rnorm(1, 8, 0.3)),
  b_rho2 = array(rnorm(1, 4, 0.5))
)

t0 <- Sys.time()
fit_brms_multi <- brms::brm(
  bf_multi,
  data    = dat_multi,
  prior   = priors_brms_multi,
  chains  = 4, iter = 2000, warmup = 1000,
  seed    = 9801, refresh = 0,
  init    = init_multi,
  control = list(adapt_delta = 0.95)
)
time_brms_multi <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
```

## Compare posteriors

```{r compare-multi, eval = have_brms}
sbp_names_multi <- c(
  "b0_(Intercept)", "b1_(Intercept)",
  "delta1_(Intercept)", "omega1_(Intercept)", "rho1_(Intercept)",
  "delta2_(Intercept)", "omega2_(Intercept)", "rho2_(Intercept)",
  "sigma"
)
brms_names_multi <- c(
  "b_b0_Intercept", "b_b1_Intercept",
  "b_d1_Intercept", "b_om1_Intercept", "b_rho1_Intercept",
  "b_d2_Intercept", "b_om2_Intercept", "b_rho2_Intercept",
  "sigma"
)
true_vals_multi <- c(b0_true, b1_true,
                     delta1_true, om1_true, rho1_true,
                     delta2_true, om2_true, rho2_true,
                     sigma_true)

sbp_draws_multi  <- posterior::as_draws_df(fit_sbp_multi$draws)[, sbp_names_multi]
brms_draws_multi <- as.data.frame(
  posterior::as_draws_df(fit_brms_multi)
)[, brms_names_multi]
colnames(brms_draws_multi) <- sbp_names_multi

draws_long_multi <- dplyr::bind_rows(
  tidyr::pivot_longer(sbp_draws_multi, cols = dplyr::everything(),
                      names_to = "parameter", values_to = "value") |>
    dplyr::mutate(package = "smoothbp"),
  tidyr::pivot_longer(brms_draws_multi, cols = dplyr::everything(),
                      names_to = "parameter", values_to = "value") |>
    dplyr::mutate(package = "brms")
)

draws_long_multi |>
  dplyr::group_by(parameter, package) |>
  dplyr::summarise(mean = mean(value), sd = sd(value),
                   q025 = quantile(value, 0.025),
                   q975 = quantile(value, 0.975), .groups = "drop") |>
  tidyr::pivot_wider(names_from = package,
                     values_from = c(mean, sd, q025, q975),
                     names_glue = "{package}_{.value}") |>
  dplyr::mutate(
    truth          = true_vals_multi[match(parameter, sbp_names_multi)],
    delta_mean_pct = 100 * abs(smoothbp_mean - brms_mean) /
                       (abs(brms_mean) + 1e-8)
  ) |>
  knitr::kable(digits = 3,
               caption = "Scenario 4: two-breakpoint smoothbp vs brms.")
```

## Overlaid marginal posteriors — the key validation

The densities from smoothbp (blue) and brms (red) should overlap almost
exactly if both samplers are targeting the same posterior. Dashed vertical
lines mark the true data-generating values.

```{r overlay-multi, eval = have_brms, fig.height = 6}
true_df_multi <- data.frame(
  parameter  = sbp_names_multi,
  true_value = true_vals_multi
)

ggplot(draws_long_multi, aes(x = value, fill = package, colour = package)) +
  geom_density(alpha = 0.35, linewidth = 0.6) +
  geom_vline(data = true_df_multi,
             aes(xintercept = true_value),
             linetype = "dashed", linewidth = 0.5, colour = "black") +
  facet_wrap(~ parameter, scales = "free", ncol = 3) +
  scale_fill_manual(values   = c(smoothbp = "#1f77b4", brms = "#d62728")) +
  scale_colour_manual(values = c(smoothbp = "#1f77b4", brms = "#d62728")) +
  labs(
    title    = "Scenario 4: Multi-breakpoint posteriors — smoothbp vs brms",
    subtitle = sprintf(
      "Two true change-points. smoothbp: %.1fs   brms: %.1fs\nDashed = true value. Overlapping densities = agreement.",
      time_sbp_multi, time_brms_multi
    ),
    x = NULL, y = "Density"
  ) +
  theme_minimal(base_size = 11) +
  theme(legend.position = "bottom")
```

## What to look for

- **Density overlap**: the blue (smoothbp) and red (brms) curves should be
  nearly indistinguishable for every parameter. Meaningful separation
  would indicate a bug in one of the implementations.
- **True value coverage**: dashed lines should fall inside or very near
  the high-density region of both posteriors.
- **Timing**: smoothbp's Rust sampler should be substantially faster than
  brms/Stan, especially at larger $n$.

```{r ess-multi, eval = have_brms}
ess_sbp_multi <- posterior::summarise_draws(
  fit_sbp_multi$draws[, , sbp_names_multi], ess_bulk = posterior::ess_bulk
) |>
  dplyr::transmute(parameter = variable,
                   smoothbp_ess_per_sec = ess_bulk / time_sbp_multi)

ess_brms_multi <- posterior::summarise_draws(
  posterior::as_draws_df(fit_brms_multi), ess_bulk = posterior::ess_bulk
) |>
  dplyr::filter(variable %in% brms_names_multi) |>
  dplyr::mutate(parameter = sbp_names_multi[match(variable, brms_names_multi)]) |>
  dplyr::transmute(parameter, brms_ess_per_sec = ess_bulk / time_brms_multi)

dplyr::full_join(ess_sbp_multi, ess_brms_multi, by = "parameter") |>
  knitr::kable(digits = 1,
               caption = "ESS/second: smoothbp vs brms (two-breakpoint model).")
```