## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval = FALSE-------------------------------------------------------------
# # if (!require("remotes", quietly = TRUE))
# #     install.packages("remotes")
# #
# # remotes::install_github("statdivlab/radEmu")

## ----setup, message = FALSE---------------------------------------------------
library(magrittr)
library(dplyr)
library(ggplot2)
library(stringr)
library(radEmu)

## -----------------------------------------------------------------------------
J <- 10; n <- 60
# generate design matrix 
set.seed(10)
X <- cbind(1, rep(0:1, each = n / 2))
cov_dat <- data.frame(cov = X[, 2])
# cluster membership 
cluster <- rep(1:4, each = n/4)
cluster_named <- paste("cage", cluster, sep = "")
cov_dat$cluster <- cluster
# intercepts for each category
b0 <- rnorm(J)
# coefficients for X1 for each category 
b1 <- seq(1, 5, length.out = J)
# mean center the coefficients
b1 <- b1 - mean(b1)
# set the coefficient for the 3rd category to 4 (why not!?)
# Note that because of the constraint, we're only able to estimate
# b1 - mean(b1), which is ~3.9.  
b1[3] <- 4
# generate B coefficient matrix 
b <- rbind(b0, b1)

# simulate data according to a zero-inflated negative binomial distribution
# the mean model used to simulate this data takes into account the cluster membership
Y <- radEmu:::simulate_data(n = n,
                            J = J,
                            b0 = b0,
                            b1 = b1,
                            distn = "ZINB",
                            zinb_size = 10,
                            zinb_zero_prop = 0.3,
                            mean_z = 5,
                            X = X,
                            cluster = cluster)

## -----------------------------------------------------------------------------
table(cluster_named)

## -----------------------------------------------------------------------------
ef_cluster <- emuFit(formula = ~ cov,
                     data = cov_dat, 
                     Y = Y,
                     cluster = cluster_named, 
                     test_kj = data.frame(k = 2, j = 3),
                     match_row_names = FALSE)

ef_cluster$coef[3, ]

## -----------------------------------------------------------------------------
ef_no_cluster <- emuFit(formula = ~ cov,
                        data = cov_dat, 
                        Y = Y,
                        test_kj = data.frame(k = 2, j = 3),
                        match_row_names = FALSE)
ef_no_cluster$coef[3, ]

