Measurement System: Two-Factor CFA

Greg Veramendi

Overview

This vignette shows how to specify and estimate a two-factor measurement system with factorana. The latent factors are identified from a set of noisy indicators (measurements) whose loadings the package estimates jointly with the factor variances.

The example below uses two latent factors (“cognitive” and “non-cognitive” skills) each measured by three continuous indicators, and is small enough (n = 300) to run quickly.

Simulate data

library(factorana)

set.seed(1)
n <- 300

# Latent factors (true values, unobserved in practice)
f_cog    <- rnorm(n, mean = 0, sd = 1.0)
f_noncog <- rnorm(n, mean = 0, sd = 0.8)

# Cognitive indicators: loadings (1.0, 0.9, 0.7); error sd = 0.5
cog1 <- 0.0 + 1.0 * f_cog + rnorm(n, 0, 0.5)
cog2 <- 0.2 + 0.9 * f_cog + rnorm(n, 0, 0.5)
cog3 <- 0.1 + 0.7 * f_cog + rnorm(n, 0, 0.5)

# Non-cognitive indicators: loadings (1.0, 1.1, 0.8); error sd = 0.5
nc1 <- 0.0 + 1.0 * f_noncog + rnorm(n, 0, 0.5)
nc2 <- 0.1 + 1.1 * f_noncog + rnorm(n, 0, 0.5)
nc3 <- 0.0 + 0.8 * f_noncog + rnorm(n, 0, 0.5)

dat <- data.frame(
  intercept = 1,
  cog1 = cog1, cog2 = cog2, cog3 = cog3,
  nc1  = nc1,  nc2  = nc2,  nc3  = nc3
)
head(dat)
#>   intercept       cog1       cog2        cog3        nc1        nc2        nc3
#> 1         1 -0.7969873 -1.1345097 -1.11703554  1.1399607  1.2434756 -0.0985743
#> 2         1  0.9348556  0.4624395  1.19013215 -1.3004950 -0.5309301 -0.6931244
#> 3         1 -0.5714748 -0.4198545 -1.41335484  2.0238605  1.7614149  2.3556515
#> 4         1  1.8663765  1.0763851  0.16363734 -0.7774106  0.5158946  0.4655233
#> 5         1  0.2611711  0.8220335  0.67947970  1.5927923  1.4158815  1.1502765
#> 6         1 -1.3888353 -1.0548717 -0.02060567  1.1187830  2.4446165  0.6413497

Define the factor model

Two independent latent factors. Loading normalizations are set on the component side: each factor has one indicator with loading fixed at 1 (to pin the scale) and two free loadings.

fm <- define_factor_model(n_factors = 2, factor_structure = "independent")

Define model components

For each indicator we declare a linear equation, which factor(s) it loads on, and any fixed loadings. loading_normalization takes a vector of length n_factors:

# Cognitive indicators: load on factor 1 only
mc_cog1 <- define_model_component(
  name = "cog1", data = dat, outcome = "cog1", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(1, 0)        # factor 1 loading = 1, factor 2 loading = 0
)
mc_cog2 <- define_model_component(
  name = "cog2", data = dat, outcome = "cog2", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(NA_real_, 0) # factor 1 loading free, factor 2 loading = 0
)
mc_cog3 <- define_model_component(
  name = "cog3", data = dat, outcome = "cog3", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(NA_real_, 0)
)

# Non-cognitive indicators: load on factor 2 only
mc_nc1 <- define_model_component(
  name = "nc1", data = dat, outcome = "nc1", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(0, 1)
)
mc_nc2 <- define_model_component(
  name = "nc2", data = dat, outcome = "nc2", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(0, NA_real_)
)
mc_nc3 <- define_model_component(
  name = "nc3", data = dat, outcome = "nc3", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(0, NA_real_)
)

Assemble the components into a system:

ms <- define_model_system(
  components = list(mc_cog1, mc_cog2, mc_cog3, mc_nc1, mc_nc2, mc_nc3),
  factor = fm
)

Estimate

The estimator uses Gauss-Hermite quadrature to integrate over the latent factors; we keep n_quad_points modest here for speed.

ctrl <- define_estimation_control(n_quad_points = 6, num_cores = 1)

fit <- estimate_model_rcpp(
  model_system = ms,
  data         = dat,
  control      = ctrl,
  optimizer    = "nlminb",
  parallel     = FALSE,
  verbose      = FALSE
)

fit$convergence  # 0 indicates successful convergence
#> [1] 0

Inspect estimates

# Tidy table of parameter estimates with standard errors
components_table(fit, digits = 3)
#> 
#> Factor Model Results by Component
#> ======================================================================================================================== 
#> 
#> Parameter             factor         cog1         cog2         cog3          nc1          nc2          nc3
#> ---------------------------------------------------------------------------------------------------------- 
#> Intercept                         0.170**     0.353***     0.264***       -0.030        0.089        0.025
#>                                   (0.065)      (0.057)      (0.047)      (0.059)      (0.061)      (0.049)
#> Sigma                            0.608***     0.535***     0.483***     0.545***     0.580***     0.531***
#>                                   (0.037)      (0.032)      (0.027)      (0.037)      (0.043)      (0.028)
#> factor_var_1        0.794***                                                                              
#>                      (0.089)                                                                              
#> factor_var_2        0.659***                                                                              
#>                      (0.062)                                                                              
#> cog2_loading_1                                0.888***                                                    
#>                                                (0.053)                                                    
#> cog3_loading_1                                             0.692***                                       
#>                                                             (0.044)                                       
#> nc2_loading_2                                                                        1.036***             
#>                                                                                       (0.067)             
#> nc3_loading_2                                                                                     0.769***
#>                                                                                                    (0.048)
#> 
#> ---------------------------------------------------------------------------------------------------------- 
#> N                          0          300          300          300          300          300          300
#> Log-likelihood: -2053.38
#> 
#> Standard errors in parentheses
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1

Factor variances are near the true values (1.0 for the cognitive factor and 0.64 = 0.8^2 for the non-cognitive factor); loadings recover the simulated values (cog2 ≈ 0.9, cog3 ≈ 0.7, nc2 ≈ 1.1, nc3 ≈ 0.8).

Factor scores

Posterior mean factor scores for each observation can be recovered from the estimated model:

fscores <- estimate_factorscores_rcpp(
  fit, dat, control = ctrl, parallel = FALSE, verbose = FALSE
)
head(fscores[, c("obs_id", "factor_1", "factor_2",
                 "se_factor_1", "se_factor_2", "converged")])
#> Factor Score Estimates
#> ======================
#> 
#> Observations: 6
#> Factors: 2
#> Converged: 6 (100.0%)
#> 
#> Summary Statistics:
#>   Factor 1: mean=-0.295, sd=0.944, mean_se=0.365
#>   Factor 2: mean=0.702, sd=0.957, mean_se=0.340
#> 
#> First 6 rows:
#>   obs_id   factor_1      factor_2 se_factor_1 se_factor_2 converged
#> 1      1 -1.2915708  0.7037167694    0.364723   0.3399574      TRUE
#> 2      2  0.5875902 -0.8011073548    0.364723   0.3399574      TRUE
#> 3      3 -1.0696673  1.8108452339    0.364723   0.3399574      TRUE
#> 4      4  0.7438436 -0.0003981072    0.364723   0.3399574      TRUE
#> 5      5  0.3346083  1.2404898768    0.364723   0.3399574      TRUE
#> 6      6 -1.0742353  1.2587562101    0.364723   0.3399574      TRUE

# Correlation of estimated factor scores with the true (unobserved) factors
cor(fscores$factor_1, f_cog)
#> [1] 0.936484
cor(fscores$factor_2, f_noncog)
#> [1] 0.9422921

Where to go next