mfrmr Workflow

This vignette outlines a reproducible workflow for loading the packaged datasets, fitting a many-facet Rasch model, running diagnostics (including the strict marginal branch), and assembling reporting outputs.

For a plot-first companion guide, see the separate mfrmr-visual-diagnostics vignette.

For speed-sensitive work, a useful pattern is to fit with a reduced iteration budget (`maxit`) and a small quadrature grid (`quad_points`), run the first diagnostic pass with `residual_pca = "none"`, and add residual PCA only for the final reporting run.

MML and Diagnostic Modes

mfrmr treats MML and JML differently on purpose.

For RSM and PCM, diagnostics now expose two distinct evidence paths: the legacy residual path and the strict marginal path.

The strict marginal branch is still screening-oriented in the current release. Use summary(diag)$diagnostic_basis to separate the legacy residual evidence from the strict marginal evidence rather than pooling them into one decision.

Load Data

library(mfrmr)

list_mfrmr_data()
#> [1] "example_core"     "example_bias"     "study1"           "study2"          
#> [5] "combined"         "study1_itercal"   "study2_itercal"   "combined_itercal"

data("ej2021_study1", package = "mfrmr")
head(ej2021_study1)
#>    Study Person Rater              Criterion Score
#> 1 Study1   P001   R08      Global_Impression     4
#> 2 Study1   P001   R08 Linguistic_Realization     3
#> 3 Study1   P001   R08       Task_Fulfillment     3
#> 4 Study1   P001   R10      Global_Impression     4
#> 5 Study1   P001   R10 Linguistic_Realization     3
#> 6 Study1   P001   R10       Task_Fulfillment     2

study1_alt <- load_mfrmr_data("study1")
identical(names(ej2021_study1), names(study1_alt))
#> [1] TRUE

Minimal Runnable Example

We start with the packaged example_core dataset. It is intentionally compact, category-complete, and generated from a single latent trait plus facet main effects so that help-page examples stay fast without relying on undersized toy data. The same object is also available via data("mfrmr_example_core", package = "mfrmr"):

data("mfrmr_example_core", package = "mfrmr")
toy <- mfrmr_example_core

fit_toy <- fit_mfrm(
  data = toy,
  person = "Person",
  facets = c("Rater", "Criterion"),
  score = "Score",
  method = "JML",
  model = "RSM",
  maxit = 15
)
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 15) or
#> relaxing reltol (current: 1e-06).
diag_toy <- diagnose_mfrm(fit_toy, residual_pca = "none")

summary(fit_toy)$overview
#> # A tibble: 1 × 33
#>   Model Method MethodUsed     N Persons Facets Categories LogLik   AIC   BIC
#>   <chr> <chr>  <chr>      <int>   <int>  <int>      <dbl>  <dbl> <dbl> <dbl>
#> 1 RSM   JML    JMLE         768      48      2          4  -822. 1758. 2022.
#> # ℹ 23 more variables: Converged <lgl>, Iterations <int>,
#> #   IterationsBasis <chr>, MMLEngineRequested <chr>, MMLEngineUsed <chr>,
#> #   MMLEngineDetail <chr>, EMIterations <int>, EMConverged <lgl>,
#> #   EMRelativeChange <dbl>, OptimizerMethod <chr>, ConvergenceCode <int>,
#> #   ConvergenceBasis <chr>, ConvergenceStatus <chr>, ConvergenceReason <chr>,
#> #   ConvergenceSeverity <chr>, ConvergenceMessage <chr>,
#> #   ConvergenceDetail <chr>, ReviewableWarning <lgl>, …
summary(diag_toy)$overview
#> # A tibble: 1 × 10
#>   Observations Persons Facets Categories Subsets ResidualPCA DiagnosticMode
#>          <int>   <int>  <int>      <int>   <int> <chr>       <chr>         
#> 1          768      48      2          4       1 none        legacy        
#> # ℹ 3 more variables: Method <chr>, PrecisionTier <chr>, MarginalFit <chr>
names(plot(fit_toy, draw = FALSE))
#> [1] "wright_map"                     "pathway_map"                   
#> [3] "category_characteristic_curves"

Diagnostics and Reporting

t4_toy <- unexpected_response_table(
  fit_toy,
  diagnostics = diag_toy,
  abs_z_min = 1.5,
  prob_max = 0.4,
  top_n = 10
)
t12_toy <- fair_average_table(fit_toy, diagnostics = diag_toy)
t13_toy <- bias_interaction_report(
  estimate_bias(fit_toy, diag_toy,
                facet_a = "Rater", facet_b = "Criterion",
                max_iter = 2),
  top_n = 10
)

class(summary(t4_toy))
#> [1] "summary.mfrm_bundle"
class(summary(t12_toy))
#> [1] "summary.mfrm_bundle"
class(summary(t13_toy))
#> [1] "summary.mfrm_bundle"

names(plot(t4_toy, draw = FALSE))
#> [1] "name" "data"
names(plot(t12_toy, draw = FALSE))
#> [1] "name" "data"
names(plot(t13_toy, draw = FALSE))
#> [1] "name" "data"

chk_toy <- reporting_checklist(fit_toy, diagnostics = diag_toy)
subset(
  chk_toy$checklist,
  Section == "Visual Displays",
  c("Item", "DraftReady", "NextAction")
)
#>                                   Item DraftReady
#> 21                          Wright map       TRUE
#> 22                QC / facet dashboard       TRUE
#> 23                Residual PCA visuals      FALSE
#> 24 Connectivity / design-matrix visual       TRUE
#> 25  Inter-rater / displacement visuals       TRUE
#> 26             Strict marginal visuals      FALSE
#> 27                  Bias / DIF visuals      FALSE
#> 28      Precision / information curves      FALSE
#> 29                Fit/category visuals       TRUE
#>                                                                                                                                NextAction
#> 21                                               Include a Wright map when the manuscript benefits from a shared-scale targeting display.
#> 22                              Use the dashboard as a first-pass triage view, then move to the specific follow-up plot behind each flag.
#> 23                                                  Run residual PCA if you want scree/loadings visuals for residual-structure follow-up.
#> 24                                                                Use the design-matrix view to support linkage and comparability claims.
#> 25                                                Use displacement and inter-rater views to localize QC issues after dashboard screening.
#> 26 For MML reporting runs, call diagnose_mfrm(..., diagnostic_mode = "both") to enable strict marginal follow-up visuals where supported.
#> 27                                                                 Run bias or DIF screening before discussing interaction-level visuals.
#> 28                                                         Resolve convergence before using information or precision curves in reporting.
#> 29                                                 Use category curves and fit visuals as local descriptive follow-up after QC screening.

Fit and Diagnose with Full Data

For a realistic analysis, we use the packaged Study 1 dataset:

fit <- fit_mfrm(
  data = ej2021_study1,
  person = "Person",
  facets = c("Rater", "Criterion"),
  score = "Score",
  method = "MML",
  model = "RSM",
  quad_points = 7
)

diag <- diagnose_mfrm(
  fit,
  residual_pca = "none"
)

summary(fit)
#> Many-Facet Rasch Model Summary
#>   Model: RSM | Method: MML | N: 1842 | Persons: 307 | Facets: 2 | Categories: 4
#>   MML engine: direct (requested: direct)
#> 
#> Status
#>  - Overall status: usable_fit
#>  - Convergence: converged (severity: pass, sup-norm: 0.374)
#>  - Estimation path: RSM / direct
#>  - Reporting readiness: ready_for_diagnostics_and_reporting_follow_up
#> 
#> Key warnings
#>  - No population model was requested; current MML output uses the package's legacy unconditional prior.
#> 
#> Next actions
#>  - Run `diagnose_mfrm(fit, diagnostic_mode = "both")` for element-level fit review.
#>  - Use `plot(fit, type = "wright", preset = "publication")` for targeting and scale review.
#>  - After diagnostics, use `reporting_checklist(fit, diagnostics = diagnostics)` for reporting readiness.
#> 
#> Fit overview
#>   LogLik: -2102.737 | AIC: 4249.475 | BIC: 4370.884
#>   Converged: Yes | Status: converged | Basis: optimizer_gradient | Fn evals: 80 | Gr evals: 23
#>   Terminal gradient: sup-norm = 0.374 | RMS = 0.15 | Review tol = 0
#>   Optimization note: Optimizer returned convergence code 0.
#> 
#> Population basis
#>  PopulationModel PosteriorBasis Formula PersonRows DesignColumns
#>            FALSE     legacy_mml    <NA>         NA            NA
#>  CodingVariables ContrastVariables Policy ResidualVariance OmittedPersons
#>                                      <NA>               NA              0
#>  OmittedRows
#>            0
#> 
#> Facet overview
#>      Facet Levels MeanEstimate SDEstimate MinEstimate MaxEstimate  Span
#>  Criterion      3            0      0.692      -0.799       0.430 1.229
#>      Rater     18            0      0.665      -0.946       1.619 2.565
#> 
#> Person measure distribution
#>  Persons  Mean    SD Median    Min   Max  Span MeanPosteriorSD
#>      307 0.414 0.812  0.439 -1.451 2.384 3.834           0.482
#> 
#> Step parameter summary
#>  Steps    Min   Max Span Monotonic
#>      3 -1.092 0.957 2.05      TRUE
#> 
#> Estimation settings
#>  StepFacet SlopeFacet NoncenterFacet WeightColumn QuadPoints RatingMin
#>       <NA>       <NA>         Person         <NA>          7         1
#>  RatingMax DummyFacets PositiveFacets UnusedScoreCategories
#>          4                                                 
#>  UnusedScoreCategoryCount UnusedScoreCategoryType
#>                         0                    none
#> 
#> Most extreme facet levels (|estimate|)
#>      Facet             Level Estimate
#>      Rater               R13    1.619
#>      Rater               R08   -0.946
#>      Rater               R09   -0.918
#>      Rater               R06    0.886
#>  Criterion Global_Impression   -0.799
#> 
#> Highest person measures
#>  Person Estimate    SD
#>    P157    2.384 0.466
#>    P239    2.346 0.543
#>    P135    2.263 0.470
#>    P018    2.121 0.612
#>    P209    2.014 0.644
#> 
#> Lowest person measures
#>  Person Estimate    SD
#>    P136   -1.451 0.555
#>    P173   -1.399 0.526
#>    P159   -1.349 0.579
#>    P048   -1.330 0.466
#>    P089   -1.274 0.395
#> 
#> Paper reporting map
#>                                Area CoveredHere
#>  Model identification / convergence         yes
#>        Data structure / missingness          no
#>    Reliability / fit / residual PCA          no
#>                Category functioning     partial
#>     Bias / DIF / interaction checks          no
#>         Draft reporting / checklist          no
#>                                                                CompanionOutput
#>                                                                   summary(fit)
#>                                               summary(describe_mfrm_data(...))
#>                                                    summary(diagnose_mfrm(fit))
#>  rating_scale_table() / category_structure_report() / category_curves_report()
#>         summary(estimate_bias(...)) / analyze_dff() / related bundle summaries
#>                        reporting_checklist() / summary(build_apa_outputs(...))
#> 
#> Notes
#>  - No population model was requested; current MML output uses the package's legacy unconditional prior.
summary(diag)
#> Many-Facet Rasch Diagnostics Summary
#>   Observations: 1842 | Persons: 307 | Facets: 2 | Categories: 4 | Subsets: 1
#>   Residual PCA mode: none
#>   Method: MML | Precision tier: model_based
#>   Diagnostic mode: legacy | Strict marginal fit: not_available
#> 
#> Status
#>  - Overall status: follow_up_needed
#>  - Diagnostic path: legacy
#>  - Strict marginal fit: not_requested
#>  - Precision tier: model_based
#>  - Primary screen: Legacy residual diagnostics only; no strict marginal screen was requested.
#> 
#> Key warnings
#>  - Unexpected responses flagged: 100.
#>  - Flagged displacement levels: 40.
#> 
#> Next actions
#>  - Inspect `diagnostic_basis` before comparing legacy residual evidence with strict marginal evidence.
#>  - Use `unexpected_response_table()` / `plot_unexpected()` and `displacement_table()` / `plot_displacement()` for case-level follow-up.
#> 
#> Overall fit
#>  Infit Outfit InfitZSTD OutfitZSTD DF_Infit DF_Outfit
#>  0.811  0.786    -4.643     -7.029 1059.444      1842
#> 
#> Diagnostic basis guide
#>                    DiagnosticPath                      Component
#>               legacy_residual_fit           element_residual_fit
#>               strict_marginal_fit    first_order_category_counts
#>  strict_pairwise_local_dependence      pairwise_local_dependence
#>    posterior_predictive_follow_up posterior_predictive_follow_up
#>                   Status                                    Basis
#>                 computed          plugin_residuals_and_eap_tables
#>            not_requested     latent_integrated_first_order_counts
#>            not_requested latent_integrated_second_order_agreement
#>  planned_not_implemented         posterior_predictive_replication
#>                                    PrimaryStatistics
#>  Infit / Outfit / ZSTD / PTMEA / residual QC bundles
#>   category residuals / standardized residuals / RMSD
#>               exact and adjacent agreement residuals
#>                   replicated-data discrepancy checks
#>                 ReportingUse
#>  legacy_compatibility_screen
#>               screening_only
#>               screening_only
#>               screening_only
#>                                                                                                                                InterpretationNote
#>                        Legacy fit statistics use plug-in residual machinery and should not be interpreted as latent-integrated marginal evidence.
#>  Strict marginal fit integrates over the latent distribution and is the preferred strict screen for category-level misfit in the current release.
#>               Strict pairwise local-dependence checks are exploratory follow-ups to first-order marginal flags, not standalone inferential tests.
#>           Posterior predictive checking is reserved for corroborating strict marginal flags and practical-significance review in a later release.
#> 
#> Precision basis
#>  Method Converged PrecisionTier SupportsFormalInference HasFallbackSE
#>     MML      TRUE   model_based                    TRUE         FALSE
#>       PersonSEBasis           NonPersonSEBasis
#>  Posterior SD (EAP) Observed information (MML)
#>                              CIBasis
#>  Normal interval from model-based SE
#>                                                  ReliabilityBasis
#>  Observed variance with model-based and fit-adjusted error bounds
#>  HasFitAdjustedSE HasSamplePopulationCoverage
#>              TRUE                        TRUE
#>                                                         RecommendedUse
#>  Use for primary reporting of SE, CI, and reliability in this package.
#> 
#> Facet precision and spread
#>      Facet Levels Separation Strata Reliability RealSeparation RealStrata
#>  Criterion      3     14.910 20.214       0.996         14.910     20.214
#>     Person    307      1.322  2.096       0.636          1.225      1.967
#>      Rater     18      3.118  4.490       0.907          3.107      4.476
#>  RealReliability MeanInfit MeanOutfit
#>            0.996     0.810      0.786
#>            0.600     0.798      0.786
#>            0.906     0.813      0.786
#> 
#> Largest |ZSTD| rows
#>      Facet                  Level Infit Outfit InfitZSTD OutfitZSTD  AbsZ
#>  Criterion      Global_Impression 0.798  0.744    -2.596     -4.922 4.922
#>      Rater                    R08 0.702  0.660    -2.439     -4.107 4.107
#>  Criterion Linguistic_Realization 0.802  0.797    -2.918     -3.817 3.817
#>     Person                   P020 0.027  0.026    -2.768     -3.464 3.464
#>  Criterion       Task_Fulfillment 0.829  0.816    -2.488     -3.422 3.422
#>      Rater                    R10 0.737  0.726    -2.189     -2.939 2.939
#>     Person                   P203 0.041  0.073    -2.497     -2.832 2.832
#>     Person                   P098 2.314  3.361     1.539      2.780 2.780
#>      Rater                    R05 0.744  0.749    -1.923     -2.535 2.535
#>     Person                   P056 0.075  0.125    -1.727     -2.405 2.405
#> 
#> Strict marginal fit
#>  Available Method Model                                Basis
#>      FALSE    MML   RSM latent_integrated_first_order_counts
#>                                                             Reason
#>  Not computed; use `diagnostic_mode = "marginal_fit"` or `"both"`.
#> 
#> Paper reporting map
#>                                   Area CoveredHere
#>              Overall fit / reliability         yes
#>  Precision basis / inferential caveats         yes
#>                    Strict marginal fit          no
#>         Residual PCA / local structure     partial
#>    Unexpected responses / displacement     partial
#>                 Connectivity / subsets     partial
#>          Manuscript checklist / export          no
#>                                                          CompanionOutput
#>                                                     summary(diagnostics)
#>                                                     summary(diagnostics)
#>                             diagnose_mfrm(..., diagnostic_mode = "both")
#>                         analyze_residual_pca() / diagnostics$pca details
#>  unexpected_response_table() / displacement_table() / interaction tables
#>                subset_connectivity_report() / measurable_summary_table()
#>                  reporting_checklist() / summary(build_apa_outputs(...))
#> 
#> Flag counts
#>                                 Metric Count
#>                   Unexpected responses   100
#>            Flagged displacement levels    40
#>                       Interaction rows    20
#>                      Inter-rater pairs   153
#>            Marginal fit flagged groups    NA
#>  Marginal pairwise flagged level pairs    NA
#> 
#> Notes
#>  - Unexpected responses were flagged under current thresholds.
#>  - SE/ModelSE, CI, and reliability conventions depend on the estimation path; see diagnostics$approximation_notes for MML-vs-JML details.
#>  - Use `diagnostics$reliability` for facet-level separation/reliability. Use `diagnostics$interrater` only for observed agreement across matched rater contexts.

If you need dimensionality evidence for a final report, you can add residual PCA after the initial diagnostic pass:

diag_pca <- diagnose_mfrm(
  fit,
  residual_pca = "both",
  pca_max_factors = 6
)

summary(diag_pca)
#> Many-Facet Rasch Diagnostics Summary
#>   Observations: 1842 | Persons: 307 | Facets: 2 | Categories: 4 | Subsets: 1
#>   Residual PCA mode: both
#>   Method: MML | Precision tier: model_based
#>   Diagnostic mode: legacy | Strict marginal fit: not_available
#> 
#> Status
#>  - Overall status: follow_up_needed
#>  - Diagnostic path: legacy
#>  - Strict marginal fit: not_requested
#>  - Precision tier: model_based
#>  - Primary screen: Legacy residual diagnostics only; no strict marginal screen was requested.
#> 
#> Key warnings
#>  - Unexpected responses flagged: 100.
#>  - Flagged displacement levels: 40.
#> 
#> Next actions
#>  - Inspect `diagnostic_basis` before comparing legacy residual evidence with strict marginal evidence.
#>  - Use `unexpected_response_table()` / `plot_unexpected()` and `displacement_table()` / `plot_displacement()` for case-level follow-up.
#>  - Use `analyze_residual_pca()` if residual structure needs deeper follow-up.
#> 
#> Overall fit
#>  Infit Outfit InfitZSTD OutfitZSTD DF_Infit DF_Outfit
#>  0.811  0.786    -4.643     -7.029 1059.444      1842
#> 
#> Diagnostic basis guide
#>                    DiagnosticPath                      Component
#>               legacy_residual_fit           element_residual_fit
#>               strict_marginal_fit    first_order_category_counts
#>  strict_pairwise_local_dependence      pairwise_local_dependence
#>    posterior_predictive_follow_up posterior_predictive_follow_up
#>                   Status                                    Basis
#>                 computed          plugin_residuals_and_eap_tables
#>            not_requested     latent_integrated_first_order_counts
#>            not_requested latent_integrated_second_order_agreement
#>  planned_not_implemented         posterior_predictive_replication
#>                                    PrimaryStatistics
#>  Infit / Outfit / ZSTD / PTMEA / residual QC bundles
#>   category residuals / standardized residuals / RMSD
#>               exact and adjacent agreement residuals
#>                   replicated-data discrepancy checks
#>                 ReportingUse
#>  legacy_compatibility_screen
#>               screening_only
#>               screening_only
#>               screening_only
#>                                                                                                                                InterpretationNote
#>                        Legacy fit statistics use plug-in residual machinery and should not be interpreted as latent-integrated marginal evidence.
#>  Strict marginal fit integrates over the latent distribution and is the preferred strict screen for category-level misfit in the current release.
#>               Strict pairwise local-dependence checks are exploratory follow-ups to first-order marginal flags, not standalone inferential tests.
#>           Posterior predictive checking is reserved for corroborating strict marginal flags and practical-significance review in a later release.
#> 
#> Precision basis
#>  Method Converged PrecisionTier SupportsFormalInference HasFallbackSE
#>     MML      TRUE   model_based                    TRUE         FALSE
#>       PersonSEBasis           NonPersonSEBasis
#>  Posterior SD (EAP) Observed information (MML)
#>                              CIBasis
#>  Normal interval from model-based SE
#>                                                  ReliabilityBasis
#>  Observed variance with model-based and fit-adjusted error bounds
#>  HasFitAdjustedSE HasSamplePopulationCoverage
#>              TRUE                        TRUE
#>                                                         RecommendedUse
#>  Use for primary reporting of SE, CI, and reliability in this package.
#> 
#> Facet precision and spread
#>      Facet Levels Separation Strata Reliability RealSeparation RealStrata
#>  Criterion      3     14.910 20.214       0.996         14.910     20.214
#>     Person    307      1.322  2.096       0.636          1.225      1.967
#>      Rater     18      3.118  4.490       0.907          3.107      4.476
#>  RealReliability MeanInfit MeanOutfit
#>            0.996     0.810      0.786
#>            0.600     0.798      0.786
#>            0.906     0.813      0.786
#> 
#> Largest |ZSTD| rows
#>      Facet                  Level Infit Outfit InfitZSTD OutfitZSTD  AbsZ
#>  Criterion      Global_Impression 0.798  0.744    -2.596     -4.922 4.922
#>      Rater                    R08 0.702  0.660    -2.439     -4.107 4.107
#>  Criterion Linguistic_Realization 0.802  0.797    -2.918     -3.817 3.817
#>     Person                   P020 0.027  0.026    -2.768     -3.464 3.464
#>  Criterion       Task_Fulfillment 0.829  0.816    -2.488     -3.422 3.422
#>      Rater                    R10 0.737  0.726    -2.189     -2.939 2.939
#>     Person                   P203 0.041  0.073    -2.497     -2.832 2.832
#>     Person                   P098 2.314  3.361     1.539      2.780 2.780
#>      Rater                    R05 0.744  0.749    -1.923     -2.535 2.535
#>     Person                   P056 0.075  0.125    -1.727     -2.405 2.405
#> 
#> Strict marginal fit
#>  Available Method Model                                Basis
#>      FALSE    MML   RSM latent_integrated_first_order_counts
#>                                                             Reason
#>  Not computed; use `diagnostic_mode = "marginal_fit"` or `"both"`.
#> 
#> Paper reporting map
#>                                   Area CoveredHere
#>              Overall fit / reliability         yes
#>  Precision basis / inferential caveats         yes
#>                    Strict marginal fit          no
#>         Residual PCA / local structure     partial
#>    Unexpected responses / displacement     partial
#>                 Connectivity / subsets     partial
#>          Manuscript checklist / export          no
#>                                                          CompanionOutput
#>                                                     summary(diagnostics)
#>                                                     summary(diagnostics)
#>                             diagnose_mfrm(..., diagnostic_mode = "both")
#>                         analyze_residual_pca() / diagnostics$pca details
#>  unexpected_response_table() / displacement_table() / interaction tables
#>                subset_connectivity_report() / measurable_summary_table()
#>                  reporting_checklist() / summary(build_apa_outputs(...))
#> 
#> Flag counts
#>                                 Metric Count
#>                   Unexpected responses   100
#>            Flagged displacement levels    40
#>                       Interaction rows    20
#>                      Inter-rater pairs   153
#>            Marginal fit flagged groups    NA
#>  Marginal pairwise flagged level pairs    NA
#> 
#> Notes
#>  - Unexpected responses were flagged under current thresholds.
#>  - SE/ModelSE, CI, and reliability conventions depend on the estimation path; see diagnostics$approximation_notes for MML-vs-JML details.
#>  - Use `diagnostics$reliability` for facet-level separation/reliability. Use `diagnostics$interrater` only for observed agreement across matched rater contexts.

Strict Diagnostics for RSM and PCM

For RSM and PCM, the package can now keep the legacy residual path and the strict marginal path side by side:

fit_rsm_strict <- fit_mfrm(
  data = toy,
  person = "Person",
  facets = c("Rater", "Criterion"),
  score = "Score",
  method = "MML",
  model = "RSM",
  quad_points = 7,
  maxit = 10
)
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 10) or
#> relaxing reltol (current: 1e-06).
diag_rsm_strict <- diagnose_mfrm(
  fit_rsm_strict,
  diagnostic_mode = "both",
  residual_pca = "none"
)

fit_pcm_strict <- fit_mfrm(
  data = toy,
  person = "Person",
  facets = c("Rater", "Criterion"),
  score = "Score",
  method = "MML",
  model = "PCM",
  step_facet = "Criterion",
  quad_points = 7,
  maxit = 10
)
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 10) or
#> relaxing reltol (current: 1e-06).
diag_pcm_strict <- diagnose_mfrm(
  fit_pcm_strict,
  diagnostic_mode = "both",
  residual_pca = "none"
)

summary(diag_rsm_strict)$diagnostic_basis[, c("DiagnosticPath", "Status", "Basis")]
#> # A tibble: 4 × 3
#>   DiagnosticPath                   Status                  Basis                
#>   <chr>                            <chr>                   <chr>                
#> 1 legacy_residual_fit              computed                plugin_residuals_and…
#> 2 strict_marginal_fit              computed                latent_integrated_fi…
#> 3 strict_pairwise_local_dependence computed                latent_integrated_se…
#> 4 posterior_predictive_follow_up   planned_not_implemented posterior_predictive…
summary(diag_pcm_strict)$diagnostic_basis[, c("DiagnosticPath", "Status", "Basis")]
#> # A tibble: 4 × 3
#>   DiagnosticPath                   Status                  Basis                
#>   <chr>                            <chr>                   <chr>                
#> 1 legacy_residual_fit              computed                plugin_residuals_and…
#> 2 strict_marginal_fit              computed                latent_integrated_fi…
#> 3 strict_pairwise_local_dependence computed                latent_integrated_se…
#> 4 posterior_predictive_follow_up   planned_not_implemented posterior_predictive…

When you want a compact simulation-based screening check for the strict branch, use evaluate_mfrm_diagnostic_screening() on a small design:

screen_rsm <- evaluate_mfrm_diagnostic_screening(
  design = list(person = 18, rater = 3, criterion = 3, assignment = 3),
  reps = 1,
  scenarios = c("well_specified", "local_dependence"),
  model = "RSM",
  maxit = 8,
  quad_points = 7,
  seed = 123
)
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 8) or
#> relaxing reltol (current: 1e-06).
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 8) or
#> relaxing reltol (current: 1e-06).
screen_pcm <- evaluate_mfrm_diagnostic_screening(
  design = list(person = 18, rater = 3, criterion = 3, assignment = 3),
  reps = 1,
  scenarios = c("well_specified", "step_structure_misspecification"),
  model = "PCM",
  maxit = 8,
  quad_points = 7,
  seed = 123
)
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 8) or
#> relaxing reltol (current: 1e-06).
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 8) or
#> relaxing reltol (current: 1e-06).

screen_rsm$performance_summary[, c("Scenario", "EvaluationUse", "LegacyAnyFlagRate", "StrictAnyFlagRate")]
#> # A tibble: 2 × 4
#>   Scenario         EvaluationUse     LegacyAnyFlagRate StrictAnyFlagRate
#>   <chr>            <chr>                         <dbl>             <dbl>
#> 1 local_dependence sensitivity_proxy                 0                 1
#> 2 well_specified   type_I_proxy                      1                 1
screen_pcm$performance_summary[, c("Scenario", "EvaluationUse", "LegacySensitivityProxy", "StrictSensitivityProxy", "DeltaStrictMinusLegacyFlagRate")]
#> # A tibble: 2 × 5
#>   Scenario           EvaluationUse LegacySensitivityProxy StrictSensitivityProxy
#>   <chr>              <chr>                          <dbl>                  <dbl>
#> 1 step_structure_mi… sensitivity_…                      1                      1
#> 2 well_specified     type_I_proxy                      NA                     NA
#> # ℹ 1 more variable: DeltaStrictMinusLegacyFlagRate <dbl>

The same strict branch is now reflected in the reporting checklist router, `reporting_checklist()`:

chk_rsm_strict <- reporting_checklist(fit_rsm_strict, diagnostics = diag_rsm_strict)
subset(
  chk_rsm_strict$checklist,
  Section == "Visual Displays" &
    Item %in% c("QC / facet dashboard", "Strict marginal visuals", "Precision / information curves"),
  c("Item", "Available", "DraftReady", "NextAction")
)
#>                              Item Available DraftReady
#> 22           QC / facet dashboard      TRUE       TRUE
#> 26        Strict marginal visuals      TRUE      FALSE
#> 28 Precision / information curves     FALSE      FALSE
#>                                                                                                                       NextAction
#> 22                     Use the dashboard as a first-pass triage view, then move to the specific follow-up plot behind each flag.
#> 26 Treat strict marginal plots as exploratory corroboration screens, then corroborate with design review and legacy diagnostics.
#> 28                                                Resolve convergence before using information or precision curves in reporting.

Residual PCA and Reporting

# Run residual PCA in both modes ("both") and draw the overall scree plot.
pca <- analyze_residual_pca(diag_pca, mode = "both")
plot_residual_pca(pca, mode = "overall", plot_type = "scree")

# Fit the packaged bias example with MML/RSM (7 quadrature points keeps the
# example fast), skip residual PCA in the diagnostics, then estimate
# Rater-by-Criterion bias interactions and build fixed and APA-style reports.
data("mfrmr_example_bias", package = "mfrmr")
bias_df <- mfrmr_example_bias
fit_bias <- fit_mfrm(
  bias_df,
  person = "Person",
  facets = c("Rater", "Criterion"),
  score = "Score",
  method = "MML",
  model = "RSM",
  quad_points = 7
)
diag_bias <- diagnose_mfrm(fit_bias, residual_pca = "none")
bias <- estimate_bias(fit_bias, diag_bias, facet_a = "Rater", facet_b = "Criterion")
fixed <- build_fixed_reports(bias)
apa <- build_apa_outputs(fit_bias, diag_bias, bias_results = bias)

# Print the built-in threshold profiles (strict / standard / lenient) together
# with the heuristic PCA reference bands used by the warning map below.
mfrm_threshold_profiles()
#> $profiles
#> $profiles$strict
#> $profiles$strict$n_obs_min
#> [1] 200
#> 
#> $profiles$strict$n_person_min
#> [1] 50
#> 
#> $profiles$strict$low_cat_min
#> [1] 15
#> 
#> $profiles$strict$min_facet_levels
#> [1] 4
#> 
#> $profiles$strict$misfit_ratio_warn
#> [1] 0.08
#> 
#> $profiles$strict$missing_fit_ratio_warn
#> [1] 0.15
#> 
#> $profiles$strict$zstd2_ratio_warn
#> [1] 0.08
#> 
#> $profiles$strict$zstd3_ratio_warn
#> [1] 0.03
#> 
#> $profiles$strict$expected_var_min
#> [1] 0.3
#> 
#> $profiles$strict$pca_first_eigen_warn
#> [1] 1.5
#> 
#> $profiles$strict$pca_first_prop_warn
#> [1] 0.1
#> 
#> 
#> $profiles$standard
#> $profiles$standard$n_obs_min
#> [1] 100
#> 
#> $profiles$standard$n_person_min
#> [1] 30
#> 
#> $profiles$standard$low_cat_min
#> [1] 10
#> 
#> $profiles$standard$min_facet_levels
#> [1] 3
#> 
#> $profiles$standard$misfit_ratio_warn
#> [1] 0.1
#> 
#> $profiles$standard$missing_fit_ratio_warn
#> [1] 0.2
#> 
#> $profiles$standard$zstd2_ratio_warn
#> [1] 0.1
#> 
#> $profiles$standard$zstd3_ratio_warn
#> [1] 0.05
#> 
#> $profiles$standard$expected_var_min
#> [1] 0.2
#> 
#> $profiles$standard$pca_first_eigen_warn
#> [1] 2
#> 
#> $profiles$standard$pca_first_prop_warn
#> [1] 0.1
#> 
#> 
#> $profiles$lenient
#> $profiles$lenient$n_obs_min
#> [1] 60
#> 
#> $profiles$lenient$n_person_min
#> [1] 20
#> 
#> $profiles$lenient$low_cat_min
#> [1] 5
#> 
#> $profiles$lenient$min_facet_levels
#> [1] 2
#> 
#> $profiles$lenient$misfit_ratio_warn
#> [1] 0.15
#> 
#> $profiles$lenient$missing_fit_ratio_warn
#> [1] 0.3
#> 
#> $profiles$lenient$zstd2_ratio_warn
#> [1] 0.15
#> 
#> $profiles$lenient$zstd3_ratio_warn
#> [1] 0.08
#> 
#> $profiles$lenient$expected_var_min
#> [1] 0.1
#> 
#> $profiles$lenient$pca_first_eigen_warn
#> [1] 3
#> 
#> $profiles$lenient$pca_first_prop_warn
#> [1] 0.2
#> 
#> 
#> 
#> $pca_reference_bands
#> $pca_reference_bands$eigenvalue
#> critical_minimum          caution           common           strong 
#>              1.4              1.5              2.0              3.0 
#> 
#> $pca_reference_bands$proportion
#>   minor caution  strong 
#>    0.05    0.10    0.20 
#> 
#> 
#> attr(,"class")
#> [1] "mfrm_threshold_profiles" "list"
# Build visual summaries under the "standard" profile and inspect the
# residual-PCA entry of the warning map it produces.
vis <- build_visual_summaries(fit_bias, diag_bias, threshold_profile = "standard")
vis$warning_map$residual_pca_overall
#> [1] "Threshold profile: standard (PC1 EV >= 2.0, variance >= 10%)."                                                                                                          
#> [2] "Heuristic reference bands: EV >= 1.4 (critical minimum), >= 1.5 (caution), >= 2.0 (common), >= 3.0 (strong); variance >= 5% (minor), >= 10% (caution), >= 20% (strong)."
#> [3] "Current exploratory PC1 checks: EV>=1.5:Y, EV>=2.0:Y, EV>=3.0:Y, Var>=10%:Y, Var>=20%:Y."                                                                               
#> [4] "Overall residual PCA PC1 exceeds the current heuristic eigenvalue band (3.22)."                                                                                         
#> [5] "Overall residual PCA PC1 explains 20.1% variance."

The same example_bias dataset also carries a Group variable so DIF-oriented examples can show a non-null pattern instead of a fully clean result. It can be loaded either with load_mfrmr_data("example_bias") or data("mfrmr_example_bias", package = "mfrmr").

Human-Readable Reporting API

# One call per human-readable report: specifications, data quality,
# estimation iterations, subset connectivity, facet statistics, category
# structure and curves, and a top-20 bias interaction table with a plot.
spec <- specifications_report(fit, title = "Study run")
data_qc <- data_quality_report(
  fit,
  data = ej2021_study1,
  person = "Person",
  facets = c("Rater", "Criterion"),
  score = "Score"
)
iter <- estimation_iteration_report(fit, max_iter = 8)
subset_rep <- subset_connectivity_report(fit, diagnostics = diag)
facet_stats <- facet_statistics_report(fit, diagnostics = diag)
cat_structure <- category_structure_report(fit, diagnostics = diag)
cat_curves <- category_curves_report(fit, theta_points = 101)
bias_rep <- bias_interaction_report(bias, top_n = 20)
plot_bias_interaction(bias_rep, plot = "scatter")

Design Simulation and Prediction

The package also supports a separate simulation/prediction layer. The key distinction is between design-level population forecasting — build_mfrm_sim_spec() plus predict_mfrm_population(), which evaluate a planned design before any data are observed — and unit-level prediction for new observations under an already-fitted model, via predict_mfrm_units() and sample_mfrm_plausible_values().

# Describe a planned design (30 persons, 4 raters, 4 criteria, 2 raters per
# person on a rotating assignment) without reference to observed data.
sim_spec <- build_mfrm_sim_spec(
  n_person = 30,
  n_rater = 4,
  n_criterion = 4,
  raters_per_person = 2,
  assignment = "rotating"
)

# Forecast population-level properties of that design; reps/maxit are kept
# deliberately small here, which is why the convergence warnings appear.
pred_pop <- predict_mfrm_population(
  sim_spec = sim_spec,
  reps = 2,
  maxit = 10,
  seed = 1
)
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 10) or
#> relaxing reltol (current: 1e-06).
#> Warning: Optimizer did not fully converge (code = 1, status = iteration_limit).
#> Optimizer reached the iteration limit before the terminal gradient became small
#> enough for review-only acceptance. Consider increasing maxit (current: 10) or
#> relaxing reltol (current: 1e-06).

# Per-facet separation forecast with its Monte Carlo standard error.
summary(pred_pop)$forecast[, c("Facet", "MeanSeparation", "McseSeparation")]
#> # A tibble: 3 × 3
#>   Facet     MeanSeparation McseSeparation
#>   <chr>              <dbl>          <dbl>
#> 1 Criterion          1.87           0.076
#> 2 Person             2.03           0.027
#> 3 Rater              0.728          0.728

# Refit on a subset of persons with MML (few quadrature points, capped
# iterations) so the unit-level prediction example stays fast; warnings from
# the deliberately small maxit are suppressed.
keep_people <- unique(toy$Person)[1:18]
toy_mml <- suppressWarnings(
  fit_mfrm(
    toy[toy$Person %in% keep_people, , drop = FALSE],
    person = "Person",
    facets = c("Rater", "Criterion"),
    score = "Score",
    method = "MML",
    quad_points = 5,
    maxit = 15
  )
)

# Two new observations for an unseen person, scored on one rater and the
# first two criteria of the fitted design.
new_units <- data.frame(
  Person = c("NEW01", "NEW01"),
  Rater = unique(toy$Rater)[1],
  Criterion = unique(toy$Criterion)[1:2],
  Score = c(2, 3)
)

# Point prediction (n_draws = 0) versus plausible-value draws (n_draws = 2)
# for the same new units.
pred_units <- predict_mfrm_units(toy_mml, new_units, n_draws = 0)
pv_units <- sample_mfrm_plausible_values(toy_mml, new_units, n_draws = 2, seed = 1)

summary(pred_units)$estimates[, c("Person", "Estimate", "Lower", "Upper")]
#> # A tibble: 1 × 4
#>   Person Estimate Lower Upper
#>   <chr>     <dbl> <dbl> <dbl>
#> 1 NEW01    -0.097 -1.36  1.36
summary(pv_units)$draw_summary[, c("Person", "Draws", "MeanValue")]
#> # A tibble: 1 × 3
#>   Person Draws MeanValue
#>   <chr>  <dbl>     <dbl>
#> 1 NEW01      2         0