#' Correlation-Based Clustering Tree
#'
#' @description
#' Builds a binary tree for clustering time series data based on covariates.
#' The splitting criterion minimizes the average absolute Pearson correlation
#' between time series across child nodes.
#'
#' @param X A numeric matrix of covariates with dimension \eqn{N \times p},
#'   where \eqn{N} is the number of time series and \eqn{p} is the number of features.
#'   Each row corresponds to the covariates for one time series.
#' @param Y A numeric matrix of time series data with dimension \eqn{T \times N},
#'   where \eqn{T} is the length of each series. Each column represents one time series.
#' @param control A list of control parameters for tree construction:
#'   \describe{
#'     \item{\code{minsplit}}{Minimum number of observations required to attempt
#'       a split. Default: \code{90}.}
#'     \item{\code{minbucket}}{Minimum number of observations in any terminal node.
#'       Default: \code{30}.}
#'     \item{\code{alpha}}{Significance level for the permutation test.
#'       Default: \code{0.01}.}
#'     \item{\code{R}}{Number of permutations for the hypothesis test.
#'       Default: \code{199}.}
#'     \item{\code{parallel}}{Logical; if \code{TRUE}, enables parallel computation
#'       for permutation tests. Default: \code{FALSE}.}
#'     \item{\code{n_cores}}{Number of cores for parallel processing.
#'       If \code{NULL} (default), uses \code{detectCores() - 1}.}
#'   }
#'
#' @return An object of class \code{"FACT"} containing:
#'   \describe{
#'     \item{\code{frame}}{A data frame describing the tree structure, with one row
#'       per node containing split variable, split value, test statistic, and p-value.
#'       A smaller test statistic suggests more heterogeneity between child nodes.}
#'     \item{\code{membership}}{An integer vector of length \eqn{N} indicating
#'       the terminal node assignment for each observation.}
#'     \item{\code{control}}{The control parameters used.}
#'     \item{\code{terms}}{Metadata including covariate names and data dimensions.}
#'   }
#'
#' @details
#' The algorithm recursively partitions the data by finding splits that minimize
#' the average absolute correlation between time series in different child nodes.
#' Statistical significance of each split is assessed via a permutation test.
#'
#' At each node, the optimal split is found by exhaustively searching over all

#' covariates and candidate split points. The permutation test shuffles the
#' time series labels to generate a null distribution for the test statistic.
#'
#' @seealso
#' \code{\link{FACT}} for factor model-based clustering,
#' \code{\link{gendata}} for generating synthetic data,
#' \code{\link{print.FACT}} and \code{\link{plot.FACT}} for visualization.
#'
#' @examples
#' # Generate synthetic data
#' data <- gendata(seed = 42, T = 100, N = c(50, 50, 50, 50))
#'
#' # Build correlation-based tree
#' result <- COR(data$X, data$Y, control = list(R = 99, alpha = 0.05))
#'
#' # Examine results
#' print(result)
#' plot(result)
#' table(result$membership, data$group)
#'
#' @export
#' @importFrom parallel detectCores makeCluster stopCluster
#' @importFrom doParallel registerDoParallel
#' @importFrom doRNG %dorng%
#' @importFrom foreach foreach registerDoSEQ
#' @importFrom stats cor
#' @importFrom utils modifyList
COR <- function(X, Y, control = list()) {
  # --- 1. Control parameter setup ---
  defaults <- list(
    minsplit = 90, minbucket = 30, alpha = 0.01, R = 199,
    parallel = FALSE, n_cores = NULL
  )
  control <- utils::modifyList(defaults, control)
  if (is.null(control$minbucket)) {
    control$minbucket <- round(control$minsplit / 3)
  }
  # --- 2. Parallel backend setup (once, if needed) ---
  if (control$parallel) {
    n_cores <- if (is.null(control$n_cores)) {
      max(1, parallel::detectCores() - 1)
    } else {
      as.integer(control$n_cores)
    }
    if (n_cores > 0) {
      cl <- parallel::makeCluster(n_cores)
      doParallel::registerDoParallel(cl)
      # Ensure cleanup on function exit
      on.exit({
        parallel::stopCluster(cl)
        foreach::registerDoSEQ() # De-register to avoid side-effects
      }, add = TRUE)
      message(paste("Parallel backend registered with", n_cores, "cores."))
    } else {
      # If 0 cores are specified, switch off parallel mode.
      control$parallel <- FALSE
      message("n_cores is 0. Running in sequential mode.")
    }
  }
  # --- 3. Argument validation ---
  stopifnot(is.matrix(X), is.matrix(Y), ncol(Y) == nrow(X))
  if (is.null(colnames(X))) {
    message("Input matrix X has no column names. Generating defaults: X1, X2, ...")
    colnames(X) <- paste0("X", 1:ncol(X))
  }
  # --- 4. Initialize tree structure ---
  tree_env <- new.env()
  tree_env$nodes <- data.frame(
    node_id = 1, is_leaf = FALSE, n_obs = ncol(Y), parent_id = 0,
    split_var = NA_character_, split_val = NA_real_, statistic = NA_real_,
    p_value = NA_real_, left_child = NA_integer_, right_child = NA_integer_
  )
  tree_env$next_node_id <- 2
  # --- 5. Recursive growing function ---
  grow <- function(node_id, indices) {
    X_node <- X[indices, , drop = FALSE]
    Y_node <- Y[, indices, drop = FALSE]
    n_node <- length(indices)
    if (n_node < control$minsplit || nrow(unique(X_node)) < 2) {
      tree_env$nodes[tree_env$nodes$node_id == node_id, "is_leaf"] <<- TRUE
      return()
    }
    message(paste("Processing node", node_id, "with", n_node, "observations..."))
    best_split <- find_best_split_COR(X_node, Y_node, indices, control)
    if (is.infinite(best_split$stat)) {
      tree_env$nodes[tree_env$nodes$node_id == node_id, "is_leaf"] <<- TRUE
      return()
    }
    T_opt <- best_split$stat
    # Use %dorng% for reproducible results
    perm_stats <- foreach::foreach(
      m = 1:control$R,
      .combine = 'c',
      .export = c("find_best_split_COR", "control"),
      .packages = "stats"
    ) %dorng% {
      perm_indices_node <- sample(1:n_node)
      Y_perm_node <- Y_node[, perm_indices_node, drop = FALSE]
      perm_split <- find_best_split_COR(X_node, Y_perm_node, indices, control)
      perm_split$stat
    }
    # p-value calculation direction
    p_value <- mean(c(perm_stats, T_opt) <= T_opt, na.rm = TRUE)
    tree_env$nodes[tree_env$nodes$node_id == node_id, "p_value"] <<- p_value
    if (p_value <= control$alpha) {
      node_idx <- which(tree_env$nodes$node_id == node_id)
      tree_env$nodes[node_idx, "split_var"]   <- colnames(X)[best_split$var_idx]
      tree_env$nodes[node_idx, "split_val"]   <- best_split$split_val
      tree_env$nodes[node_idx, "statistic"]   <- T_opt
      left_id <- tree_env$next_node_id
      right_id <- tree_env$next_node_id + 1
      tree_env$next_node_id <<- tree_env$next_node_id + 2
      tree_env$nodes[node_idx, "left_child"]  <- left_id
      tree_env$nodes[node_idx, "right_child"] <- right_id
      new_nodes <- data.frame(
        node_id = c(left_id, right_id), is_leaf = FALSE,
        n_obs = c(length(best_split$left_indices), length(best_split$right_indices)),
        parent_id = node_id, split_var = NA, split_val = NA, statistic = NA, p_value = NA,
        left_child = NA, right_child = NA
      )
      tree_env$nodes <<- rbind(tree_env$nodes, new_nodes)
      grow(left_id, best_split$left_indices)
      grow(right_id, best_split$right_indices)
    } else {
      node_idx <- which(tree_env$nodes$node_id == node_id)
      tree_env$nodes[node_idx, "is_leaf"]     <- TRUE
      tree_env$nodes[node_idx, "split_var"]   <- colnames(X)[best_split$var_idx]
      tree_env$nodes[node_idx, "split_val"]   <- best_split$split_val
      tree_env$nodes[node_idx, "statistic"]   <- T_opt
    }
  }
  # --- 6. Execute and finalize ---
  grow(node_id = 1, indices = 1:ncol(Y))
  get_node_membership <- function(tree_df, X_full) {
    N <- nrow(X_full)
    membership <- integer(N)
    assign_obs <- function(node_id, indices) {
      if (length(indices) == 0) return()
      node_info <- tree_df[tree_df$node_id == node_id, ]
      if (node_info$is_leaf) {
        membership[indices] <<- node_id
      } else {
        x_feature <- X_full[indices, node_info$split_var, drop = FALSE]
        relative_left_indices <- which(x_feature < node_info$split_val)
        relative_right_indices <- which(x_feature >= node_info$split_val)
        assign_obs(node_info$left_child, indices[relative_left_indices])
        assign_obs(node_info$right_child, indices[relative_right_indices])
      }
    }
    assign_obs(1, 1:N)
    return(membership)
  }

  tree_env$nodes <- tree_env$nodes[order(tree_env$nodes$node_id), ]
  rownames(tree_env$nodes) <- NULL

  result <- list(
    frame = tree_env$nodes,
    membership = get_node_membership(tree_env$nodes, X),
    control = control,
    terms = list(X_names = colnames(X), Y_dim = dim(Y))
  )
  class(result) <- "FACT"

  return(result)
}
