#' @title Weighted Longitudinal Mean (WLM)
#'
#' @description
#' This function identifies influential subjects in a longitudinal dataset based on their
#' weighted mean response values. It computes weighted averages for each subject and
#' detects anomalies by comparing them against an overall mean threshold.
#'
#' @details
#' The function follows these steps:
#' \itemize{
#'   \item Computes the weighted mean response for each subject.
#'   \item Calculates the overall mean and standard deviation of weighted responses.
#'   \item Identifies subjects whose weighted mean response deviates beyond \code{k} standard deviations.
#'   \item Separates data into influential and non-influential subjects.
#'   \item Provides visualizations of the detected anomalies using \code{ggplot2}.
#' }
#'
#' This method is beneficial for detecting influential subjects in longitudinal studies, where responses
#' may vary over time and require weighted adjustments.
#'
#' @param data A dataframe containing longitudinal data.
#' @param subject_id A character string giving the name of the column in \code{data} that holds the subject IDs.
#' @param time A character string giving the name of the column in \code{data} that holds the time points at which observations were measured.
#' @param response A character string giving the name of the column in \code{data} that holds the response variable.
#' @param k A numeric value specifying the threshold multiplier for detecting influential subjects (default: 2).
#' @param verbose Logical; if TRUE, prints informative messages during execution.
#'
#' @return A list containing:
#' \item{influential_subjects}{A vector of subject IDs identified as influential.}
#' \item{influential_data}{A dataframe of influential subjects' data.}
#' \item{non_influential_data}{A dataframe of non-influential subjects' data.}
#' \item{weighted_plot}{A ggplot object showing the weighted mean response for each subject.}
#' \item{longitudinal_plot}{A ggplot object visualizing the longitudinal data with influential subjects highlighted.}
#' \item{IS_table}{A data frame with one row per subject containing the subject ID, the mean time, the weighted mean response, the Influence Score (IS) and the Partial Influence Score (PIS); \code{NULL} when no influential subject is detected.}
#'
#' @examples
#' data(infsdata)
#' infsdata <- infsdata[1:5,]
#' result <- wlm(infsdata, "subject_id", "time", "response", k = 2)
#' print(result$influential_subjects)
#' head(result$influential_data)
#' head(result$non_influential_data)
#' print(result$weighted_plot)
#' print(result$longitudinal_plot)
#'
#' @export
#'
#' @seealso tvm, slm, sld, rld
#'

wlm <- function(data, subject_id, time, response, k = 2, verbose = FALSE) {
  # Flags subjects whose time-weighted mean response lies more than `k`
  # standard deviations away from the overall weighted mean.
  #
  # Arguments:
  #   data        data frame of longitudinal observations.
  #   subject_id  name of the column holding subject IDs.
  #   time        name of the column holding measurement times.
  #   response    name of the column holding the response.
  #   k           threshold multiplier applied to the standard deviation.
  #   verbose     if TRUE, the flagged IDs are reported via message().
  #
  # Returns a list with `influential_subjects`, `influential_data`,
  # `non_influential_data`, `weighted_plot`, `longitudinal_plot` and
  # `IS_table`. When nothing is flagged a warning is raised and the two
  # plots and `IS_table` are NULL.

  # Fail early with a readable message when a named column is absent.
  column_names <- c(subject_id, time, response)
  if (is.character(column_names)) {
    missing_cols <- setdiff(column_names, names(data))
    if (length(missing_cols) > 0) {
      stop(
        "Column(s) not found in `data`: ",
        paste(missing_cols, collapse = ", "),
        call. = FALSE
      )
    }
  }

  # drop = TRUE: unused factor levels would otherwise create empty groups
  # that turn into NaN/NA rows in the per-subject summary.
  subject_groups <- split(data, data[[subject_id]], drop = TRUE)

  subject_stats <- lapply(subject_groups, function(sub_data) {
    obs_time <- sub_data[[time]]
    obs_response <- sub_data[[response]]

    # Observations close to the subject's mean time get the largest weight.
    avg_time <- mean(obs_time, na.rm = TRUE)
    weights <- 1 / (1 + abs(obs_time - avg_time))

    # Only observations with both a time and a response contribute;
    # a single NA would otherwise turn every downstream quantity into NA.
    usable <- !is.na(weights) & !is.na(obs_response)
    weighted_sum <- sum(weights[usable] * obs_response[usable])
    weight_total <- sum(weights[usable])

    data.frame(
      subject_id = sub_data[[subject_id]][1],
      avg_time = avg_time,
      weighted_mean_response = weighted_sum / weight_total,
      weighted_sum = weighted_sum,
      weight_total = weight_total
    )
  })
  subject_stats <- do.call(rbind, subject_stats)

  if (is.null(subject_stats)) {
    # No subject at all (empty data or only missing IDs): keep an empty
    # summary so the code below reaches the "nothing detected" return.
    subject_stats <- data.frame(
      subject_id = integer(0),
      avg_time = numeric(0),
      weighted_mean_response = numeric(0),
      weighted_sum = numeric(0),
      weight_total = numeric(0)
    )
  }

  data_summary <- subject_stats[
    ,
    c("subject_id", "avg_time", "weighted_mean_response"),
    drop = FALSE
  ]

  # The overall weighted mean is assembled from the per-subject sums, so each
  # weight is always paired with its own observation. Multiplying a
  # group-ordered weight vector by the response column in original row order
  # mis-pairs them whenever `data` is not sorted by subject.
  overall_mean <-
    sum(subject_stats$weighted_sum) / sum(subject_stats$weight_total)
  overall_sd <- stats::sd(data_summary$weighted_mean_response, na.rm = TRUE)

  lower_threshold <- overall_mean - k * overall_sd
  upper_threshold <- overall_mean + k * overall_sd

  # which() discards NA comparisons (e.g. a single subject gives an NA
  # standard deviation) instead of returning NA as an "influential" ID.
  is_influential <-
    abs(data_summary$weighted_mean_response - overall_mean) > k * overall_sd
  influential_ids <- data_summary$subject_id[which(is_influential)]

  if (length(influential_ids) == 0) {
    warning("No influential subjects detected based on the given threshold.")
    return(list(
      influential_subjects = integer(0),
      influential_data = data[0, ],
      non_influential_data = data,
      weighted_plot = NULL,
      longitudinal_plot = NULL,
      IS_table = NULL
    ))
  }

  is_influential_row <- data[[subject_id]] %in% influential_ids
  influential_data <- data[is_influential_row, ]
  non_influential_data <- data[!is_influential_row, ]

  if (verbose) {
    message(
      "Influential subjects detected: ",
      paste(influential_ids, collapse = ", ")
    )
  }

  # `data_summary` always names its ID column "subject_id", whatever the
  # column is called in `data`, so the aesthetic must use that fixed name.
  weighted_plot <-
    ggplot2::ggplot(
      data_summary,
      ggplot2::aes(x = .data[["subject_id"]], y = weighted_mean_response)
    ) +
    ggplot2::geom_point(size = 3) +
    ggplot2::geom_point(
      data = data_summary[data_summary$subject_id %in% influential_ids, ],
      color = "red",
      size = 4
    ) +
    ggplot2::geom_hline(
      yintercept = overall_mean,
      linetype = "dashed"
    ) +
    ggplot2::geom_hline(
      yintercept = upper_threshold,
      linetype = "dotted",
      color = "red"
    ) +
    ggplot2::geom_hline(
      yintercept = lower_threshold,
      linetype = "dotted",
      color = "red"
    ) +
    ggplot2::labs(
      title = "Influential Subjects Identification (WLM Method)",
      x = subject_id,
      y = "Weighted Mean Response"
    ) +
    ggplot2::theme_minimal()

  longitudinal_plot <-
    ggplot2::ggplot(
      data,
      ggplot2::aes(
        x = .data[[time]],
        y = .data[[response]],
        group = .data[[subject_id]]
      )
    ) +
    ggplot2::geom_line(alpha = 0.8) +
    ggplot2::geom_line(
      data = influential_data,
      color = "red",
      linewidth = 1.2
    ) +
    ggplot2::labs(
      title = "Longitudinal Data with Influential Subjects (WLM Method)",
      x = time,
      y = response
    ) +
    ggplot2::theme_minimal()

  # IS: absolute standardised deviation from the overall mean.
  # PIS: squared deviation scaled by the number of rows and the variance.
  IS_table <- data_summary |>
    dplyr::mutate(
      IS  = abs((weighted_mean_response - overall_mean) / overall_sd),
      PIS = (weighted_mean_response - overall_mean)^2 /
        (nrow(data) * overall_sd^2)
    )

  list(
    influential_subjects = influential_ids,
    influential_data = influential_data,
    non_influential_data = non_influential_data,
    weighted_plot = weighted_plot,
    longitudinal_plot = longitudinal_plot,
    IS_table = IS_table
  )
}

# Register the names that wlm() uses through non-standard evaluation
# (`.data` inside ggplot2::aes() and the bare column name
# `weighted_mean_response` inside aes()/dplyr::mutate()) so that the code
# checker does not report them as undefined global variables.
utils::globalVariables(c(".data", "weighted_mean_response"))
