% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/diagnosis.R
\name{load_and_prepare_data_dia}
\alias{load_and_prepare_data_dia}
\title{Load and Prepare Data for Diagnostic Models}
\usage{
load_and_prepare_data_dia(
  data_path,
  label_col_name,
  positive_label_value = 1,
  negative_label_value = 0,
  new_positive_label = "Positive",
  new_negative_label = "Negative"
)
}
\arguments{
\item{data_path}{A character string, the file path to the input CSV data.
The first column is assumed to be a sample ID.}

\item{label_col_name}{A character string, the name of the column containing
the class labels.}

\item{positive_label_value}{A numeric or character value that represents
the positive class in the raw data. Defaults to \code{1}.}

\item{negative_label_value}{A numeric or character value that represents
the negative class in the raw data. Defaults to \code{0}.}

\item{new_positive_label}{A character string, the desired factor level name
for the positive class (e.g., "Positive").}

\item{new_negative_label}{A character string, the desired factor level name
for the negative class (e.g., "Negative").}
}
\value{
A list containing:
\itemize{
\item \code{X}: A data frame of features (all columns except ID and label).
\item \code{y}: A factor vector of class labels, with levels \code{new_negative_label}
and \code{new_positive_label}.
\item \code{sample_ids}: A vector of sample IDs (the first column of the input data).
\item \code{pos_class_label}: The character string used for the positive class factor level.
\item \code{neg_class_label}: The character string used for the negative class factor level.
\item \code{y_original_numeric}: The original numeric/character vector of labels.
}
}
\description{
Loads a CSV file containing patient data, extracts the features,
and converts the label column into a factor suitable for classification
models. It also performs basic data cleaning, such as trimming whitespace
and converting column types.
}
\examples{
\donttest{
# Seed the random number generator so the simulated data, and therefore
# the output printed below, is the same on every run
set.seed(123)

# Write a small simulated data set to a temporary CSV file.
# The first column holds the sample IDs, which is where the function
# expects to find them.
temp_csv_path <- tempfile(fileext = ".csv")
dummy_data <- data.frame(
  ID = paste0("Patient", 1:50),
  Disease_Status = sample(c(0, 1), 50, replace = TRUE),
  FeatureA = rnorm(50),
  FeatureB = runif(50, 0, 100),
  CategoricalFeature = sample(c("X", "Y", "Z"), 50, replace = TRUE)
)
write.csv(dummy_data, temp_csv_path, row.names = FALSE)

# Load the file, relabelling the raw 1/0 values in Disease_Status
# as the factor levels Case and Control
prepared_data <- load_and_prepare_data_dia(
  data_path = temp_csv_path,
  label_col_name = "Disease_Status",
  positive_label_value = 1,
  negative_label_value = 0,
  new_positive_label = "Case",
  new_negative_label = "Control"
)

# Inspect the feature data frame and the class counts of the label factor
str(prepared_data$X)
table(prepared_data$y)

# Remove the temporary CSV file
unlink(temp_csv_path)
}
}
